Skip to content

Commit 9cf0c3a

Browse files
committed
Fixup accumulation for categorical histograms
1 parent 328c8be commit 9cf0c3a

File tree

11 files changed

+177
-29
lines changed

11 files changed

+177
-29
lines changed

configs/adaptive-performance-power-saving-v1.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ histograms:
1616

1717
# CPU time per process type
1818
- metrics.labeled_counter.power_cpu_time_per_process_type_ms:
19-
aggregate: percentiles
19+
aggregate: sum
2020

2121
# Page load metrics
2222
- metrics.timing_distribution.performance_pageload_fcp

lib/analysis.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ def createNumericalTemplate():
288288

289289

290290
def createCategoricalTemplate():
291-
template = {"desc": "", "labels": [], "counts": [], "sum": 0}
291+
template = {"desc": "", "labels": [], "counts": [], "sample_counts": [], "sum": 0}
292292
return template
293293

294294

@@ -424,8 +424,10 @@ def processCategoricalMetrics(self, categorical_metrics):
424424
# Apply unified categorical analysis
425425
labels = data["bins"]
426426
counts = data["counts"]
427+
sample_counts = data.get("sample_counts", [])
427428
result_location["labels"] = labels
428429
result_location["counts"] = counts
430+
result_location["sample_counts"] = sample_counts
429431
total = sum(counts)
430432
result_location["sum"] = total
431433

lib/parser.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,13 @@ def annotateHistograms(config: Dict[str, Any], probeIndex: Dict[str, Any]) -> No
271271
sys.exit(1)
272272

273273
if aggregate == "sum":
274-
# Sum mode: single scalar value
275-
config["histograms"][hist]["kind"] = "scalar"
274+
# Sum mode
275+
# For labeled_counter: keep per-label sums (use categorical kind for display)
276+
# For quantity: single scalar value
277+
if schema["type"] == "labeled_counter":
278+
config["histograms"][hist]["kind"] = "categorical"
279+
else:
280+
config["histograms"][hist]["kind"] = "scalar"
276281
else:
277282
# Percentiles mode: calculate median, p75, p95
278283
# For labeled_counter, use labeled_percentiles (per-label tables)
@@ -386,8 +391,13 @@ def annotateHistograms(config: Dict[str, Any], probeIndex: Dict[str, Any]) -> No
386391
sys.exit(1)
387392

388393
if aggregate == "sum":
389-
# Sum mode: single scalar value
390-
config["histograms"][hist]["kind"] = "scalar"
394+
# Sum mode
395+
# For labeled_counter: keep per-label sums (use categorical kind for display)
396+
# For quantity: single scalar value
397+
if schema["type"] == "labeled_counter":
398+
config["histograms"][hist]["kind"] = "categorical"
399+
else:
400+
config["histograms"][hist]["kind"] = "scalar"
391401
else:
392402
# Percentiles mode: calculate median, p75, p95
393403
# For labeled_counter, use labeled_percentiles (per-label tables)

lib/report.py

Lines changed: 98 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,17 @@ def createSummarySection(self):
204204

205205
# Generate summary for categorical metrics
206206
if data_type == "categorical":
207+
# Check if this is a labeled_counter with sum aggregate
208+
is_sum_aggregate = False
209+
for hist_key in self.data.get("histograms", {}):
210+
hist_name = hist_key.split(".")[-1]
211+
if hist_name == metric:
212+
is_sum_aggregate = (
213+
self.data["histograms"][hist_key].get("aggregate")
214+
== "sum"
215+
)
216+
break
217+
207218
branches = []
208219
for branch in self.data["branches"]:
209220
branch_name = (
@@ -223,23 +234,66 @@ def createSummarySection(self):
223234
][metric]
224235
rows = []
225236
n_labels = len(metric_data["labels"])
226-
for i in range(n_labels):
237+
238+
# For labeled_counter with sum aggregate and many labels,
239+
# only show top 2-3 by absolute uplift
240+
if is_sum_aggregate and n_labels > 10:
241+
# Create list of (index, label, uplift) tuples
242+
label_uplifts = [
243+
(
244+
i,
245+
metric_data["labels"][i],
246+
metric_data["uplift"][i],
247+
)
248+
for i in range(n_labels)
249+
]
250+
# Sort by absolute uplift descending
251+
label_uplifts.sort(
252+
key=lambda x: abs(x[2]), reverse=True
253+
)
254+
# Take top 3
255+
label_uplifts = label_uplifts[:3]
256+
indices_to_show = [x[0] for x in label_uplifts]
257+
else:
258+
indices_to_show = range(n_labels)
259+
260+
for i in indices_to_show:
227261
label = metric_data["labels"][i]
228262
uplift = metric_data["uplift"][i]
229263

230264
# Skip small uplifts for enumerated histograms with many labels
231-
if n_labels > 5 and abs(uplift) < 0.05:
265+
if (
266+
not is_sum_aggregate
267+
and n_labels > 5
268+
and abs(uplift) < 0.05
269+
):
232270
continue
233271

234272
weight = "font-weight:normal;"
273+
color = ""
235274
if abs(uplift) >= 10:
236275
effect = "Large"
237276
weight = "font-weight:bold;"
277+
color = (
278+
"color:red;"
279+
if uplift > 0
280+
else "color:green;"
281+
)
238282
elif abs(uplift) >= 5:
239283
effect = "Medium"
240284
weight = "font-weight:bold;"
285+
color = (
286+
"color:red;"
287+
if uplift > 0
288+
else "color:green;"
289+
)
241290
elif abs(uplift) >= 2:
242291
effect = "Small"
292+
color = (
293+
"color:red;"
294+
if uplift > 0
295+
else "color:green;"
296+
)
243297
else:
244298
effect = "None"
245299

@@ -255,6 +309,7 @@ def createSummarySection(self):
255309
"uplift": uplift_desc,
256310
"effect": effect,
257311
"weight": weight,
312+
"color": color,
258313
"style": f"background:{row_background};",
259314
}
260315
)
@@ -1154,36 +1209,51 @@ def createCategoricalComparison(self, segment, metric, metric_type):
11541209

11551210
n_elem = len(self.data[control][segment][metric_type][metric]["counts"])
11561211

1157-
# Get unit from metric config if available
1212+
# Get unit and aggregate type from metric config if available
11581213
unit = ""
1214+
aggregate = None
11591215
for hist_key in self.data.get("histograms", {}):
11601216
hist_name = hist_key.split(".")[-1]
11611217
if hist_name == metric:
11621218
unit = self.data["histograms"][hist_key].get("unit", "")
1219+
aggregate = self.data["histograms"][hist_key].get("aggregate")
11631220
break
1164-
if n_elem <= 10:
1221+
if n_elem <= 20:
1222+
# For metrics with <= 20 categories (like labeled counters), show all
11651223
indices = set(range(0, n_elem))
1224+
else:
1225+
# For metrics with many categories, only show significant changes
1226+
for branch in self.data["branches"]:
1227+
if branch == control:
1228+
continue
1229+
uplift = self.data[branch][segment][metric_type][metric]["uplift"]
11661230

1167-
for branch in self.data["branches"]:
1168-
if branch == control:
1169-
continue
1170-
uplift = self.data[branch][segment][metric_type][metric]["uplift"]
1171-
1172-
for i in range(len(uplift)):
1173-
# Show categories with > 1% change
1174-
if abs(uplift[i]) > 1:
1175-
indices.add(i)
1231+
for i in range(len(uplift)):
1232+
# Show categories with > 1% change
1233+
if abs(uplift[i]) > 1:
1234+
indices.add(i)
11761235

11771236
datasets = []
11781237
for branch in self.data["branches"]:
11791238
counts_branch = [
11801239
float(self.data[branch][segment][metric_type][metric]["counts"][i])
11811240
for i in indices
11821241
]
1242+
sample_counts_branch = []
1243+
if "sample_counts" in self.data[branch][segment][metric_type][metric]:
1244+
sample_counts_list = self.data[branch][segment][metric_type][metric][
1245+
"sample_counts"
1246+
]
1247+
if sample_counts_list:
1248+
sample_counts_branch = [
1249+
float(sample_counts_list[i]) for i in indices
1250+
]
1251+
11831252
datasets.append(
11841253
{
11851254
"branch": branch,
11861255
"counts": counts_branch,
1256+
"sample_counts": sample_counts_branch,
11871257
}
11881258
)
11891259

@@ -1217,6 +1287,19 @@ def createCategoricalComparison(self, segment, metric, metric_type):
12171287
else:
12181288
count_display = f"{count_num:,}"
12191289

1290+
# Get sample count if available
1291+
sample_count = None
1292+
if "sample_counts" in dataset and dataset["sample_counts"]:
1293+
sample_count_num = int(dataset["sample_counts"][idx])
1294+
if sample_count_num >= 1000000000:
1295+
sample_count = f"{sample_count_num / 1000000000:.1f}B"
1296+
elif sample_count_num >= 1000000:
1297+
sample_count = f"{sample_count_num / 1000000:.1f}M"
1298+
elif sample_count_num >= 1000:
1299+
sample_count = f"{sample_count_num / 1000:.1f}K"
1300+
else:
1301+
sample_count = f"{sample_count_num:,}"
1302+
12201303
# Uplift only for non-control branches
12211304
uplift = None
12221305
if j > 0 and len(datasets) > 1 and "uplift" in datasets[1]:
@@ -1226,6 +1309,7 @@ def createCategoricalComparison(self, segment, metric, metric_type):
12261309
{
12271310
"branch_name": dataset["branch"],
12281311
"count": count_display,
1312+
"sample_count": sample_count,
12291313
"uplift": uplift,
12301314
}
12311315
)
@@ -1246,6 +1330,7 @@ def createCategoricalComparison(self, segment, metric, metric_type):
12461330
"metric": metric,
12471331
"segment": segment,
12481332
"unit": unit,
1333+
"aggregate": aggregate,
12491334
}
12501335
self.doc(t.render(context))
12511336

lib/telemetry.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,16 +468,18 @@ def collectResultsFromQuery_OS_segments(
468468
df = histograms[histogram]
469469
if segment == "All":
470470
subset = (
471-
df[df["branch"] == branch][["bucket", "counts"]]
471+
df[df["branch"] == branch][["bucket", "counts", "sample_count"]]
472472
.groupby(["bucket"])
473473
.sum()
474474
)
475475
buckets = list(subset.index)
476476
counts = list(subset["counts"])
477+
sample_counts = list(subset.get("sample_count", [0] * len(buckets)))
477478
else:
478479
subset = df[(df["segment"] == segment) & (df["branch"] == branch)]
479480
buckets = list(subset["bucket"])
480481
counts = list(subset["counts"])
482+
sample_counts = list(subset.get("sample_count", [0] * len(buckets)))
481483

482484
# Some clients report bucket sizes that are not real, and these buckets
483485
# end up having 1-5 samples in them. Filter these out entirely.
@@ -491,6 +493,8 @@ def collectResultsFromQuery_OS_segments(
491493
for i in sorted(remove, reverse=True):
492494
del buckets[i]
493495
del counts[i]
496+
if sample_counts:
497+
del sample_counts[i]
494498

495499
# Add labels to the buckets for categorical histograms.
496500
if self.config["histograms"][histogram]["kind"] == "categorical":
@@ -501,15 +505,23 @@ def collectResultsFromQuery_OS_segments(
501505
if len(labels) == (len(buckets) - 1) and counts[-1] == 0:
502506
del buckets[-1]
503507
del counts[-1]
508+
if sample_counts:
509+
del sample_counts[-1]
504510

505511
# Create a bucket->count mapping from query results
506512
bucket_to_count = dict(zip(buckets, counts))
513+
bucket_to_sample_count = (
514+
dict(zip(buckets, sample_counts)) if sample_counts else {}
515+
)
507516

508517
# Remap counts to match label order, filling missing labels with 0
509518
new_counts = []
519+
new_sample_counts = []
510520
for label in labels:
511521
new_counts.append(bucket_to_count.get(label, 0))
522+
new_sample_counts.append(bucket_to_sample_count.get(label, 0))
512523
counts = new_counts
524+
sample_counts = new_sample_counts
513525

514526
# Use labels as bucket names
515527
buckets = labels
@@ -520,20 +532,33 @@ def collectResultsFromQuery_OS_segments(
520532
maxBucket = self.config["histograms"][histogram]["max"]
521533
remove = []
522534
maxBucketCount = 0
535+
maxBucketSampleCount = 0
523536
for i, x in enumerate(buckets):
524537
if x >= maxBucket:
525538
remove.append(i)
526539
maxBucketCount = maxBucketCount + counts[i]
540+
if sample_counts:
541+
maxBucketSampleCount = (
542+
maxBucketSampleCount + sample_counts[i]
543+
)
527544
for i in sorted(remove, reverse=True):
528545
del buckets[i]
529546
del counts[i]
547+
if sample_counts:
548+
del sample_counts[i]
530549
buckets.append(maxBucket)
531550
counts.append(maxBucketCount)
551+
if sample_counts:
552+
sample_counts.append(maxBucketSampleCount)
532553

533554
assert len(buckets) == len(counts)
534555
results[branch][segment]["histograms"][histogram] = {}
535556
results[branch][segment]["histograms"][histogram]["bins"] = buckets
536557
results[branch][segment]["histograms"][histogram]["counts"] = counts
558+
if sample_counts:
559+
results[branch][segment]["histograms"][histogram][
560+
"sample_counts"
561+
] = sample_counts
537562
print(f" segment={segment} len(histogram: {histogram}) = ", len(buckets))
538563

539564
for metric in self.config["pageload_event_metrics"]:

lib/templates/html/categorical.html

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
grace: '50%',
4141
beginAtZero: true,
4242
title: {
43-
text: "Count{% if unit %} ({{unit}}){% endif %}",
43+
text: "{% if aggregate == 'sum' %}Sum{% else %}Count{% endif %}{% if unit %} ({{unit}}){% endif %}",
4444
display: true
4545
}
4646
},
@@ -57,6 +57,22 @@
5757
<div style="margin-bottom: 20px;">
5858
<h5 style="margin: 5px 0; color: #555; font-weight: bold;">{{row.label}}</h5>
5959
<table border="1" cellpadding="0" cellspacing="0" class="stat-table" style="table-layout: fixed; width: 100%;">
60+
{% if aggregate == 'sum' %}
61+
<colgroup>
62+
<col style="width: 30%;">
63+
<col style="width: 20%;">
64+
<col style="width: 30%;">
65+
<col style="width: 20%;">
66+
</colgroup>
67+
<thead>
68+
<tr>
69+
<th style="font-size: 11px; padding: 6px;">Branch</th>
70+
<th style="font-size: 11px; padding: 6px;">N</th>
71+
<th style="font-size: 11px; padding: 6px;">Sum{% if unit %} ({{unit}}){% endif %}</th>
72+
<th style="font-size: 11px; padding: 6px;">Uplift (%)</th>
73+
</tr>
74+
</thead>
75+
{% else %}
6076
<colgroup>
6177
<col style="width: 35%;">
6278
<col style="width: 30%;">
@@ -69,10 +85,14 @@ <h5 style="margin: 5px 0; color: #555; font-weight: bold;">{{row.label}}</h5>
6985
<th style="font-size: 11px; padding: 6px;">Uplift (%)</th>
7086
</tr>
7187
</thead>
88+
{% endif %}
7289
<tbody>
7390
{% for branch_row in row.branch_rows %}
7491
<tr style="background-color: {% if forloop.counter0|divisibleby:2 %}#f9f9f9{% else %}#ffffff{% endif %};">
7592
<td style="text-align: left; padding: 6px; font-size: 13px;">{{branch_row.branch_name}}</td>
93+
{% if aggregate == 'sum' %}
94+
<td style="padding: 6px; font-size: 13px;">{% if branch_row.sample_count %}{{branch_row.sample_count}}{% else %}-{% endif %}</td>
95+
{% endif %}
7696
<td style="padding: 6px; font-size: 13px;">{{branch_row.count}}</td>
7797
<td style="padding: 6px; font-size: 13px;{% if branch_row.uplift is not none and branch_row.uplift >= 10 %} background-color: #ffcdd2; color: #c62828;{% elif branch_row.uplift is not none and branch_row.uplift <= -10 %} background-color: #c8e6c9; color: #2e7d32;{% endif %}">
7898
{% if branch_row.uplift is not none %}

0 commit comments

Comments
 (0)