Skip to content

Commit cf0b273

Browse files
committed
fix(monitor): address review comments on Go SDK generator
- Add a Prometheus datasource template variable so the dashboard exposes a datasource selector matching the original Jaeger dashboard.
- Assign unique, stable IDs (1-15) to all rows and panels; previously rows had id=0 and timeseries panels had no id field.
- Fix stacking: the P99 latency panels (Storage, Query) and the single-metric panels (CPU Usage, Memory RSS) no longer use stacking mode, because stacking percentile or single-series data produces misleading visualizations.
- Regenerate dashboard-for-grafana-v2.json from the updated generator.

Relates to: #5833

Signed-off-by: abhay1999 <abhaychaurasiya19@gmail.com>
1 parent ba58120 commit cf0b273

File tree

2 files changed

+79
-39
lines changed

2 files changed

+79
-39
lines changed

monitoring/jaeger-mixin/dashboard-for-grafana-v2.json

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,12 @@
2525
"x": 0,
2626
"y": 0
2727
},
28-
"id": 0,
28+
"id": 1,
2929
"panels": []
3030
},
3131
{
3232
"type": "timeseries",
33+
"id": 2,
3334
"targets": [
3435
{
3536
"expr": "sum(rate(otelcol_receiver_refused_spans_total[1m])) or vector(0)",
@@ -63,6 +64,7 @@
6364
},
6465
{
6566
"type": "timeseries",
67+
"id": 3,
6668
"targets": [
6769
{
6870
"expr": "sum(rate(otelcol_receiver_refused_spans_total[1m])) by (receiver, transport) / (sum(rate(otelcol_receiver_accepted_spans_total[1m])) by (receiver, transport) + sum(rate(otelcol_receiver_refused_spans_total[1m])) by (receiver, transport)) or vector(0)",
@@ -102,11 +104,12 @@
102104
"x": 0,
103105
"y": 9
104106
},
105-
"id": 0,
107+
"id": 4,
106108
"panels": []
107109
},
108110
{
109111
"type": "timeseries",
112+
"id": 5,
110113
"targets": [
111114
{
112115
"expr": "sum(rate(otelcol_exporter_send_failed_spans_total[1m])) or vector(0)",
@@ -140,6 +143,7 @@
140143
},
141144
{
142145
"type": "timeseries",
146+
"id": 6,
143147
"targets": [
144148
{
145149
"expr": "(sum(rate(otelcol_exporter_sent_spans_total[1m])) by (exporter) / (sum(rate(otelcol_exporter_sent_spans_total[1m])) by (exporter) + sum(rate(otelcol_exporter_send_failed_spans_total[1m])) by (exporter))) * 100 or vector(0)",
@@ -179,11 +183,12 @@
179183
"x": 0,
180184
"y": 18
181185
},
182-
"id": 0,
186+
"id": 7,
183187
"panels": []
184188
},
185189
{
186190
"type": "timeseries",
191+
"id": 8,
187192
"targets": [
188193
{
189194
"expr": "sum(rate(jaeger_storage_requests_total[1m])) by (operation, result)",
@@ -213,6 +218,7 @@
213218
},
214219
{
215220
"type": "timeseries",
221+
"id": 9,
216222
"targets": [
217223
{
218224
"expr": "histogram_quantile(0.99, sum(rate(jaeger_storage_latency_seconds_bucket[1m])) by (le, operation))",
@@ -232,10 +238,7 @@
232238
"defaults": {
233239
"unit": "s",
234240
"custom": {
235-
"fillOpacity": 10,
236-
"stacking": {
237-
"mode": "normal"
238-
}
241+
"fillOpacity": 10
239242
}
240243
},
241244
"overrides": []
@@ -251,11 +254,12 @@
251254
"x": 0,
252255
"y": 27
253256
},
254-
"id": 0,
257+
"id": 10,
255258
"panels": []
256259
},
257260
{
258261
"type": "timeseries",
262+
"id": 11,
259263
"targets": [
260264
{
261265
"expr": "sum(rate(http_server_request_duration_seconds_count{http_route=\"/api/traces\"}[1m])) by (http_response_status_code)",
@@ -285,6 +289,7 @@
285289
},
286290
{
287291
"type": "timeseries",
292+
"id": 12,
288293
"targets": [
289294
{
290295
"expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket{http_route=\"/api/traces\"}[1m])) by (le))",
@@ -304,10 +309,7 @@
304309
"defaults": {
305310
"unit": "s",
306311
"custom": {
307-
"fillOpacity": 10,
308-
"stacking": {
309-
"mode": "normal"
310-
}
312+
"fillOpacity": 10
311313
}
312314
},
313315
"overrides": []
@@ -323,11 +325,12 @@
323325
"x": 0,
324326
"y": 36
325327
},
326-
"id": 0,
328+
"id": 13,
327329
"panels": []
328330
},
329331
{
330332
"type": "timeseries",
333+
"id": 14,
331334
"targets": [
332335
{
333336
"expr": "rate(otelcol_process_cpu_seconds_total[1m])",
@@ -347,17 +350,15 @@
347350
"defaults": {
348351
"unit": "percentunit",
349352
"custom": {
350-
"fillOpacity": 10,
351-
"stacking": {
352-
"mode": "normal"
353-
}
353+
"fillOpacity": 10
354354
}
355355
},
356356
"overrides": []
357357
}
358358
},
359359
{
360360
"type": "timeseries",
361+
"id": 15,
361362
"targets": [
362363
{
363364
"expr": "otelcol_process_memory_rss_bytes",
@@ -377,16 +378,29 @@
377378
"defaults": {
378379
"unit": "bytes",
379380
"custom": {
380-
"fillOpacity": 10,
381-
"stacking": {
382-
"mode": "normal"
383-
}
381+
"fillOpacity": 10
384382
}
385383
},
386384
"overrides": []
387385
}
388386
}
389387
],
390-
"templating": {},
388+
"templating": {
389+
"list": [
390+
{
391+
"type": "datasource",
392+
"name": "datasource",
393+
"label": "Data Source",
394+
"skipUrlSync": false,
395+
"query": "prometheus",
396+
"multi": false,
397+
"allowCustomValue": true,
398+
"includeAll": false,
399+
"auto": false,
400+
"auto_min": "10s",
401+
"auto_count": 30
402+
}
403+
]
404+
},
391405
"annotations": {}
392406
}

monitoring/jaeger-mixin/generate/main.go

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -42,38 +42,47 @@ func buildDashboard() (dashboard.Dashboard, error) {
4242
Refresh("30s").
4343
Time("now-1h", "now").
4444
Timezone(common.TimeZoneBrowser).
45+
// Prometheus datasource selector — matches the original Jaeger dashboard
46+
// and other mixin dashboards in this repo.
47+
WithVariable(
48+
dashboard.NewDatasourceVariableBuilder("datasource").
49+
Label("Data Source").
50+
Type("prometheus"),
51+
).
4552

4653
// ── Row 1: Collector - Ingestion ───────────────────────────────────────
47-
WithRow(dashboard.NewRowBuilder("Collector - Ingestion")).
54+
WithRow(dashboard.NewRowBuilder("Collector - Ingestion").Id(1)).
4855
WithPanel(spanIngestRatePanel()).
4956
WithPanel(spansRefusedPctPanel()).
5057

5158
// ── Row 2: Collector - Export ──────────────────────────────────────────
52-
WithRow(dashboard.NewRowBuilder("Collector - Export")).
59+
WithRow(dashboard.NewRowBuilder("Collector - Export").Id(4)).
5360
WithPanel(spanExportRatePanel()).
5461
WithPanel(exportSuccessRatePanel()).
5562

5663
// ── Row 3: Storage ─────────────────────────────────────────────────────
57-
WithRow(dashboard.NewRowBuilder("Storage")).
64+
WithRow(dashboard.NewRowBuilder("Storage").Id(7)).
5865
WithPanel(storageRequestRatePanel()).
5966
WithPanel(storageLatencyP99Panel()).
6067

6168
// ── Row 4: Query ───────────────────────────────────────────────────────
62-
WithRow(dashboard.NewRowBuilder("Query")).
69+
WithRow(dashboard.NewRowBuilder("Query").Id(10)).
6370
WithPanel(queryRequestRatePanel()).
6471
WithPanel(queryLatencyP99Panel()).
6572

6673
// ── Row 5: System ──────────────────────────────────────────────────────
67-
WithRow(dashboard.NewRowBuilder("System")).
74+
WithRow(dashboard.NewRowBuilder("System").Id(13)).
6875
WithPanel(cpuUsagePanel()).
6976
WithPanel(memoryRSSPanel())
7077

7178
return builder.Build()
7279
}
7380

74-
// stackedPanel returns a timeseries panel builder pre-configured for stacked mode.
75-
func stackedPanel(title string) *timeseries.PanelBuilder {
81+
// stackedPanel returns a timeseries panel pre-configured for stacked mode.
82+
// Use for rate/count panels where stacking multiple series is meaningful.
83+
func stackedPanel(id uint32, title string) *timeseries.PanelBuilder {
7684
return timeseries.NewPanelBuilder().
85+
Id(id).
7786
Title(title).
7887
Span(12).
7988
Height(8).
@@ -82,6 +91,18 @@ func stackedPanel(title string) *timeseries.PanelBuilder {
8291
Mode(common.StackingModeNormal))
8392
}
8493

94+
// timeseriesPanel returns a timeseries panel builder with the given id and
95+
// title, span 12, height 8 and fill opacity 10, and with no stacking mode set.
96+
// Use it for latency percentiles and single-metric panels, where stacking would mislead.
97+
func timeseriesPanel(id uint32, title string) *timeseries.PanelBuilder {
98+
return timeseries.NewPanelBuilder().
99+
Id(id).
100+
Title(title).
101+
Span(12).
102+
Height(8).
103+
FillOpacity(10)
104+
}
105+
85106
// promTarget is a shorthand for a Prometheus query with a legend.
86107
func promTarget(expr, legend string) *prometheus.DataqueryBuilder {
87108
return prometheus.NewDataqueryBuilder().
@@ -92,7 +113,7 @@ func promTarget(expr, legend string) *prometheus.DataqueryBuilder {
92113
// ── Collector - Ingestion ──────────────────────────────────────────────────────
93114

94115
func spanIngestRatePanel() *timeseries.PanelBuilder {
95-
return stackedPanel("Span Ingest Rate").
116+
return stackedPanel(2, "Span Ingest Rate").
96117
WithTarget(promTarget(
97118
`sum(rate(otelcol_receiver_refused_spans_total[1m])) or vector(0)`,
98119
"error",
@@ -104,7 +125,7 @@ func spanIngestRatePanel() *timeseries.PanelBuilder {
104125
}
105126

106127
func spansRefusedPctPanel() *timeseries.PanelBuilder {
107-
return stackedPanel("% Spans Refused").
128+
return stackedPanel(3, "% Spans Refused").
108129
Unit("percentunit").
109130
Max(1).
110131
WithTarget(promTarget(
@@ -119,7 +140,7 @@ func spansRefusedPctPanel() *timeseries.PanelBuilder {
119140
// ── Collector - Export ────────────────────────────────────────────────────────
120141

121142
func spanExportRatePanel() *timeseries.PanelBuilder {
122-
return stackedPanel("Span Export Rate").
143+
return stackedPanel(5, "Span Export Rate").
123144
WithTarget(promTarget(
124145
`sum(rate(otelcol_exporter_send_failed_spans_total[1m])) or vector(0)`,
125146
"error",
@@ -131,7 +152,7 @@ func spanExportRatePanel() *timeseries.PanelBuilder {
131152
}
132153

133154
func exportSuccessRatePanel() *timeseries.PanelBuilder {
134-
return stackedPanel("Export Success Rate %").
155+
return stackedPanel(6, "Export Success Rate %").
135156
Unit("percent").
136157
Max(100).
137158
WithTarget(promTarget(
@@ -146,15 +167,16 @@ func exportSuccessRatePanel() *timeseries.PanelBuilder {
146167
// ── Storage ───────────────────────────────────────────────────────────────────
147168

148169
func storageRequestRatePanel() *timeseries.PanelBuilder {
149-
return stackedPanel("Storage Request Rate").
170+
return stackedPanel(8, "Storage Request Rate").
150171
WithTarget(promTarget(
151172
`sum(rate(jaeger_storage_requests_total[1m])) by (operation, result)`,
152173
"{{operation}} - {{result}}",
153174
))
154175
}
155176

156177
func storageLatencyP99Panel() *timeseries.PanelBuilder {
157-
return stackedPanel("Storage Latency - P99").
178+
// Latency percentile — not stacked; stacking percentiles is misleading.
179+
return timeseriesPanel(9, "Storage Latency - P99").
158180
Unit("s").
159181
WithTarget(promTarget(
160182
`histogram_quantile(0.99, sum(rate(jaeger_storage_latency_seconds_bucket[1m])) by (le, operation))`,
@@ -165,15 +187,16 @@ func storageLatencyP99Panel() *timeseries.PanelBuilder {
165187
// ── Query ─────────────────────────────────────────────────────────────────────
166188

167189
func queryRequestRatePanel() *timeseries.PanelBuilder {
168-
return stackedPanel("Query Request Rate").
190+
return stackedPanel(11, "Query Request Rate").
169191
WithTarget(promTarget(
170192
`sum(rate(http_server_request_duration_seconds_count{http_route="/api/traces"}[1m])) by (http_response_status_code)`,
171193
"status {{http_response_status_code}}",
172194
))
173195
}
174196

175197
func queryLatencyP99Panel() *timeseries.PanelBuilder {
176-
return stackedPanel("Query Latency - P99").
198+
// Latency percentile — not stacked; stacking percentiles is misleading.
199+
return timeseriesPanel(12, "Query Latency - P99").
177200
Unit("s").
178201
WithTarget(promTarget(
179202
`histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket{http_route="/api/traces"}[1m])) by (le))`,
@@ -184,7 +207,9 @@ func queryLatencyP99Panel() *timeseries.PanelBuilder {
184207
// ── System ────────────────────────────────────────────────────────────────────
185208

186209
func cpuUsagePanel() *timeseries.PanelBuilder {
187-
return stackedPanel("CPU Usage").
210+
// Single-metric panel — stacking is pointless for one series and would sum
211+
// unrelated series if the query returns several; use plain timeseries.
212+
return timeseriesPanel(14, "CPU Usage").
188213
Unit("percentunit").
189214
WithTarget(promTarget(
190215
`rate(otelcol_process_cpu_seconds_total[1m])`,
@@ -193,7 +218,8 @@ func cpuUsagePanel() *timeseries.PanelBuilder {
193218
}
194219

195220
func memoryRSSPanel() *timeseries.PanelBuilder {
196-
return stackedPanel("Memory RSS").
221+
// Single-metric panel — same rationale as cpuUsagePanel.
222+
return timeseriesPanel(15, "Memory RSS").
197223
Unit("bytes").
198224
WithTarget(promTarget(
199225
`otelcol_process_memory_rss_bytes`,

0 commit comments

Comments
 (0)