@@ -107,6 +107,9 @@ var (
107107 Help : "Total number of tests run (aggregate counter without run_id)" ,
108108 }, []string {
109109 "network_name" ,
110+ "test_name" ,
111+ "gate" ,
112+ "suite" ,
110113 })
111114
112115 testsPassed = promauto .NewCounterVec (prometheus.CounterOpts {
@@ -115,6 +118,9 @@ var (
115118 Help : "Total number of passed tests (aggregate counter without run_id)" ,
116119 }, []string {
117120 "network_name" ,
121+ "test_name" ,
122+ "gate" ,
123+ "suite" ,
118124 })
119125
120126 testsFailed = promauto .NewCounterVec (prometheus.CounterOpts {
@@ -123,6 +129,9 @@ var (
123129 Help : "Total number of failed tests (aggregate counter without run_id)" ,
124130 }, []string {
125131 "network_name" ,
132+ "test_name" ,
133+ "gate" ,
134+ "suite" ,
126135 })
127136
128137 testsSkipped = promauto .NewCounterVec (prometheus.CounterOpts {
@@ -131,6 +140,9 @@ var (
131140 Help : "Total number of skipped tests (aggregate counter without run_id)" ,
132141 }, []string {
133142 "network_name" ,
143+ "test_name" ,
144+ "gate" ,
145+ "suite" ,
134146 })
135147
136148 // Metrics for individual test tracking
@@ -157,6 +169,101 @@ var (
157169 "gate" ,
158170 "suite" ,
159171 })
172+
173+ // Test duration histogram to track distribution of test execution times
174+ testDurationHistogram = promauto .NewHistogramVec (prometheus.HistogramOpts {
175+ Namespace : MetricsNamespace ,
176+ Name : "test_duration_histogram_seconds" ,
177+ Help : "Histogram of test execution durations in seconds" ,
178+ Buckets : []float64 {0.1 , 0.5 , 1 , 2 , 5 , 10 , 30 , 60 , 120 , 300 , 600 }, // 100ms to 10min
179+ }, []string {
180+ "network_name" ,
181+ "test_name" ,
182+ "gate" ,
183+ "suite" ,
184+ })
185+
186+ // Test timeout tracking
187+ testTimeouts = promauto .NewCounterVec (prometheus.CounterOpts {
188+ Namespace : MetricsNamespace ,
189+ Name : "test_timeouts_total" ,
190+ Help : "Total number of tests that timed out" ,
191+ }, []string {
192+ "network_name" ,
193+ "run_id" ,
194+ "test_name" ,
195+ "gate" ,
196+ "suite" ,
197+ })
198+
199+ // Gate-level aggregated metrics
200+ gateTestsTotal = promauto .NewCounterVec (prometheus.CounterOpts {
201+ Namespace : MetricsNamespace ,
202+ Name : "gate_tests_total" ,
203+ Help : "Total number of tests per gate" ,
204+ }, []string {
205+ "network_name" ,
206+ "gate" ,
207+ })
208+
209+ gateTestsPassed = promauto .NewCounterVec (prometheus.CounterOpts {
210+ Namespace : MetricsNamespace ,
211+ Name : "gate_tests_passed_total" ,
212+ Help : "Total number of passed tests per gate" ,
213+ }, []string {
214+ "network_name" ,
215+ "gate" ,
216+ })
217+
218+ gateTestsFailed = promauto .NewCounterVec (prometheus.CounterOpts {
219+ Namespace : MetricsNamespace ,
220+ Name : "gate_tests_failed_total" ,
221+ Help : "Total number of failed tests per gate" ,
222+ }, []string {
223+ "network_name" ,
224+ "gate" ,
225+ })
226+
227+ gateDurationSeconds = promauto .NewGaugeVec (prometheus.GaugeOpts {
228+ Namespace : MetricsNamespace ,
229+ Name : "gate_duration_seconds" ,
230+ Help : "Duration of gate execution in seconds" ,
231+ }, []string {
232+ "network_name" ,
233+ "run_id" ,
234+ "gate" ,
235+ })
236+
237+ // Suite-level metrics
238+ suiteTestsTotal = promauto .NewCounterVec (prometheus.CounterOpts {
239+ Namespace : MetricsNamespace ,
240+ Name : "suite_tests_total" ,
241+ Help : "Total number of tests per suite" ,
242+ }, []string {
243+ "network_name" ,
244+ "gate" ,
245+ "suite" ,
246+ })
247+
248+ suiteTestsPassed = promauto .NewCounterVec (prometheus.CounterOpts {
249+ Namespace : MetricsNamespace ,
250+ Name : "suite_tests_passed_total" ,
251+ Help : "Total number of passed tests per suite" ,
252+ }, []string {
253+ "network_name" ,
254+ "gate" ,
255+ "suite" ,
256+ })
257+
258+ suiteTestsFailed = promauto .NewCounterVec (prometheus.CounterOpts {
259+ Namespace : MetricsNamespace ,
260+ Name : "suite_tests_failed_total" ,
261+ Help : "Total number of failed tests per suite" ,
262+ }, []string {
263+ "network_name" ,
264+ "gate" ,
265+ "suite" ,
266+ })
160267)
161268
162269// errToLabel tries to make the error string a more valid Prometheus label
@@ -232,14 +339,6 @@ func RecordAcceptance(
232339 }
233340
234341 testRunDurationSeconds .WithLabelValues (network , runID ).Set (duration .Seconds ())
235-
236- // Also record to the continuous counters without run_id
237- testsTotal .WithLabelValues (network ).Add (float64 (total ))
238- testsPassed .WithLabelValues (network ).Add (float64 (passed ))
239- testsFailed .WithLabelValues (network ).Add (float64 (failed ))
240- if skipped > 0 {
241- testsSkipped .WithLabelValues (network ).Add (float64 (skipped ))
242- }
243342}
244343
245344// RecordIndividualTest records metrics for an individual test
@@ -265,8 +364,44 @@ func RecordIndividualTest(
265364
266365 testStatus .WithLabelValues (network , runID , testName , gate , suite ).Set (statusValue )
267366 testDurationSeconds .WithLabelValues (network , runID , testName , gate , suite ).Set (duration .Seconds ())
367+
368+ // Also record to the continuous counters without run_id for time-based aggregation
369+ testsTotal .WithLabelValues (network , testName , gate , suite ).Inc ()
370+ switch status {
371+ case types .TestStatusPass :
372+ testsPassed .WithLabelValues (network , testName , gate , suite ).Inc ()
373+ case types .TestStatusFail :
374+ testsFailed .WithLabelValues (network , testName , gate , suite ).Inc ()
375+ case types .TestStatusSkip :
376+ testsSkipped .WithLabelValues (network , testName , gate , suite ).Inc ()
377+ }
268378}
269379
270380func isValidResult (result types.TestStatus ) bool {
271381 return slices .Contains (validResults , result )
272382}
383+
384+ // RecordTestDurationHistogram records test duration in a histogram for distribution analysis
385+ func RecordTestDurationHistogram (network string , testName string , gate string , suite string , duration time.Duration ) {
386+ testDurationHistogram .WithLabelValues (network , testName , gate , suite ).Observe (duration .Seconds ())
387+ }
388+
389+ // RecordTestTimeout records when a test times out
390+ func RecordTestTimeout (network string , runID string , testName string , gate string , suite string ) {
391+ testTimeouts .WithLabelValues (network , runID , testName , gate , suite ).Inc ()
392+ }
393+
394+ // RecordGateMetrics records aggregated metrics for a gate
395+ func RecordGateMetrics (network string , runID string , gate string , total int , passed int , failed int , duration time.Duration ) {
396+ gateTestsTotal .WithLabelValues (network , gate ).Add (float64 (total ))
397+ gateTestsPassed .WithLabelValues (network , gate ).Add (float64 (passed ))
398+ gateTestsFailed .WithLabelValues (network , gate ).Add (float64 (failed ))
399+ gateDurationSeconds .WithLabelValues (network , runID , gate ).Set (duration .Seconds ())
400+ }
401+
402+ // RecordSuiteMetrics records aggregated metrics for a suite
403+ func RecordSuiteMetrics (network string , gate string , suite string , total int , passed int , failed int ) {
404+ suiteTestsTotal .WithLabelValues (network , gate , suite ).Add (float64 (total ))
405+ suiteTestsPassed .WithLabelValues (network , gate , suite ).Add (float64 (passed ))
406+ suiteTestsFailed .WithLabelValues (network , gate , suite ).Add (float64 (failed ))
407+ }
0 commit comments