@@ -68,8 +68,8 @@ func isGoTestSuite(name string) bool {
6868func extractFailures (suites * junit.Testsuites , artifactName string , runID int64 , timestamp time.Time ) []TestFailure {
6969 var failures []TestFailure
7070
71- // Parse artifact name for run_id and job_id
72- _ , jobID := parseArtifactName (artifactName )
71+ // Parse artifact name for run_id, job_id, and matrix_name
72+ _ , jobID , matrixName := parseArtifactName (artifactName )
7373
7474 for _ , suite := range suites .Suites {
7575 for _ , testcase := range suite .Testcases {
@@ -82,6 +82,7 @@ func extractFailures(suites *junit.Testsuites, artifactName string, runID int64,
8282 ArtifactID : artifactName ,
8383 RunID : runID ,
8484 JobID : jobID ,
85+ MatrixName : matrixName ,
8586 Timestamp : timestamp ,
8687 }
8788 failures = append (failures , failure )
@@ -94,17 +95,19 @@ func extractFailures(suites *junit.Testsuites, artifactName string, runID int64,
9495
9596// extractAllTestRuns extracts all test runs (including successes) from parsed JUnit data
9697// Used for calculating failure rates
97- func extractAllTestRuns (suites * junit.Testsuites , runID int64 ) []TestRun {
98+ func extractAllTestRuns (suites * junit.Testsuites , runID int64 , jobID , matrixName string ) []TestRun {
9899 var runs []TestRun
99100
100101 for _ , suite := range suites .Suites {
101102 for _ , testcase := range suite .Testcases {
102103 run := TestRun {
103- SuiteName : topLevelTestName (testcase .Name ),
104- Name : testcase .Name ,
105- Failed : testcase .Failure != nil ,
106- Skipped : testcase .Skipped != nil ,
107- RunID : runID ,
104+ SuiteName : topLevelTestName (testcase .Name ),
105+ Name : testcase .Name ,
106+ Failed : testcase .Failure != nil ,
107+ Skipped : testcase .Skipped != nil ,
108+ RunID : runID ,
109+ JobID : jobID ,
110+ MatrixName : matrixName ,
108111 }
109112 runs = append (runs , run )
110113 }
@@ -237,6 +240,25 @@ func convertToReports(grouped map[string][]TestFailure, testRunCounts map[string
237240 return reports
238241}
239242
243+ // filterParentTests removes top-level test names from grouped when subtests of
244+ // that parent were observed in testRunCounts. A top-level failure whose subtests
245+ // ran in other CI jobs is already captured (with a correct denominator) in the
246+ // Flaky Suites section, so including it in the per-test table produces a
247+ // misleading 1/1 entry.
248+ func filterParentTests (grouped map [string ][]TestFailure , testRunCounts map [string ]int ) {
249+ suitePrefix := make (map [string ]bool , len (testRunCounts ))
250+ for name := range testRunCounts {
251+ if idx := strings .IndexByte (name , '/' ); idx >= 0 {
252+ suitePrefix [name [:idx ]] = true
253+ }
254+ }
255+ for testName := range grouped {
256+ if ! strings .Contains (testName , "/" ) && suitePrefix [testName ] {
257+ delete (grouped , testName )
258+ }
259+ }
260+ }
261+
240262// isFinalRetry returns true if the test name has the "(final)" suffix,
241263// indicating the test runner exhausted all retries.
242264func isFinalRetry (testName string ) bool {
@@ -341,23 +363,35 @@ func identifyCIBreakers(failures []TestFailure) (map[string][]TestFailure, map[s
341363 return ciBreakers , ciBreakCount
342364}
343365
366+ // suiteRunKey returns a string that uniquely identifies a single (CI run × DB config) pair.
367+ // Each workflow run may spawn multiple matrix jobs sharing the same RunID but with distinct
368+ // MatrixNames (DB configs). Keying by (RunID, MatrixName) ensures shards belonging to the
369+ // same run+config are counted once, regardless of how many shard JobIDs they produce.
370+ func suiteRunKey (runID int64 , matrixName string ) string {
371+ return fmt .Sprintf ("%d:%s" , runID , matrixName )
372+ }
373+
344374// generateSuiteReports creates per-suite flake breakdown from all failures and test runs.
345- // Suite flake rate = % of workflow runs where the suite had at least one non-retry failure.
375+ // Suite flake rate = % of (CI run × DB config) pairs where the suite had at least one
376+ // non-retry failure.
346377func generateSuiteReports (allFailures []TestFailure , allTestRuns []TestRun ) []SuiteReport {
347- // Track unique workflow runs per suite (denominator)
348- suiteRuns := make (map [string ]map [int64 ]bool )
378+ // Track unique (CI run × DB config) pairs per suite (denominator).
379+ // Using MatrixName avoids the inflation caused by per-shard JobIDs: suites whose
380+ // test methods are spread across N shards would otherwise be counted N times per
381+ // (run × DB config).
382+ suiteRuns := make (map [string ]map [string ]bool )
349383 for _ , run := range allTestRuns {
350384 if run .Skipped || ! isGoTestSuite (run .SuiteName ) {
351385 continue
352386 }
353387 if suiteRuns [run .SuiteName ] == nil {
354- suiteRuns [run .SuiteName ] = make (map [int64 ]bool )
388+ suiteRuns [run .SuiteName ] = make (map [string ]bool )
355389 }
356- suiteRuns [run.SuiteName ][run.RunID ] = true
390+ suiteRuns [run .SuiteName ][suiteRunKey ( run .RunID , run . MatrixName ) ] = true
357391 }
358392
359- // Track workflow runs with non-retry failures per suite (numerator)
360- suiteFailedRuns := make (map [string ]map [int64 ]bool )
393+ // Track (CI run × DB config) pairs with non-retry failures per suite (numerator)
394+ suiteFailedRuns := make (map [string ]map [string ]bool )
361395 suiteLastFailure := make (map [string ]time.Time )
362396 for _ , failure := range allFailures {
363397 if ! isGoTestSuite (failure .SuiteName ) {
@@ -368,9 +402,9 @@ func generateSuiteReports(allFailures []TestFailure, allTestRuns []TestRun) []Su
368402 continue
369403 }
370404 if suiteFailedRuns [failure .SuiteName ] == nil {
371- suiteFailedRuns [failure .SuiteName ] = make (map [int64 ]bool )
405+ suiteFailedRuns [failure .SuiteName ] = make (map [string ]bool )
372406 }
373- suiteFailedRuns [failure.SuiteName ][failure.RunID ] = true
407+ suiteFailedRuns [failure .SuiteName ][suiteRunKey ( failure .RunID , failure . MatrixName ) ] = true
374408 if failure .Timestamp .After (suiteLastFailure [failure .SuiteName ]) {
375409 suiteLastFailure [failure .SuiteName ] = failure .Timestamp
376410 }
0 commit comments