Skip to content

Commit b499b6e

Browse files
committed
Fix flakereport
1 parent 88ea88c commit b499b6e

File tree

8 files changed

+230
-92
lines changed

8 files changed

+230
-92
lines changed

.github/workflows/run-tests.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,12 +354,19 @@ jobs:
354354
flags: unit-test
355355
report_type: test_results
356356

357+
- name: Get job ID
358+
id: get_job_id
359+
uses: ./.github/actions/get-job-id
360+
with:
361+
job_name: Unit test
362+
run_id: ${{ github.run_id }}
363+
357364
- name: Upload test results to GitHub
358365
# Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
359366
uses: actions/upload-artifact@v4.4.3
360367
if: ${{ !cancelled() }}
361368
with:
362-
name: junit-xml--${{github.run_id}}--${{github.run_attempt}}--unit-test
369+
name: junit-xml--${{ github.run_id }}--${{ steps.get_job_id.outputs.job_id }}--${{ github.run_attempt }}--unit-test
363370
path: ./.testoutput/junit.*.xml
364371
include-hidden-files: true
365372
retention-days: 28
@@ -447,12 +454,19 @@ jobs:
447454
flags: integration-test
448455
report_type: test_results
449456

457+
- name: Get job ID
458+
id: get_job_id
459+
uses: ./.github/actions/get-job-id
460+
with:
461+
job_name: Integration test
462+
run_id: ${{ github.run_id }}
463+
450464
- name: Upload test results to GitHub
451465
# Can't pin to major because the action linter doesn't recognize the include-hidden-files flag.
452466
uses: actions/upload-artifact@v4.4.3
453467
if: ${{ !cancelled() }}
454468
with:
455-
name: junit-xml--${{github.run_id}}--${{github.run_attempt}}--integration-test
469+
name: junit-xml--${{ github.run_id }}--${{ steps.get_job_id.outputs.job_id }}--${{ github.run_attempt }}--integration-test
456470
path: ./.testoutput/junit.*.xml
457471
include-hidden-files: true
458472
retention-days: 28

tools/flakereport/flakereport.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,9 @@ func runGenerateCommand(c *cli.Context) (err error) {
259259
// Count test runs by name for failure rate calculation
260260
testRunCounts := countTestRuns(allTestRuns)
261261

262-
// Group failures by test name
262+
// Group failures by test name, then remove parent entries whose subtests were observed.
263263
grouped := groupFailuresByTest(allFailures)
264+
filterParentTests(grouped, testRunCounts)
264265
fmt.Printf("Unique tests with failures: %d\n", len(grouped))
265266

266267
// Classify failures

tools/flakereport/github.go

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -191,20 +191,31 @@ func extractArtifactZip(zipPath, outputDir string) ([]string, error) {
191191
return xmlFiles, nil
192192
}
193193

194-
// parseArtifactName extracts run_id and job_id from artifact name
195-
// Format: {prefix}--{run_id}--{job_id}--{suffix}
196-
// Returns: runID, jobID (or "unknown" if not parseable)
197-
func parseArtifactName(artifactName string) (runID string, jobID string) {
194+
// parseArtifactName extracts run_id, job_id, and matrix_name from artifact name.
195+
// Functional tests: junit-xml--{run_id}--{job_id}--{run_attempt}--{matrix_name}--{display_name}--functional-test
196+
// Unit/integration: junit-xml--{run_id}--{job_id}--{run_attempt}--unit-test
197+
// Returns: runID, jobID, matrixName ("unknown" for fields that are absent or unparseable)
198+
func parseArtifactName(artifactName string) (runID string, jobID string, matrixName string) {
198199
parts := strings.Split(artifactName, "--")
199-
if len(parts) >= 3 {
200-
runID = parts[1]
201-
jobID = parts[2]
202-
if jobID == "" {
203-
jobID = "unknown"
204-
}
205-
return runID, jobID
200+
if len(parts) < 3 {
201+
return "unknown", "unknown", "unknown"
202+
}
203+
204+
runID = parts[1]
205+
206+
jobID = parts[2]
207+
if jobID == "" {
208+
jobID = "unknown"
206209
}
207-
return "unknown", "unknown"
210+
211+
// Functional test artifacts carry a matrix name (DB config) at parts[4].
212+
if len(parts) >= 7 && parts[len(parts)-1] == "functional-test" {
213+
matrixName = parts[4]
214+
} else {
215+
matrixName = "unknown"
216+
}
217+
218+
return runID, jobID, matrixName
208219
}
209220

210221
// buildGitHubURL constructs GitHub Actions URL from run/job IDs

tools/flakereport/parallel.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ func processArtifactJob(ctx context.Context, job ArtifactJob, totalArtifacts int
135135
result.Failures = append(result.Failures, failures...)
136136

137137
// Extract all test runs for failure rate calculation
138-
testRuns := extractAllTestRuns(suites, job.RunID)
138+
_, jobID, matrixName := parseArtifactName(job.Artifact.Name)
139+
testRuns := extractAllTestRuns(suites, job.RunID, jobID, matrixName)
139140
result.AllRuns = append(result.AllRuns, testRuns...)
140141
}
141142

tools/flakereport/parser.go

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ func isGoTestSuite(name string) bool {
6868
func extractFailures(suites *junit.Testsuites, artifactName string, runID int64, timestamp time.Time) []TestFailure {
6969
var failures []TestFailure
7070

71-
// Parse artifact name for run_id and job_id
72-
_, jobID := parseArtifactName(artifactName)
71+
// Parse artifact name for run_id, job_id, and matrix_name
72+
_, jobID, matrixName := parseArtifactName(artifactName)
7373

7474
for _, suite := range suites.Suites {
7575
for _, testcase := range suite.Testcases {
@@ -82,6 +82,7 @@ func extractFailures(suites *junit.Testsuites, artifactName string, runID int64,
8282
ArtifactID: artifactName,
8383
RunID: runID,
8484
JobID: jobID,
85+
MatrixName: matrixName,
8586
Timestamp: timestamp,
8687
}
8788
failures = append(failures, failure)
@@ -94,17 +95,19 @@ func extractFailures(suites *junit.Testsuites, artifactName string, runID int64,
9495

9596
// extractAllTestRuns extracts all test runs (including successes) from parsed JUnit data
9697
// Used for calculating failure rates
97-
func extractAllTestRuns(suites *junit.Testsuites, runID int64) []TestRun {
98+
func extractAllTestRuns(suites *junit.Testsuites, runID int64, jobID, matrixName string) []TestRun {
9899
var runs []TestRun
99100

100101
for _, suite := range suites.Suites {
101102
for _, testcase := range suite.Testcases {
102103
run := TestRun{
103-
SuiteName: topLevelTestName(testcase.Name),
104-
Name: testcase.Name,
105-
Failed: testcase.Failure != nil,
106-
Skipped: testcase.Skipped != nil,
107-
RunID: runID,
104+
SuiteName: topLevelTestName(testcase.Name),
105+
Name: testcase.Name,
106+
Failed: testcase.Failure != nil,
107+
Skipped: testcase.Skipped != nil,
108+
RunID: runID,
109+
JobID: jobID,
110+
MatrixName: matrixName,
108111
}
109112
runs = append(runs, run)
110113
}
@@ -237,6 +240,25 @@ func convertToReports(grouped map[string][]TestFailure, testRunCounts map[string
237240
return reports
238241
}
239242

243+
// filterParentTests removes top-level test names from grouped when subtests of
244+
// that parent were observed in testRunCounts. A top-level failure whose subtests
245+
// ran in other CI jobs is already captured (with a correct denominator) in the
246+
// Flaky Suites section, so including it in the per-test table produces a
247+
// misleading 1/1 entry.
248+
func filterParentTests(grouped map[string][]TestFailure, testRunCounts map[string]int) {
249+
suitePrefix := make(map[string]bool, len(testRunCounts))
250+
for name := range testRunCounts {
251+
if idx := strings.IndexByte(name, '/'); idx >= 0 {
252+
suitePrefix[name[:idx]] = true
253+
}
254+
}
255+
for testName := range grouped {
256+
if !strings.Contains(testName, "/") && suitePrefix[testName] {
257+
delete(grouped, testName)
258+
}
259+
}
260+
}
261+
240262
// isFinalRetry returns true if the test name has the "(final)" suffix,
241263
// indicating the test runner exhausted all retries.
242264
func isFinalRetry(testName string) bool {
@@ -341,23 +363,35 @@ func identifyCIBreakers(failures []TestFailure) (map[string][]TestFailure, map[s
341363
return ciBreakers, ciBreakCount
342364
}
343365

366+
// suiteRunKey returns a string that uniquely identifies a single (CI run × DB config) pair.
367+
// Each workflow run may spawn multiple matrix jobs sharing the same RunID but with distinct
368+
// MatrixNames (DB configs). Keying by (RunID, MatrixName) ensures shards belonging to the
369+
// same run+config are counted once, regardless of how many shard JobIDs they produce.
370+
func suiteRunKey(runID int64, matrixName string) string {
371+
return fmt.Sprintf("%d:%s", runID, matrixName)
372+
}
373+
344374
// generateSuiteReports creates per-suite flake breakdown from all failures and test runs.
345-
// Suite flake rate = % of workflow runs where the suite had at least one non-retry failure.
375+
// Suite flake rate = % of (CI run × DB config) pairs where the suite had at least one
376+
// non-retry failure.
346377
func generateSuiteReports(allFailures []TestFailure, allTestRuns []TestRun) []SuiteReport {
347-
// Track unique workflow runs per suite (denominator)
348-
suiteRuns := make(map[string]map[int64]bool)
378+
// Track unique (CI run × DB config) pairs per suite (denominator).
379+
// Using MatrixName avoids the inflation caused by per-shard JobIDs: suites whose
380+
// test methods are spread across N shards would otherwise be counted N times per
381+
// (run × DB config).
382+
suiteRuns := make(map[string]map[string]bool)
349383
for _, run := range allTestRuns {
350384
if run.Skipped || !isGoTestSuite(run.SuiteName) {
351385
continue
352386
}
353387
if suiteRuns[run.SuiteName] == nil {
354-
suiteRuns[run.SuiteName] = make(map[int64]bool)
388+
suiteRuns[run.SuiteName] = make(map[string]bool)
355389
}
356-
suiteRuns[run.SuiteName][run.RunID] = true
390+
suiteRuns[run.SuiteName][suiteRunKey(run.RunID, run.MatrixName)] = true
357391
}
358392

359-
// Track workflow runs with non-retry failures per suite (numerator)
360-
suiteFailedRuns := make(map[string]map[int64]bool)
393+
// Track (CI run × DB config) pairs with non-retry failures per suite (numerator)
394+
suiteFailedRuns := make(map[string]map[string]bool)
361395
suiteLastFailure := make(map[string]time.Time)
362396
for _, failure := range allFailures {
363397
if !isGoTestSuite(failure.SuiteName) {
@@ -368,9 +402,9 @@ func generateSuiteReports(allFailures []TestFailure, allTestRuns []TestRun) []Su
368402
continue
369403
}
370404
if suiteFailedRuns[failure.SuiteName] == nil {
371-
suiteFailedRuns[failure.SuiteName] = make(map[int64]bool)
405+
suiteFailedRuns[failure.SuiteName] = make(map[string]bool)
372406
}
373-
suiteFailedRuns[failure.SuiteName][failure.RunID] = true
407+
suiteFailedRuns[failure.SuiteName][suiteRunKey(failure.RunID, failure.MatrixName)] = true
374408
if failure.Timestamp.After(suiteLastFailure[failure.SuiteName]) {
375409
suiteLastFailure[failure.SuiteName] = failure.Timestamp
376410
}

0 commit comments

Comments
 (0)