openshift · stbenjam · Jul 30, 2025
diff --git a/pkg/test/ginkgo/cmd_runsuite.go b/pkg/test/ginkgo/cmd_runsuite.go
@@ -145,9 +145,13 @@ func max(a, b int) int {
 	return b
 }
 
+// permittedRetryImageTags defines which image tags are allowed for test retries
+// Don't add more here without discussion with OCP architects, we should be moving towards not having any flakes
+var permittedRetryImageTags = []string{"tests"} // tests = openshift-tests image
+
 // shouldRetryTest determines if a failed test should be retried based on retry policies.
 // It returns true if the test is eligible for retry, false otherwise.
-func shouldRetryTest(ctx context.Context, test *testCase, permittedRetryImageTags []string) bool {
+func shouldRetryTest(ctx context.Context, test *testCase) bool {
 	// Internal tests (no binary) are eligible for retry, we shouldn't really have any of these
 	// now that origin is also an extension.
 	if test.binary == nil {
@@ -178,6 +182,21 @@ func shouldRetryTest(ctx context.Context, test *testCase, permittedRetryImageTag
 	return false
 }
 
+// getRetriableTestCases takes a slice of test cases and returns copies of the retriable ones
+// according to image tag rules.
+func getRetriableTestCases(ctx context.Context, tests []*testCase) []*testCase {
+	var retries []*testCase
+
+	for _, test := range tests {
+		if shouldRetryTest(ctx, test) {
+			retry := test.Retry()
+			retries = append(retries, retry)
+		}
+	}
+
+	return retries
+}
+
 func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdiscovery.ClusterConfiguration, junitSuiteName string, monitorTestInfo monitortestframework.MonitorTestInitializationInfo,
 	upgrade bool) error {
 	ctx := context.Background()
@@ -506,72 +525,31 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 	}
 
 	// calculate the effective test set we ran, excluding any incompletes
-	tests, _ = splitTests(tests, func(t *testCase) bool { return t.success || t.flake || t.failed || t.skipped })
+	tests, _ = splitTests(tests, func(t *testCase) bool { return t.success || t.failed || t.skipped })
 
 	end := time.Now()
 	duration := end.Sub(start).Round(time.Second / 10)
 	if duration > time.Minute {
 		duration = duration.Round(time.Second)
 	}
 
-	pass, fail, skip, failing := summarizeTests(tests)
-
-	// Determine if we should retry any tests for flake detection
-	// Don't add more here without discussion with OCP architects, we should be moving towards not having any flakes
-	permittedRetryImageTags := []string{"tests"} // tests = openshift-tests image
-	if fail > 0 && fail <= suite.MaximumAllowedFlakes {
-		var retries []*testCase
-
-		failedUnretriableTestCount := 0
-		for _, test := range failing {
-			if shouldRetryTest(ctx, test, permittedRetryImageTags) {
-				retry := test.Retry()
-				retries = append(retries, retry)
-				if len(retries) > suite.MaximumAllowedFlakes {
-					break
-				}
-			} else if test.binary != nil {
-				// Do not retry extension tests -- we also want to remove retries from origin-sourced
-				// tests, but extensions is where we can start.
-				failedUnretriableTestCount++
-			}
-		}
-
-		logrus.Warningf("%d tests failed, %d tests permitted to be retried; %d failures are terminal non-retryable failures", len(failing), len(retries), failedUnretriableTestCount)
+	retries := getRetriableTestCases(ctx, tests)
+	if len(retries) <= suite.MaximumAllowedFlakes {
+		logrus.Warningf("%d failed tests to be retried", len(retries))
 
 		// Run the tests in the retries list.
 		q := newParallelTestQueue(testRunnerContext)
 		q.Execute(testCtx, retries, parallelism, testOutputConfig, abortFn)
 
-		var flaky, skipped []string
-		var repeatFailures []*testCase
-		for _, test := range retries {
-			if test.success {
-				flaky = append(flaky, test.name)
-			} else if test.skipped {
-				skipped = append(skipped, test.name)
-			} else {
-				repeatFailures = append(repeatFailures, test)
-			}
-		}
-
-		// Add the list of retries into the list of all tests.
+		// If a retry test got skipped, it means we very likely failed a precondition in the first failure, so
+		// we need to remove the failure case
+		var skipped []string
 		for _, retry := range retries {
-			if retry.flake {
-				// Retry tests that flaked are omitted so that the original test is counted as a failure.
-				fmt.Fprintf(o.Out, "Ignoring retry that returned a flake, original failure is authoritative for test: %s\n", retry.name)
-				continue
+			if retry.skipped {
+				skipped = append(skipped, retry.name)
 			}
-			tests = append(tests, retry)
-		}
-		if len(flaky) > 0 {
-			failing = repeatFailures
-			sort.Strings(flaky)
-			fmt.Fprintf(o.Out, "Flaky tests:\n\n%s\n\n", strings.Join(flaky, "\n"))
 		}
 		if len(skipped) > 0 {
-			// If a retry test got skipped, it means we very likely failed a precondition in the first failure, so
-			// we need to remove the failure case.
 			var withoutPreconditionFailures []*testCase
 		testLoop:
 			for _, t := range tests {
@@ -583,13 +561,19 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 				}
 			}
 			tests = withoutPreconditionFailures
-			failing = repeatFailures
 			sort.Strings(skipped)
 			fmt.Fprintf(o.Out, "Skipped tests that failed a precondition:\n\n%s\n\n", strings.Join(skipped, "\n"))
-
 		}
+
+		// Append our retry results to the final set
+		tests = append(tests, retries...)
 	}
 
+	// Calculate test results after retries
+	result := summarizeTests(tests)
+	pass, fail, skip, flake := len(result.passed), len(result.failed), len(result.skipped), len(result.flaked)
+	logrus.Infof("%d pass, %d fail, %d skip, %d flake", pass, fail, skip, flake)
+
 	// monitor the cluster while the tests are running and report any detected anomalies
 	var syntheticTestResults []*junitapi.JUnitTestCase
 	var syntheticFailure bool
@@ -660,10 +644,11 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		wasMasterNodeUpdated = clusterinfo.WasMasterNodeUpdated(events)
 	}
 
-	// report the outcome of the test
-	if len(failing) > 0 {
-		names := sets.NewString(testNames(failing)...).List()
-		fmt.Fprintf(o.Out, "Failing tests:\n\n%s\n\n", strings.Join(names, "\n"))
+	if len(result.flaked) > 0 {
+		logrus.Infof("Flaky tests: %v", strings.Join(testNames(result.flaked), "\n"))
+	}
+	if len(result.failed) > 0 {
+		logrus.Infof("Failing tests: %v", strings.Join(testNames(result.failed), "\n"))
 	}
 
 	if len(o.JUnitDir) > 0 {
@@ -681,11 +666,18 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		}
 	}
 
-	if fail > 0 {
-		if len(failing) > 0 || suite.MaximumAllowedFlakes == 0 {
-			return fmt.Errorf("%d fail, %d pass, %d skip (%s)", fail, pass, skip, duration)
-		}
-		fmt.Fprintf(o.Out, "%d flakes detected, suite allows passing with only flakes\n\n", fail)
+	// Check for hard failures first - these always cause suite failure
+	if fail > 0 && result.allFailuresBlocking() {
+		logrus.Errorf("Suite failing due to %d hard failures", fail)
+		return fmt.Errorf("%d fail, %d pass, %d skip, %d flake (%s)", fail, pass, skip, flake, duration)
+	} else if fail > 0 {
+		logrus.Warningf("All %d test failures are not blocking", fail)
+	}
+
+	// Check for flakes - these only cause failure if MaximumAllowedFlakes is 0
+	if flake > 0 && suite.MaximumAllowedFlakes == 0 {
+		logrus.Errorf("Suite failing because MaximumAllowedFlakes is 0 but %d tests flaked", flake)
+		return fmt.Errorf("%d flake, %d pass, %d skip (%s)", flake, pass, skip, duration)
 	}
 
 	if syntheticFailure {
@@ -695,7 +687,11 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
 		return fmt.Errorf("failed due to a MonitorTest failure")
 	}
 
-	fmt.Fprintf(o.Out, "%d pass, %d skip (%s)\n", pass, skip, duration)
+	if flake > 0 {
+		fmt.Fprintf(o.Out, "%d pass, %d skip, %d flake (%s)\n", pass, skip, flake, duration)
+	} else {
+		fmt.Fprintf(o.Out, "%d pass, %d skip (%s)\n", pass, skip, duration)
+	}
 	return ctx.Err()
 }
 

diff --git a/pkg/test/ginkgo/junit.go b/pkg/test/ginkgo/junit.go
@@ -31,6 +31,7 @@ func generateJUnitTestSuiteResults(
 			},
 		},
 	}
+
 	for _, test := range tests {
 		switch {
 		case test.skipped:
@@ -55,25 +56,8 @@ func generateJUnitTestSuiteResults(
 					Output: lastLinesUntil(string(test.testOutputBytes), 100, "fail ["),
 				},
 			})
-		case test.flake:
-			s.NumTests++
-			s.NumFailed++
-			s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{
-				Name:      test.name,
-				SystemOut: string(test.testOutputBytes),
-				Duration:  test.duration.Seconds(),
-				FailureOutput: &junitapi.FailureOutput{
-					Output: lastLinesUntil(string(test.testOutputBytes), 100, "flake:"),
-				},
-			})
-
-			// also add the successful junit result:
-			s.NumTests++
-			s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{
-				Name:     test.name,
-				Duration: test.duration.Seconds(),
-			})
 		case test.success:
+			// Only add success entries for non-flaky tests (flaky tests already have success entry added above)
 			s.NumTests++
 			s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{
 				Name:     test.name,

diff --git a/pkg/test/ginkgo/status.go b/pkg/test/ginkgo/status.go
@@ -43,21 +43,54 @@ func (s *testSuiteProgress) TestEnded(testName string, testRunResult *testRunRes
 	}
 }
 
-func summarizeTests(tests []*testCase) (int, int, int, []*testCase) {
-	var pass, fail, skip int
-	var failingTests []*testCase
+func summarizeTests(tests []*testCase) *testSuiteResult {
+	result := &testSuiteResult{}
+
+	// Track results by test name to detect flakes
+	testResults := make(map[string]struct {
+		hasSuccess bool
+		hasFailure bool
+		testCases  []*testCase
+	})
+
+	// Collect all results by test name
 	for _, t := range tests {
+		testResult := testResults[t.name]
+		testResult.testCases = append(testResult.testCases, t)
+
 		switch {
 		case t.success:
-			pass++
+			testResult.hasSuccess = true
 		case t.failed:
-			fail++
-			failingTests = append(failingTests, t)
+			testResult.hasFailure = true
 		case t.skipped:
-			skip++
+			result.skipped = append(result.skipped, t)
+		}
+
+		testResults[t.name] = testResult
+	}
+
+	// Categorize tests based on their overall result
+	for _, testResult := range testResults {
+		if testResult.hasFailure && testResult.hasSuccess {
+			// Test has both successes and failures - it's a flake
+			result.flaked = append(result.flaked, testResult.testCases)
+		} else if testResult.hasFailure {
+			// Test only has failures - it's a hard failure
+			// Add all failed test cases to maintain the list for detailed reporting
+			result.failed = append(result.failed, testResult.testCases)
+		} else if testResult.hasSuccess {
+			// Test only has successes - it's a pass
+			// Add all successful test cases (there should only be 1)
+			for _, tc := range testResult.testCases {
+				if tc.success {
+					result.passed = append(result.passed, tc)
+				}
+			}
 		}
 	}
-	return pass, fail, skip, failingTests
+
+	return result
 }
 
 func sortedTests(tests []*testCase) []*testCase {

diff --git a/pkg/test/ginkgo/summarize_test.go b/pkg/test/ginkgo/summarize_test.go
@@ -0,0 +1,101 @@
+package ginkgo
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSummarizeTests(t *testing.T) {
+	tests := []struct {
+		name           string
+		input          []*testCase
+		wantPass       int
+		wantFail       int
+		wantSkip       int
+		wantFlake      int
+		wantFlakeNames []string
+		failedAttempts int
+	}{
+		{
+			name: "success",
+			input: []*testCase{
+				{name: "test1", success: true},
+				{name: "test2", success: true},
+			},
+			wantPass:  2,
+			wantFail:  0,
+			wantSkip:  0,
+			wantFlake: 0,
+		},
+		{
+			name: "failure",
+			input: []*testCase{
+				{name: "test1", failed: true},
+			},
+			wantPass:       0,
+			wantFail:       1,
+			failedAttempts: 1,
+			wantSkip:       0,
+			wantFlake:      0,
+		},
+		{
+			name: "flake",
+			input: []*testCase{
+				{name: "test1", failed: true},
+				{name: "test1", success: true},
+			},
+			wantPass:       0,
+			wantFail:       0,
+			wantSkip:       0,
+			wantFlake:      1,
+			wantFlakeNames: []string{"test1"},
+		},
+		{
+			name: "retried failure",
+			input: []*testCase{
+				{name: "test1", failed: true},
+				{name: "test1", failed: true},
+			},
+			wantPass:       0,
+			wantFail:       2, // Both failed test cases should be counted
+			wantSkip:       0,
+			wantFlake:      0,
+			failedAttempts: 2,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			result := summarizeTests(tc.input)
+
+			assert.Equal(t, tc.wantPass, len(result.passed), "pass count")
+			assert.Equal(t, tc.wantFail, len(result.failed), "fail count")
+			assert.Equal(t, tc.wantSkip, len(result.skipped), "skip count")
+			assert.Equal(t, tc.wantFlake, len(result.flaked), "flake count")
+			assert.Equal(t, tc.failedAttempts, len(result.failed), "failing len")
+
+			// Extract flake names from the flaked groups
+			var flakeNames []string
+			for _, flakedGroup := range result.flaked {
+				if len(flakedGroup) > 0 {
+					flakeNames = append(flakeNames, flakedGroup[0].name)
+				}
+			}
+			assert.Equal(t, len(tc.wantFlakeNames), len(flakeNames), "flake name count")
+
+			for _, expectedName := range tc.wantFlakeNames {
+				found := false
+				for _, actualName := range flakeNames {
+					if actualName == expectedName {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("Expected flake test: %s", expectedName)
+				}
+			}
+		})
+	}
+}