diff --git a/pkg/test/ginkgo/cmd_runsuite.go b/pkg/test/ginkgo/cmd_runsuite.go index 0f8413d92d11..e1f392a202cb 100644 --- a/pkg/test/ginkgo/cmd_runsuite.go +++ b/pkg/test/ginkgo/cmd_runsuite.go @@ -145,9 +145,13 @@ func max(a, b int) int { return b } +// permittedRetryImageTags defines which image tags are allowed for test retries +// Don't add more here without discussion with OCP architects, we should be moving towards not having any flakes +var permittedRetryImageTags = []string{"tests"} // tests = openshift-tests image + // shouldRetryTest determines if a failed test should be retried based on retry policies. // It returns true if the test is eligible for retry, false otherwise. -func shouldRetryTest(ctx context.Context, test *testCase, permittedRetryImageTags []string) bool { +func shouldRetryTest(ctx context.Context, test *testCase) bool { // Internal tests (no binary) are eligible for retry, we shouldn't really have any of these // now that origin is also an extension. if test.binary == nil { @@ -178,6 +182,21 @@ func shouldRetryTest(ctx context.Context, test *testCase, permittedRetryImageTag return false } +// getRetriableTestCases takes a slice of test cases and returns copies of the retriable ones +// according to image tag rules. +func getRetriableTestCases(ctx context.Context, tests []*testCase) []*testCase { + var retries []*testCase + + for _, test := range tests { + if shouldRetryTest(ctx, test) { + retry := test.Retry() + retries = append(retries, retry) + } + } + + return retries +} + func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdiscovery.ClusterConfiguration, junitSuiteName string, monitorTestInfo monitortestframework.MonitorTestInitializationInfo, upgrade bool) error { ctx := context.Background() @@ -506,7 +525,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc } // calculate the effective test set we ran, excluding any incompletes - tests, _ = splitTests(tests, func(t *testCase) bool { return t.success || t.flake || t.failed || t.skipped }) + tests, _ = splitTests(tests, func(t *testCase) bool { return t.success || t.failed || t.skipped }) end := time.Now() duration := end.Sub(start).Round(time.Second / 10) @@ -514,64 +533,23 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc duration = duration.Round(time.Second) } - pass, fail, skip, failing := summarizeTests(tests) - - // Determine if we should retry any tests for flake detection - // Don't add more here without discussion with OCP architects, we should be moving towards not having any flakes - permittedRetryImageTags := []string{"tests"} // tests = openshift-tests image - if fail > 0 && fail <= suite.MaximumAllowedFlakes { - var retries []*testCase - - failedUnretriableTestCount := 0 - for _, test := range failing { - if shouldRetryTest(ctx, test, permittedRetryImageTags) { - retry := test.Retry() - retries = append(retries, retry) - if len(retries) > suite.MaximumAllowedFlakes { - break - } - } else if test.binary != nil { - // Do not retry extension tests -- we also want to remove retries from origin-sourced - // tests, but extensions is where we can start. - failedUnretriableTestCount++ - } - } - - logrus.Warningf("%d tests failed, %d tests permitted to be retried; %d failures are terminal non-retryable failures", len(failing), len(retries), failedUnretriableTestCount) + retries := getRetriableTestCases(ctx, tests) + if len(retries) <= suite.MaximumAllowedFlakes { + logrus.Warningf("%d failed tests to be retried", len(retries)) // Run the tests in the retries list. q := newParallelTestQueue(testRunnerContext) q.Execute(testCtx, retries, parallelism, testOutputConfig, abortFn) - var flaky, skipped []string - var repeatFailures []*testCase - for _, test := range retries { - if test.success { - flaky = append(flaky, test.name) - } else if test.skipped { - skipped = append(skipped, test.name) - } else { - repeatFailures = append(repeatFailures, test) - } - } - - // Add the list of retries into the list of all tests. + // If a retry test got skipped, it means we very likely failed a precondition in the first failure, so + // we need to remove the failure case + var skipped []string for _, retry := range retries { - if retry.flake { - // Retry tests that flaked are omitted so that the original test is counted as a failure. - fmt.Fprintf(o.Out, "Ignoring retry that returned a flake, original failure is authoritative for test: %s\n", retry.name) - continue + if retry.skipped { + skipped = append(skipped, retry.name) } - tests = append(tests, retry) - } - if len(flaky) > 0 { - failing = repeatFailures - sort.Strings(flaky) - fmt.Fprintf(o.Out, "Flaky tests:\n\n%s\n\n", strings.Join(flaky, "\n")) } if len(skipped) > 0 { - // If a retry test got skipped, it means we very likely failed a precondition in the first failure, so - // we need to remove the failure case. var withoutPreconditionFailures []*testCase testLoop: for _, t := range tests { @@ -583,13 +561,19 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc } } tests = withoutPreconditionFailures - failing = repeatFailures sort.Strings(skipped) fmt.Fprintf(o.Out, "Skipped tests that failed a precondition:\n\n%s\n\n", strings.Join(skipped, "\n")) - } + + // Append our retry results to the final set + tests = append(tests, retries...) } + // Calculate test results after retries + result := summarizeTests(tests) + pass, fail, skip, flake := len(result.passed), len(result.failed), len(result.skipped), len(result.flaked) + logrus.Infof("%d pass, %d fail, %d skip, %d flake", pass, fail, skip, flake) + // monitor the cluster while the tests are running and report any detected anomalies var syntheticTestResults []*junitapi.JUnitTestCase var syntheticFailure bool @@ -660,10 +644,11 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc wasMasterNodeUpdated = clusterinfo.WasMasterNodeUpdated(events) } - // report the outcome of the test - if len(failing) > 0 { - names := sets.NewString(testNames(failing)...).List() - fmt.Fprintf(o.Out, "Failing tests:\n\n%s\n\n", strings.Join(names, "\n")) + if len(result.flaked) > 0 { + logrus.Infof("Flaky tests: %v", strings.Join(testNames(result.flaked), "\n")) + } + if len(result.failed) > 0 { + logrus.Infof("Failing tests: %v", strings.Join(testNames(result.failed), "\n")) } if len(o.JUnitDir) > 0 { @@ -681,11 +666,18 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc } } - if fail > 0 { - if len(failing) > 0 || suite.MaximumAllowedFlakes == 0 { - return fmt.Errorf("%d fail, %d pass, %d skip (%s)", fail, pass, skip, duration) - } - fmt.Fprintf(o.Out, "%d flakes detected, suite allows passing with only flakes\n\n", fail) + // Check for hard failures first - these always cause suite failure + if fail > 0 && result.allFailuresBlocking() { + logrus.Errorf("Suite failing due to %d hard failures", fail) + return fmt.Errorf("%d fail, %d pass, %d skip, %d flake (%s)", fail, pass, skip, flake, duration) + } else if fail > 0 { + logrus.Warningf("All %d test failures are not blocking", fail) + } + + // Check for flakes - these only cause failure if MaximumAllowedFlakes is 0 + if flake > 0 && suite.MaximumAllowedFlakes == 0 { + logrus.Errorf("Suite failing because MaximumAllowedFlakes is 0 but %d tests flaked", flake) + return fmt.Errorf("%d flake, %d pass, %d skip (%s)", flake, pass, skip, duration) } if syntheticFailure { @@ -695,7 +687,11 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc return fmt.Errorf("failed due to a MonitorTest failure") } - fmt.Fprintf(o.Out, "%d pass, %d skip (%s)\n", pass, skip, duration) + if flake > 0 { + fmt.Fprintf(o.Out, "%d pass, %d skip, %d flake (%s)\n", pass, skip, flake, duration) + } else { + fmt.Fprintf(o.Out, "%d pass, %d skip (%s)\n", pass, skip, duration) + } return ctx.Err() } diff --git a/pkg/test/ginkgo/junit.go b/pkg/test/ginkgo/junit.go index e726b6a5b93c..41ea7d9099ee 100644 --- a/pkg/test/ginkgo/junit.go +++ b/pkg/test/ginkgo/junit.go @@ -31,6 +31,7 @@ func generateJUnitTestSuiteResults( }, }, } + for _, test := range tests { switch { case test.skipped: @@ -55,25 +56,8 @@ func generateJUnitTestSuiteResults( Output: lastLinesUntil(string(test.testOutputBytes), 100, "fail ["), }, }) - case test.flake: - s.NumTests++ - s.NumFailed++ - s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{ - Name: test.name, - SystemOut: string(test.testOutputBytes), - Duration: test.duration.Seconds(), - FailureOutput: &junitapi.FailureOutput{ - Output: lastLinesUntil(string(test.testOutputBytes), 100, "flake:"), - }, - }) - - // also add the successful junit result: - s.NumTests++ - s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{ - Name: test.name, - Duration: test.duration.Seconds(), - }) case test.success: + // Only add success entries for non-flaky tests (flaky tests already have success entry added above) s.NumTests++ s.TestCases = append(s.TestCases, &junitapi.JUnitTestCase{ Name: test.name, diff --git a/pkg/test/ginkgo/status.go b/pkg/test/ginkgo/status.go index 41cc736894f5..e87ae412c0b5 100644 --- a/pkg/test/ginkgo/status.go +++ b/pkg/test/ginkgo/status.go @@ -43,21 +43,54 @@ func (s *testSuiteProgress) TestEnded(testName string, testRunResult *testRunRes } } -func summarizeTests(tests []*testCase) (int, int, int, []*testCase) { - var pass, fail, skip int - var failingTests []*testCase +func summarizeTests(tests []*testCase) *testSuiteResult { + result := &testSuiteResult{} + + // Track results by test name to detect flakes + testResults := make(map[string]struct { + hasSuccess bool + hasFailure bool + testCases []*testCase + }) + + // Collect all results by test name for _, t := range tests { + testResult := testResults[t.name] + testResult.testCases = append(testResult.testCases, t) + switch { case t.success: - pass++ + testResult.hasSuccess = true case t.failed: - fail++ - failingTests = append(failingTests, t) + testResult.hasFailure = true case t.skipped: - skip++ + result.skipped = append(result.skipped, t) + } + + testResults[t.name] = testResult + } + + // Categorize tests based on their overall result + for _, testResult := range testResults { + if testResult.hasFailure && testResult.hasSuccess { + // Test has both successes and failures - it's a flake + result.flaked = append(result.flaked, testResult.testCases) + } else if testResult.hasFailure { + // Test only has failures - it's a hard failure + // Add all failed test cases to maintain the list for detailed reporting + result.failed = append(result.failed, testResult.testCases) + } else if testResult.hasSuccess { + // Test only has successes - it's a pass + // Add all successful test cases (there should only be 1) + for _, tc := range testResult.testCases { + if tc.success { + result.passed = append(result.passed, tc) + } + } } } - return pass, fail, skip, failingTests + + return result } func sortedTests(tests []*testCase) []*testCase { diff --git a/pkg/test/ginkgo/summarize_test.go b/pkg/test/ginkgo/summarize_test.go new file mode 100644 index 000000000000..477db304f751 --- /dev/null +++ b/pkg/test/ginkgo/summarize_test.go @@ -0,0 +1,101 @@ +package ginkgo + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSummarizeTests(t *testing.T) { + tests := []struct { + name string + input []*testCase + wantPass int + wantFail int + wantSkip int + wantFlake int + wantFlakeNames []string + failedAttempts int + }{ + { + name: "success", + input: []*testCase{ + {name: "test1", success: true}, + {name: "test2", success: true}, + }, + wantPass: 2, + wantFail: 0, + wantSkip: 0, + wantFlake: 0, + }, + { + name: "failure", + input: []*testCase{ + {name: "test1", failed: true}, + }, + wantPass: 0, + wantFail: 1, + failedAttempts: 1, + wantSkip: 0, + wantFlake: 0, + }, + { + name: "flake", + input: []*testCase{ + {name: "test1", failed: true}, + {name: "test1", success: true}, + }, + wantPass: 0, + wantFail: 0, + wantSkip: 0, + wantFlake: 1, + wantFlakeNames: []string{"test1"}, + }, + { + name: "retried failure", + input: []*testCase{ + {name: "test1", failed: true}, + {name: "test1", failed: true}, + }, + wantPass: 0, + wantFail: 2, // Both failed test cases should be counted + wantSkip: 0, + wantFlake: 0, + failedAttempts: 2, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := summarizeTests(tc.input) + + assert.Equal(t, tc.wantPass, len(result.passed), "pass count") + assert.Equal(t, tc.wantFail, len(result.failed), "fail count") + assert.Equal(t, tc.wantSkip, len(result.skipped), "skip count") + assert.Equal(t, tc.wantFlake, len(result.flaked), "flake count") + assert.Equal(t, tc.failedAttempts, len(result.failed), "failing len") + + // Extract flake names from the flaked groups + var flakeNames []string + for _, flakedGroup := range result.flaked { + if len(flakedGroup) > 0 { + flakeNames = append(flakeNames, flakedGroup[0].name) + } + } + assert.Equal(t, len(tc.wantFlakeNames), len(flakeNames), "flake name count") + + for _, expectedName := range tc.wantFlakeNames { + found := false + for _, actualName := range flakeNames { + if actualName == expectedName { + found = true + break + } + } + if !found { + t.Errorf("Expected flake test: %s", expectedName) + } + } + }) + } +} diff --git a/pkg/test/ginkgo/test_runner.go b/pkg/test/ginkgo/test_runner.go index 791cec0554bc..86427c5a7b78 100644 --- a/pkg/test/ginkgo/test_runner.go +++ b/pkg/test/ginkgo/test_runner.go @@ -78,38 +78,27 @@ func mutateTestCaseWithResults(test *testCase, testRunResult *testRunResultHandl test.extensionTestResult = testRunResult.extensionTestResult switch testRunResult.testState { - case TestFlaked: - test.flake = true - test.failed = false - test.skipped = false - test.success = false - test.timedOut = false case TestSucceeded: - test.flake = false test.failed = false test.skipped = false test.success = true test.timedOut = false case TestSkipped: - test.flake = false test.failed = false test.skipped = true test.success = false test.timedOut = false case TestFailed: - test.flake = false test.failed = true test.skipped = false test.success = false test.timedOut = false case TestFailedTimeout: - test.flake = false test.failed = true test.skipped = false test.success = false test.timedOut = true case TestUnknown: - test.flake = false test.failed = true test.skipped = false test.success = false @@ -161,7 +150,6 @@ const ( TestSucceeded TestState = "Success" TestFailed TestState = "Failed" TestFailedTimeout TestState = "TimedOut" - TestFlaked TestState = "Flaked" TestSkipped TestState = "Skipped" TestUnknown TestState = "Unknown" ) @@ -170,8 +158,6 @@ func isTestFailed(testState TestState) bool { switch testState { case TestSucceeded: return false - case TestFlaked: - return false case TestSkipped: return false } @@ -223,10 +209,6 @@ func recordTestResultInLogWithoutOverlap(testRunResult *testRunResultHandle, tes func recordTestResultInLog(testRunResult *testRunResultHandle, out io.Writer, includeSuccessfulOutput bool) { // output the status of the test switch testRunResult.testState { - case TestFlaked: - out.Write(testRunResult.testOutputBytes) - fmt.Fprintln(out) - fmt.Fprintf(out, "flaked: (%s) %s %q\n\n", testRunResult.duration(), testRunResult.end.UTC().Format("2006-01-02T15:04:05"), testRunResult.name) case TestSucceeded: if includeSuccessfulOutput { out.Write(testRunResult.testOutputBytes) @@ -262,9 +244,6 @@ func recordTestResultInMonitor(testRunResult *testRunResultHandle, monitorRecord msg := monitorapi.NewMessage().HumanMessage("e2e test finished") switch testRunResult.testState { - case TestFlaked: - eventLevel = monitorapi.Error - msg = msg.WithAnnotation(monitorapi.AnnotationStatus, "Flaked") case TestSucceeded: eventLevel = monitorapi.Info msg = msg.WithAnnotation(monitorapi.AnnotationStatus, "Passed") diff --git a/pkg/test/ginkgo/test_suite.go b/pkg/test/ginkgo/test_suite.go index 216680e9e813..bb84d802a14b 100644 --- a/pkg/test/ginkgo/test_suite.go +++ b/pkg/test/ginkgo/test_suite.go @@ -5,6 +5,7 @@ import ( "time" "github.com/onsi/ginkgo/v2/types" + "github.com/openshift-eng/openshift-tests-extension/pkg/extension/extensiontests" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -38,6 +39,26 @@ func extensionTestSpecsToOriginTestCases(specs extensions.ExtensionTestSpecs) ([ return tests, nil } +type testSuiteResult struct { + passed []*testCase + failed [][]*testCase + flaked [][]*testCase + skipped []*testCase +} + +func (r testSuiteResult) allFailuresBlocking() bool { + for _, failed := range r.failed { + for _, f := range failed { + if f.extensionTestResult != nil && + f.extensionTestResult.Lifecycle != "" && + f.extensionTestResult.Lifecycle != extensiontests.LifecycleBlocking { + return false + } + } + } + return true +} + type testCase struct { // name is the fully labeled test name as reported by openshift-tests // this is being used for placing tests in buckets, as well as filtering @@ -65,7 +86,6 @@ type testCase struct { duration time.Duration testOutputBytes []byte - flake bool failed bool skipped bool success bool @@ -165,10 +185,13 @@ func (s *TestSuite) AddRequiredMatchFunc(matchFn TestMatchFunc) { } } -func testNames(tests []*testCase) []string { +func testNames(tests [][]*testCase) []string { var names []string for _, t := range tests { - names = append(names, t.name) + if len(t) > 0 { + names = append(names, t[0].name) + continue + } } return names }