diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 6b87a07643..7ff3d4aa55 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -23,6 +23,8 @@ jobs: e2e-kind: runs-on: ubuntu-latest + outputs: + alerts-found: ${{ steps.alert_output.outputs.alerts-found }} steps: - uses: actions/checkout@v5 with: @@ -34,7 +36,10 @@ jobs: - name: Run e2e tests run: ARTIFACT_PATH=/tmp/artifacts E2E_SUMMARY_OUTPUT=$GITHUB_STEP_SUMMARY make test-e2e - + - name: Mark for alerts + id: alert_output + if: contains(github.step_summary, 'Firing Alerts') + run: echo "alerts-found=true" >> "$GITHUB_OUTPUT" - uses: actions/upload-artifact@v4 if: failure() with: @@ -50,6 +55,8 @@ jobs: experimental-e2e: runs-on: ubuntu-latest + outputs: + alerts-found: ${{ steps.alert_output.outputs.alerts-found }} steps: - uses: actions/checkout@v5 with: @@ -61,7 +68,10 @@ jobs: - name: Run e2e tests run: ARTIFACT_PATH=/tmp/artifacts E2E_SUMMARY_OUTPUT=$GITHUB_STEP_SUMMARY make test-experimental-e2e - + - name: Mark for alerts + id: alert_output + if: contains(github.step_summary, 'Firing Alerts') + run: echo "alerts-found=true" >> "$GITHUB_OUTPUT" - uses: actions/upload-artifact@v4 if: failure() with: @@ -111,3 +121,41 @@ jobs: name: upgrade-experimental-e2e-artifacts path: /tmp/artifacts/ + report-performance: + runs-on: ubuntu-latest + needs: [ experimental-e2e, e2e-kind ] + steps: + - name: Find Previous Alert Comment + id: find_comment + uses: peter-evans/find-comment@v3 + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: '**Alerts Found**' + + - name: Delete comment + uses: detomarco/delete-comments@1.1.0 + if: | + needs.e2e-kind.outputs.alerts-found == false && + needs.experimental-e2e.outputs.alerts-found == false + with: + comment-id: ${{ steps.find_comment.outputs.comment-id }} + + - name: Post Failure Comment + uses: peter-evans/create-or-update-comment@v4 + if: | + needs.e2e-kind.outputs.alerts-found == true || + needs.experimental-e2e.outputs.alerts-found == true + with: + issue-number: ${{ github.event.pull_request.number }} + comment-id: ${{ steps.find_comment.outputs.comment-id }} + edit-mode: replace + body: | + **Alerts Found** + + /hold + + A hold has been placed on this PR due to performance alerts during the CI's e2e run. + View the summary [here](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detaills. + + To acknowledge this warning and continue anyway, leave an `/unhold` comment. diff --git a/helm/prometheus/templates/prometheusrile-controller-alerts.yml b/helm/prometheus/templates/prometheusrile-controller-alerts.yml index bce2706eea..9374d72b3f 100644 --- a/helm/prometheus/templates/prometheusrile-controller-alerts.yml +++ b/helm/prometheus/templates/prometheusrile-controller-alerts.yml @@ -37,8 +37,8 @@ spec: - alert: operator-controller-memory-usage annotations: description: 'operator-controller pod using high memory resources for the last 5 minutes: {{`{{ $value | humanize }}`}}B' - expr: sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"}) > 100_000_000 - for: 5m + expr: sum(container_memory_working_set_bytes{pod=~"operator-controller.*",container="manager"}) > 100_000 + for: 1s keep_firing_for: 1d - alert: catalogd-memory-usage annotations: diff --git a/internal/shared/util/testutils/summary.go b/internal/shared/util/testutils/summary.go index 37c1d51e01..625b0e16c0 100644 --- a/internal/shared/util/testutils/summary.go +++ b/internal/shared/util/testutils/summary.go @@ -42,16 +42,14 @@ type xychart struct { } type githubSummary struct { - client api.Client - Pods []string - alertsFiring bool + client api.Client + Pods []string } func NewSummary(c api.Client, pods ...string) githubSummary { return githubSummary{ - client: c, - Pods: pods, - alertsFiring: false, + client: c, + Pods: pods, } } @@ -139,7 +137,6 @@ func (s *githubSummary) Alerts() (string, error) { switch a.State { case v1.AlertStateFiring: firingAlerts = append(firingAlerts, aConv) - s.alertsFiring = true case v1.AlertStatePending: pendingAlerts = append(pendingAlerts, aConv) // Ignore AlertStateInactive; the alerts endpoint doesn't return them @@ -176,10 +173,10 @@ func executeTemplate(templateFile string, obj any) (string, error) { // The markdown is template-driven; the summary methods are called from within the // template. This allows us to add or change queries (hopefully) without needing to // touch code. The summary will be output to a file supplied by the env target. -func PrintSummary(path string) error { +func PrintSummary(path string) { if path == "" { fmt.Printf("No summary output path specified; skipping") - return nil + return } client, err := api.NewClient(api.Config{ @@ -187,23 +184,19 @@ func PrintSummary(path string) error { }) if err != nil { fmt.Printf("warning: failed to initialize promQL client: %v", err) - return nil + return } summary := NewSummary(client, "operator-controller", "catalogd") summaryMarkdown, err := executeTemplate(summaryTemplate, &summary) if err != nil { fmt.Printf("warning: failed to generate e2e test summary: %v", err) - return nil + return } err = os.WriteFile(path, []byte(summaryMarkdown), 0o600) if err != nil { fmt.Printf("warning: failed to write e2e test summary output to %s: %v", path, err) - return nil + return } fmt.Printf("Test summary output to %s successful\n", path) - if summary.alertsFiring { - return fmt.Errorf("performance alerts encountered during test run; please check e2e test summary for details") - } - return nil } diff --git a/internal/shared/util/testutils/templates/alert.md.tmpl b/internal/shared/util/testutils/templates/alert.md.tmpl index 39f3e42879..2ce5ec04f7 100644 --- a/internal/shared/util/testutils/templates/alert.md.tmpl +++ b/internal/shared/util/testutils/templates/alert.md.tmpl @@ -6,11 +6,13 @@ | State | {{ .State }} | {{- end}} +{{ if .FiringAlerts }} ### Firing Alerts {{ range .FiringAlerts }} {{ template "alert" .}} -{{ end }} +{{ end }}{{ end }} +{{ if .PendingAlerts }} ### Pending Alerts {{ range .PendingAlerts }} {{ template "alert" .}} -{{ end }} +{{ end }}{{ end }} \ No newline at end of file diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index aa033a2f1e..f630b2d213 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -40,12 +40,7 @@ func TestMain(m *testing.M) { if path == "" { fmt.Printf("Note: E2E_SUMMARY_OUTPUT is unset; skipping summary generation") } else { - err = utils.PrintSummary(path) - if err != nil { - // Fail the run if alerts are found - fmt.Printf("%v", err) - os.Exit(1) - } + utils.PrintSummary(path) } os.Exit(res) }