Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,28 @@ pull_request_rules:
- -files~=^\.github/(actions|workflows)/.*\.ya?ml$
- -files~=^\.github/workflows/actionlint\.

# medium e2e workflow
- or:
- and:
# note this should match the triggering criteria in 'e2e-nvidia-l4-x1.yml'
- check-success~=e2e-medium-workflow-complete
- or:
- files~=\.py$
- files=pyproject.toml
- files=tox.ini
- files=requirements.txt
- files=requirements-dev.txt
- files=constraints-dev.txt
- files~=^\.github/workflows/.*\.yml$ # This workflow
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to trigger medium for a change in ANY workflow file, do we?

- and:
- files~=\.py$
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't these entries have - prefixes, since this section is to detect the situation when no listed template matches? (in which case we do NOT require check-success~=e2e-medium-workflow-complete)

- files=pyproject.toml
- files=tox.ini
- files=requirements.txt
- files=requirements-dev.txt
- files=constraints-dev.txt
- files~=^\.github/workflows/.*\.yml$ # This workflow

# code lint workflow
- or:
- and:
Expand Down
39 changes: 29 additions & 10 deletions .github/workflows/e2e-nvidia-l40s-x4-sdk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ name: E2E (NVIDIA L40S x4) SDK Test

on:
# only run on PRs that touch certain regex paths
pull_request:
# only run on PRs that touch certain regex paths
pull_request_target:
branches:
- main
- release-*
paths:
# note this should match the merging criteria in 'mergify.yml'
- "**.py"
Expand All @@ -15,20 +17,23 @@ on:
- "requirements.txt"
- "requirements-dev.txt"
- "constraints-dev.txt"
- ".github/workflows/e2e-nvidia-l40s-x4-sdk.yaml" # This workflow
workflow_dispatch:
inputs:
pr_or_branch:
description: 'pull request number or branch name'
required: true
default: 'main'
- ".github/workflows/*.yml" # This workflow
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change seems not helping - it triggers the job for unrelated workflow file changes.

workflow_dispatch: {}
concurrency:
group: ${{ github.workflow }}-${{ github.event.number || github.ref }}
cancel-in-progress: true

env:
TMPDIR: /home/tmp

defaults:
run:
shell: bash


permissions:
contents: read

jobs:
start-large-ec2-runner:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -97,8 +102,11 @@ jobs:
- start-large-ec2-runner
runs-on: ${{ needs.start-large-ec2-runner.outputs.label }}

permissions:
pull-requests: write

# It is important that this job has no write permissions and has
# no access to any secrets. This part (e2e-medium-test) is where we are running
# untrusted code from PRs.
permissions: {}

steps:
- name: Install Packages
Expand Down Expand Up @@ -308,3 +316,14 @@ jobs:
run: |
echo "::warning::Failed to upload Phase 2 loss graph to S3. This won't block the workflow, but you may want to investigate."
echo "Loss graph upload failed" >> "${GITHUB_STEP_SUMMARY}"

e2e-medium-workflow-complete:
# we don't want to block PRs on failed EC2 cleanup
# so not requiring "stop-medium-ec2-runner" as well
permissions:
checks: read
uses: ./.github/workflows/status-checks.yml
with:
job_ids: >- # Space-separated job ids to wait on for status checks
start-large-ec2-runner
e2e-medium-test
60 changes: 60 additions & 0 deletions .github/workflows/status-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: Apache-2.0
# yamllint disable rule:line-length

name: Status Checks Reusable Workflow

on:
workflow_call:
inputs:
job_ids:
description: 'Space-separated job ids to wait on for status checks'
required: true
type: string
delay:
description: 'Period in seconds to wait before first poll of GitHub Check Runs'
required: false
type: number
default: 10
interval:
description: 'Interval or period in seconds between polling GitHub Check Runs'
required: false
type: number
default: 10
timeout:
description: 'Timeout in seconds to complete polling GitHub Check Runs'
required: false
type: number
default: 3600

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
checks: read

jobs:
status-checks:
runs-on: ubuntu-latest
steps:
- name: "Harden Runner"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are going to remove this step from all workflows; I'd rather drop it from the PR now instead of cleaning up later.

uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
with:
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs

- name: "Set status check variables"
id: set_variables
run: |
jq -nr '[$ARGS.positional[] | split("\\s"; null) | map(select(. != ""))] | flatten | join("|") | ("match_pattern=(" + . + ")")' --args "${{ inputs.job_ids }}" >> "$GITHUB_OUTPUT"
- name: "Wait for status checks"
uses: poseidon/wait-for-status-checks@899c768d191b56eef585c18f8558da19e1f3e707 # v0.6.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
match_pattern: ${{ steps.set_variables.outputs.match_pattern }}
delay: ${{ inputs.delay }}
interval: ${{ inputs.interval }}
timeout: ${{ inputs.timeout }}