instructlab · cdoern · Jun 10, 2025 · booxter · Jun 20, 2025 · booxter
diff --git a/.github/mergify.yml b/.github/mergify.yml
@@ -24,6 +24,28 @@ pull_request_rules:
         - -files~=^\.github/(actions|workflows)/.*\.ya?ml$
         - -files~=^\.github/workflows/actionlint\.
 
+    # medium e2e workflow
+    - or:
+      - and:
+        # note this should match the triggering criteria in 'e2e-nvidia-l4-x1.yml'
+        - check-success~=e2e-medium-workflow-complete
+        - or:
+          - files~=\.py$
+          - files=pyproject.toml
+          - files=tox.ini
+          - files=requirements.txt
+          - files=requirements-dev.txt
+          - files=constraints-dev.txt
+          - files~=^\.github/workflows/.*\.yml$ # This workflow
+      - and:
+          - files~=\.py$
+          - files=pyproject.toml
+          - files=tox.ini
+          - files=requirements.txt
+          - files=requirements-dev.txt
+          - files=constraints-dev.txt
+          - files~=^\.github/workflows/.*\.yml$ # This workflow
+
     # code lint workflow
     - or:
       - and:

diff --git a/.github/workflows/e2e-nvidia-l40s-x4-sdk.yml b/.github/workflows/e2e-nvidia-l40s-x4-sdk.yml
@@ -4,9 +4,11 @@ name: E2E (NVIDIA L40S x4) SDK Test
 
 on:
   # only run on PRs that touch certain regex paths
-  pull_request:
+  # only run on PRs that touch certain regex paths
+  pull_request_target:
     branches:
       - main
+      - release-*
     paths:
       # note this should match the merging criteria in 'mergify.yml'
       - "**.py"
@@ -15,20 +17,23 @@ on:
       - "requirements.txt"
       - "requirements-dev.txt"
       - "constraints-dev.txt"
-      - ".github/workflows/e2e-nvidia-l40s-x4-sdk.yaml" # This workflow
-  workflow_dispatch:
-    inputs:
-      pr_or_branch:
-        description: 'pull request number or branch name'
-        required: true
-        default: 'main'
+      - ".github/workflows/*.yml" # This workflow
+  workflow_dispatch: {}
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.number || github.ref }}
   cancel-in-progress: true
 
 env:
   TMPDIR: /home/tmp
 
+defaults:
+  run:
+    shell: bash
+
+
+permissions:
+  contents: read
+
 jobs:
   start-large-ec2-runner:
     runs-on: ubuntu-latest
@@ -97,8 +102,11 @@ jobs:
       - start-large-ec2-runner
     runs-on: ${{ needs.start-large-ec2-runner.outputs.label }}
 
-    permissions:
-      pull-requests: write
+
+    # It is important that this job has no write permissions and has
+    # no access to any secrets. This part (e2e-medium-test) is where we are running
+    # untrusted code from PRs.
+    permissions: {}
 
     steps:
       - name: Install Packages
@@ -308,3 +316,14 @@ jobs:
         run: |
           echo "::warning::Failed to upload Phase 2 loss graph to S3. This won't block the workflow, but you may want to investigate."
           echo "Loss graph upload failed" >> "${GITHUB_STEP_SUMMARY}"
+
+  e2e-medium-workflow-complete:
+    # we don't want to block PRs on failed EC2 cleanup
+    # so not requiring "stop-medium-ec2-runner" as well
+    permissions:
+      checks: read
+    uses: ./.github/workflows/status-checks.yml
+    with:
+      job_ids: >- # Space-separated job ids to wait on for status checks
+        start-large-ec2-runner
+        e2e-medium-test
diff --git a/.github/workflows/status-checks.yml b/.github/workflows/status-checks.yml
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: Apache-2.0
+# yamllint disable rule:line-length
+
+name: Status Checks Reusable Workflow
+
+on:
+  workflow_call:
+    inputs:
+      job_ids:
+        description: 'Space-separated job ids to wait on for status checks'
+        required: true
+        type: string
+      delay:
+        description: 'Period in seconds to wait before first poll of GitHub Check Runs'
+        required: false
+        type: number
+        default: 10
+      interval:
+        description: 'Interval or period in seconds between polling GitHub Check Runs'
+        required: false
+        type: number
+        default: 10
+      timeout:
+        description: 'Timeout in seconds to complete polling GitHub Check Runs'
+        required: false
+        type: number
+        default: 3600
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  checks: read
+
+jobs:
+  status-checks:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Harden Runner"
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        with:
+          egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
+
+      - name: "Set status check variables"
+        id: set_variables
+        run: |
+          jq -nr '[$ARGS.positional[] | split("\\s"; null) | map(select(. != ""))] | flatten | join("|") | ("match_pattern=(" + . + ")")' --args "${{ inputs.job_ids }}" >> "$GITHUB_OUTPUT"
+
+      - name: "Wait for status checks"
+        uses: poseidon/wait-for-status-checks@899c768d191b56eef585c18f8558da19e1f3e707 # v0.6.0
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          match_pattern: ${{ steps.set_variables.outputs.match_pattern }}
+          delay: ${{ inputs.delay }}
+          interval: ${{ inputs.interval }}
+          timeout: ${{ inputs.timeout }}