name: Run vLLM tests

on:
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh

# One concurrency group per PR (or commit for non-PR events), further split by
# event type so a scheduled run never cancels a manual dispatch and vice versa
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test:
    name: Run vLLM tests
    strategy:
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          # Full history is needed so the image-check step below can walk
          # back through HEAD~i to find a commit with a published image
          fetch-depth: 0

      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux

          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi
          echo "DOCKER_IMAGE_PREFIX=${DOCKER_IMAGE_PREFIX}" >> "${GITHUB_ENV}"
          echo "DOCKER_IMAGE_SUFFIX=${DOCKER_IMAGE_SUFFIX}" >> "${GITHUB_ENV}"

      - name: Check for available Docker image
        working-directory: vllm
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there, if it doesn't then check an older one
              # because the commit is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"

              # Stop at the first commit whose Docker image has been published;
              # newer commits may not have an image built yet
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi

          echo "HEAD_SHA=${HEAD_SHA}" >> "${GITHUB_ENV}"

      - name: Setup CUDA GPU_FLAG for docker run
        if: matrix.device-name == 'cuda'
        shell: bash
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Run vLLM tests
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux

          # Start a detached container and exec the test script inside it so the
          # checked-out workspace (this repo + vllm/) is visible at /tmp/workspace
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh"