diff --git a/.github/scripts/run_vllm_tests.sh b/.github/scripts/run_vllm_tests.sh
new file mode 100755
index 0000000..4adc512
--- /dev/null
+++ b/.github/scripts/run_vllm_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -eux
+
+# A very simple setup for now without any sharding nor caching just to run some
+# critical tests on H100 that we couldn't run on vLLM CI
+
+echo 'Update me. This is an example'
+
+pushd /vllm-workspace/tests
+pytest -v models/multimodal/generation/test_maverick.py
+popd
diff --git a/.github/workflows/vllm-ci-test.yml b/.github/workflows/vllm-ci-test.yml
new file mode 100644
index 0000000..eed96df
--- /dev/null
+++ b/.github/workflows/vllm-ci-test.yml
@@ -0,0 +1,145 @@
+name: Run vLLM tests
+
+on:
+  schedule:
+    # Run every 4 hours
+    - cron: '0 */4 * * *'
+  workflow_dispatch:
+    inputs:
+      vllm_branch:
+        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
+        required: true
+        type: string
+        default: main
+      vllm_commit:
+        description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
+        required: false
+        type: string
+  pull_request:
+    paths:
+      - .github/workflows/vllm-ci-test.yml
+      - .github/scripts/run_vllm_tests.sh
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Run vLLM tests
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
+          - runs-on: linux.aws.h100.4
+            device-name: cuda
+    permissions:
+      id-token: write
+      contents: read
+    runs-on: ${{ matrix.runs-on }}
+    environment: pytorch-x-vllm
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Checkout vLLM repository
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: vllm
+          ref: ${{ inputs.vllm_branch || 'main' }}
+          fetch-depth: 0
+
+      - name: Set Docker registry
+        shell: bash
+        env:
+          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
+          DEVICE_NAME: ${{ matrix.device-name }}
+        run: |
+          set -eux
+
+          # Mimic the logic from vllm ci-infra test template
+          if [[ "${HEAD_BRANCH}" == "main" ]]; then
+            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
+          else
+            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
+          fi
+
+          DOCKER_IMAGE_SUFFIX=""
+          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
+          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
+            DOCKER_IMAGE_SUFFIX=-cpu
+          fi
+          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
+          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV
+
+      - name: Check for available Docker image
+        working-directory: vllm
+        env:
+          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
+          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
+        run: |
+          set -eux
+
+          if [[ -z "${HEAD_SHA}" ]]; then
+            # Looking back the latest 100 commits is enough
+            for i in {0..99}
+            do
+              # Check if the image is there, if it doesn't then check an older one
+              # because the commit is too recent
+              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
+              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
+
+              # No Docker image available yet because the commit is too recent
+              if docker manifest inspect "${DOCKER_IMAGE}"; then
+                break
+              fi
+            done
+          fi
+
+          # Fail fast with a clear error if no Docker image exists for the selected
+          # commit (either requested explicitly or found by the lookback above)
+          DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
+          if ! docker manifest inspect "${DOCKER_IMAGE}"; then
+            echo "Could not find the Docker image for commit ${HEAD_SHA}"
+            exit 1
+          fi
+
+          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
+
+      - name: Setup CUDA GPU_FLAG for docker run
+        if: matrix.device-name == 'cuda'
+        run: |
+          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
+
+      - name: Setup ROCm
+        if: matrix.device-name == 'rocm'
+        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
+
+      - name: Run vLLM tests
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
+        run: |
+          set -eux
+
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e HF_TOKEN \
+            --ipc=host \
+            --tty \
+            --detach \
+            --security-opt seccomp=unconfined \
+            --shm-size=4g \
+            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
+            -w /tmp/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          # Always stop the detached container so it is not leaked on the runner,
+          # then propagate the exit code from the test run
+          exit_code=0
+          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh" || exit_code=$?
+          docker stop "${container_name}" || true
+          exit "${exit_code}"