name: Run vLLM tests

on:
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh

# One concurrency group per PR (or commit for non-PR events), further split by
# event type so a scheduled run never cancels a manual dispatch and vice versa
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test:
    name: Run vLLM tests
    strategy:
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          # Full history is needed so the image-check step below can walk
          # back through HEAD~i to find a commit with a published image
          fetch-depth: 0

      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux

          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi
          echo "DOCKER_IMAGE_PREFIX=${DOCKER_IMAGE_PREFIX}" >> "${GITHUB_ENV}"
          echo "DOCKER_IMAGE_SUFFIX=${DOCKER_IMAGE_SUFFIX}" >> "${GITHUB_ENV}"

      - name: Check for available Docker image
        working-directory: vllm
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there, if it doesn't then check an older one
              # because the commit is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"

              # Stop at the first commit whose Docker image has been published;
              # newer commits may not have an image built yet
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi

          echo "HEAD_SHA=${HEAD_SHA}" >> "${GITHUB_ENV}"

      - name: Setup CUDA GPU_FLAG for docker run
        if: matrix.device-name == 'cuda'
        shell: bash
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Run vLLM tests
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux

          # Start a detached container and exec the test script inside it so the
          # checked-out workspace (this repo + vllm/) is visible at /tmp/workspace
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh"