From ecfa1b6fe8e87fcd5166ee8664a056336aca2595 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 29 Jul 2025 23:08:00 -0700 Subject: [PATCH 01/11] Test linux.dgx.b200 Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 90081c2..7a7ac36 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -18,6 +18,7 @@ "linux.aws.h100", "linux.rocm.gpu.mi300.2", # No single ROCm GPU? "linux.24xl.spr-metal", + "linux.dgx.b200", ], # NB: There is no 2xH100 runner at the momement, so let's use the next one # in the list here which is 4xH100 From 24ce5f214072e6b88986eca273aca35737812fe4 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 29 Jul 2025 23:10:42 -0700 Subject: [PATCH 02/11] Debug Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 7a7ac36..86bdf98 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -42,6 +42,7 @@ RUNNER_TO_PLATFORM_MAPPING = { "linux.aws.a100": "cuda", "linux.aws.h100": "cuda", + "linux.dgx.b200": "cuda", "linux.aws.h100.4": "cuda", "linux.aws.h100.8": "cuda", "linux.rocm.gpu.mi300.2": "rocm", From 110dbc06b8af8147b515f46587e1aecebc4c334e Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 29 Jul 2025 23:31:22 -0700 Subject: [PATCH 03/11] Auth with AWS on B200 DGX runners Signed-off-by: Huy Do --- .github/workflows/vllm-benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index 5a818ea..530fc08 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -276,7 +276,7 @@ jobs: - name: Authenticate with AWS # AWS CUDA runners already have access to the bucket via its runner IAM role - if: env.DEVICE_NAME != 'cuda' + if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200') uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 with: role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results From 849b592b6cc7de3e58d6cf6a3e2008a755fcbc7d Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 1 Aug 2025 17:36:53 -0700 Subject: [PATCH 04/11] Add linux.dgx.b200.8 Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 9bac565..930306e 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -29,12 +29,14 @@ 4: [ "linux.aws.h100.4", "linux.rocm.gpu.gfx942.4", + "linux.dgx.b200.8", # TODO (huydh): See if it makes sense to have 4xB200 # TODO (huydhn): Enable this when Intel's runners are ready # "intel-cpu-emr", ], 8: [ "linux.aws.h100.8", "linux.rocm.gpu.gfx942.8", + "linux.dgx.b200.8", ], } From ed30375298881dc2e8f242e98ff94f85fe0ba0d2 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 1 Aug 2025 18:35:36 -0700 Subject: [PATCH 05/11] Another tweak Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 930306e..3001c9b 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -44,9 +44,10 @@ RUNNER_TO_PLATFORM_MAPPING = { "linux.aws.a100": "cuda", "linux.aws.h100": "cuda", - "linux.dgx.b200": "cuda", "linux.aws.h100.4": "cuda", "linux.aws.h100.8": "cuda", + "linux.dgx.b200": "cuda", + "linux.dgx.b200.8": "cuda", "linux.rocm.gpu.gfx942.2": "rocm", "linux.rocm.gpu.gfx942.4": "rocm", "linux.rocm.gpu.gfx942.8": "rocm", From e6b8e868a88ad987ff117ced516e862aed2f0091 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sun, 3 Aug 2025 22:31:07 -0700 Subject: [PATCH 06/11] [no ci] 2.7.1 Signed-off-by: Huy Do --- .github/scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt index 4419c9f..2a191d6 100644 --- a/.github/scripts/requirements.txt +++ b/.github/scripts/requirements.txt @@ -4,4 +4,4 @@ psutil==7.0.0 pynvml==12.0.0 boto3==1.36.21 awscli==1.37.21 -torch==2.7.0 +torch==2.7.1 From 645d3e487f09f2c71ec395451d9e6650c9d81180 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sun, 3 Aug 2025 22:34:08 -0700 Subject: [PATCH 07/11] [no ci] Use cu128 Signed-off-by: Huy Do --- .github/workflows/vllm-benchmark.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index 530fc08..702b00f 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -134,7 +134,8 @@ jobs: pip install -r .github/scripts/requirements.txt \ --extra-index-url https://download.pytorch.org/whl/rocm6.3 else - pip install -r .github/scripts/requirements.txt + pip install -r .github/scripts/requirements.txt \ + --extra-index-url https://download.pytorch.org/whl/cu128 fi - name: Set Docker registry From a4fd47689a571fbb0f8ff4218b8a59588ce4e74f Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 5 Aug 2025 12:14:42 -0700 Subject: [PATCH 08/11] A small tweak --- .github/scripts/generate_vllm_benchmark_matrix.py | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 3001c9b..e0bb071 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -29,7 +29,6 @@ 4: [ "linux.aws.h100.4", "linux.rocm.gpu.gfx942.4", - "linux.dgx.b200.8", # TODO (huydh): See if it makes sense to have 4xB200 # TODO (huydhn): Enable this when Intel's runners are ready # "intel-cpu-emr", ], From c6a29f1af590099898c212a33fd94d1d0ed59a12 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 5 Aug 2025 15:08:36 -0700 Subject: [PATCH 09/11] Keep the name unique Signed-off-by: Huy Do --- .github/workflows/vllm-benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index fa62f3a..e656672 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -281,7 +281,7 @@ jobs: # Keep a copy of the benchmark results on GitHub for reference - uses: actions/upload-artifact@v4 with: - name: benchmark-results + name: benchmark-results-${{ matrix.models }} path: vllm-benchmarks/vllm/benchmarks/results - name: Authenticate with AWS From 807ba893deee96da059759b3550f187c2e5bf1bc Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 5 Aug 2025 15:35:27 -0700 Subject: [PATCH 10/11] Sanitize the model name Signed-off-by: Huy Do --- .github/workflows/vllm-benchmark.yml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index e656672..f8a4728 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -278,12 +278,6 @@ jobs: ) docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh" - # Keep a copy of the benchmark results on GitHub for reference - - uses: actions/upload-artifact@v4 - with: - name: benchmark-results-${{ matrix.models }} - path: vllm-benchmarks/vllm/benchmarks/results - - name: Authenticate with AWS # AWS CUDA runners already have access to the bucket via its runner IAM role if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200') @@ -305,10 +299,20 @@ jobs: ls -lah "${BENCHMARK_RESULTS}" SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g") + SANITIZED_MODELS="${MODELS//\//_}" + python3 .github/scripts/upload_benchmark_results.py \ --repo vllm-benchmarks/vllm \ --benchmark-name "vLLM benchmark" \ --benchmark-results "${BENCHMARK_RESULTS}" \ --device-name "${DEVICE_NAME}" \ --device-type "${SANITIZED_DEVICE_TYPE}" \ - --model "${MODELS//\//_}" + --model "${SANITIZED_MODELS}" + + echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV + + # Keep a copy of the benchmark results on GitHub for reference + - uses: actions/upload-artifact@v4 + with: + name: benchmark-results-${{ env.SANITIZED_MODELS }} + path: vllm-benchmarks/vllm/benchmarks/results From 39f3460ccf727a3c5eaf48dfb21b8dab122c97fb Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 5 Aug 2025 16:12:28 -0700 Subject: [PATCH 11/11] Add sanitized device Signed-off-by: Huy Do --- .github/workflows/vllm-benchmark.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index f8a4728..0b2965b 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -309,10 +309,11 @@ jobs: --device-type "${SANITIZED_DEVICE_TYPE}" \ --model "${SANITIZED_MODELS}" + echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV # Keep a copy of the benchmark results on GitHub for reference - uses: actions/upload-artifact@v4 with: - name: benchmark-results-${{ env.SANITIZED_MODELS }} + name: benchmark-results--${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }} path: vllm-benchmarks/vllm/benchmarks/results