From ecfa1b6fe8e87fcd5166ee8664a056336aca2595 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 29 Jul 2025 23:08:00 -0700
Subject: [PATCH 01/11] Test linux.dgx.b200

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 90081c2..7a7ac36 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -18,6 +18,7 @@
         "linux.aws.h100",
         "linux.rocm.gpu.mi300.2",  # No single ROCm GPU?
         "linux.24xl.spr-metal",
+        "linux.dgx.b200",
     ],
     # NB: There is no 2xH100 runner at the momement, so let's use the next one
     # in the list here which is 4xH100

From 24ce5f214072e6b88986eca273aca35737812fe4 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 29 Jul 2025 23:10:42 -0700
Subject: [PATCH 02/11] Map linux.dgx.b200 to the cuda platform

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 7a7ac36..86bdf98 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -42,6 +42,7 @@
 RUNNER_TO_PLATFORM_MAPPING = {
     "linux.aws.a100": "cuda",
     "linux.aws.h100": "cuda",
+    "linux.dgx.b200": "cuda",
     "linux.aws.h100.4": "cuda",
     "linux.aws.h100.8": "cuda",
     "linux.rocm.gpu.mi300.2": "rocm",

From 110dbc06b8af8147b515f46587e1aecebc4c334e Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 29 Jul 2025 23:31:22 -0700
Subject: [PATCH 03/11] Auth with AWS on B200 DGX runners

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/vllm-benchmark.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index 5a818ea..530fc08 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -276,7 +276,7 @@ jobs:
 
       - name: Authenticate with AWS
         # AWS CUDA runners already have access to the bucket via its runner IAM role
-        if: env.DEVICE_NAME != 'cuda'
+        if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results

From 849b592b6cc7de3e58d6cf6a3e2008a755fcbc7d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 1 Aug 2025 17:36:53 -0700
Subject: [PATCH 04/11] Add linux.dgx.b200.8

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 9bac565..930306e 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -29,12 +29,14 @@
     4: [
         "linux.aws.h100.4",
         "linux.rocm.gpu.gfx942.4",
+        "linux.dgx.b200.8",  # TODO (huydh): See if it makes sense to have 4xB200
         # TODO (huydhn): Enable this when Intel's runners are ready
         # "intel-cpu-emr",
     ],
     8: [
         "linux.aws.h100.8",
         "linux.rocm.gpu.gfx942.8",
+        "linux.dgx.b200.8",
     ],
 }
 

From ed30375298881dc2e8f242e98ff94f85fe0ba0d2 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 1 Aug 2025 18:35:36 -0700
Subject: [PATCH 05/11] Map linux.dgx.b200.8 to the cuda platform

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 930306e..3001c9b 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -44,9 +44,10 @@
 RUNNER_TO_PLATFORM_MAPPING = {
     "linux.aws.a100": "cuda",
     "linux.aws.h100": "cuda",
-    "linux.dgx.b200": "cuda",
     "linux.aws.h100.4": "cuda",
     "linux.aws.h100.8": "cuda",
+    "linux.dgx.b200": "cuda",
+    "linux.dgx.b200.8": "cuda",
     "linux.rocm.gpu.gfx942.2": "rocm",
     "linux.rocm.gpu.gfx942.4": "rocm",
     "linux.rocm.gpu.gfx942.8": "rocm",

From e6b8e868a88ad987ff117ced516e862aed2f0091 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sun, 3 Aug 2025 22:31:07 -0700
Subject: [PATCH 06/11] [no ci] Bump torch to 2.7.1

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
index 4419c9f..2a191d6 100644
--- a/.github/scripts/requirements.txt
+++ b/.github/scripts/requirements.txt
@@ -4,4 +4,4 @@ psutil==7.0.0
 pynvml==12.0.0
 boto3==1.36.21
 awscli==1.37.21
-torch==2.7.0
+torch==2.7.1

From 645d3e487f09f2c71ec395451d9e6650c9d81180 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sun, 3 Aug 2025 22:34:08 -0700
Subject: [PATCH 07/11] [no ci] Use the cu128 extra index for pip install

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/vllm-benchmark.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index 530fc08..702b00f 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -134,7 +134,8 @@ jobs:
             pip install -r .github/scripts/requirements.txt \
               --extra-index-url https://download.pytorch.org/whl/rocm6.3
           else
-            pip install -r .github/scripts/requirements.txt
+            pip install -r .github/scripts/requirements.txt \
+              --extra-index-url https://download.pytorch.org/whl/cu128
           fi
 
       - name: Set Docker registry

From a4fd47689a571fbb0f8ff4218b8a59588ce4e74f Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 5 Aug 2025 12:14:42 -0700
Subject: [PATCH 08/11] Drop linux.dgx.b200.8 from the 4-GPU runner list

---
 .github/scripts/generate_vllm_benchmark_matrix.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 3001c9b..e0bb071 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -29,7 +29,6 @@
     4: [
         "linux.aws.h100.4",
         "linux.rocm.gpu.gfx942.4",
-        "linux.dgx.b200.8",  # TODO (huydh): See if it makes sense to have 4xB200
         # TODO (huydhn): Enable this when Intel's runners are ready
         # "intel-cpu-emr",
     ],

From c6a29f1af590099898c212a33fd94d1d0ed59a12 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 5 Aug 2025 15:08:36 -0700
Subject: [PATCH 09/11] Keep the artifact name unique

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/vllm-benchmark.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index fa62f3a..e656672 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -281,7 +281,7 @@ jobs:
       # Keep a copy of the benchmark results on GitHub for reference
       - uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results
+          name: benchmark-results-${{ matrix.models }}
           path: vllm-benchmarks/vllm/benchmarks/results
 
       - name: Authenticate with AWS

From 807ba893deee96da059759b3550f187c2e5bf1bc Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 5 Aug 2025 15:35:27 -0700
Subject: [PATCH 10/11] Sanitize the model name

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/vllm-benchmark.yml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index e656672..f8a4728 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -278,12 +278,6 @@ jobs:
           )
           docker exec -t "${container_name}" bash -c "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
 
-      # Keep a copy of the benchmark results on GitHub for reference
-      - uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-results-${{ matrix.models }}
-          path: vllm-benchmarks/vllm/benchmarks/results
-
       - name: Authenticate with AWS
         # AWS CUDA runners already have access to the bucket via its runner IAM role
         if: env.DEVICE_NAME == 'rocm' || contains(env.DEVICE_TYPE, 'B200')
@@ -305,10 +299,20 @@ jobs:
           ls -lah "${BENCHMARK_RESULTS}"
 
           SANITIZED_DEVICE_TYPE=$(echo "${DEVICE_TYPE// /_}" | sed "s/[^[:alnum:].-]/_/g")
+          SANITIZED_MODELS="${MODELS//\//_}"
+
           python3 .github/scripts/upload_benchmark_results.py \
             --repo vllm-benchmarks/vllm \
             --benchmark-name "vLLM benchmark" \
             --benchmark-results "${BENCHMARK_RESULTS}" \
             --device-name "${DEVICE_NAME}" \
             --device-type "${SANITIZED_DEVICE_TYPE}" \
-            --model "${MODELS//\//_}"
+            --model "${SANITIZED_MODELS}"
+
+          echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV
+
+      # Keep a copy of the benchmark results on GitHub for reference
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results-${{ env.SANITIZED_MODELS }}
+          path: vllm-benchmarks/vllm/benchmarks/results

From 39f3460ccf727a3c5eaf48dfb21b8dab122c97fb Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Tue, 5 Aug 2025 16:12:28 -0700
Subject: [PATCH 11/11] Add sanitized device type to the artifact name

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/workflows/vllm-benchmark.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index f8a4728..0b2965b 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -309,10 +309,11 @@ jobs:
             --device-type "${SANITIZED_DEVICE_TYPE}" \
             --model "${SANITIZED_MODELS}"
 
+          echo "SANITIZED_DEVICE_TYPE=$SANITIZED_DEVICE_TYPE" >> $GITHUB_ENV
           echo "SANITIZED_MODELS=$SANITIZED_MODELS" >> $GITHUB_ENV
 
       # Keep a copy of the benchmark results on GitHub for reference
       - uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results-${{ env.SANITIZED_MODELS }}
+          name: benchmark-results-${{ env.SANITIZED_DEVICE_TYPE }}-${{ env.SANITIZED_MODELS }}
           path: vllm-benchmarks/vllm/benchmarks/results