From 2b5f07df63be41f55943641c69ce6e321ec67373 Mon Sep 17 00:00:00 2001 From: velaraptor-runpod Date: Wed, 4 Mar 2026 16:38:40 -0600 Subject: [PATCH] feat: Update vLLM to 0.16.0; remove NUM_GPU_BLOCKS_OVERRIDE from hub defaults, since a default of 0 breaks startup --- .runpod/hub.json | 9 --------- Dockerfile | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.runpod/hub.json b/.runpod/hub.json index ece04835..a45aa552 100644 --- a/.runpod/hub.json +++ b/.runpod/hub.json @@ -280,15 +280,6 @@ "advanced": true } }, - { - "key": "NUM_GPU_BLOCKS_OVERRIDE", - "input": { - "name": "Num GPU Blocks Override", - "type": "number", - "description": "If specified, ignore GPU profiling result and use this number of GPU blocks.", - "advanced": true - } - }, { "key": "MAX_NUM_BATCHED_TOKENS", "input": { diff --git a/Dockerfile b/Dockerfile index d4cbeffd..6bb5c465 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ RUN ldconfig /usr/local/cuda-12.9/compat/ # Install vLLM with FlashInfer - use CUDA 12.8 PyTorch wheels (compatible with vLLM 0.15.1) RUN python3 -m pip install --upgrade pip && \ - python3 -m pip install "vllm[flashinfer]==0.15.1" --extra-index-url https://download.pytorch.org/whl/cu129 + python3 -m pip install "vllm[flashinfer]==0.16.0" --extra-index-url https://download.pytorch.org/whl/cu129