Add runner input to NVIDIA workflow for Helion/Nebius B200 support

msaroufim · msaroufim · commit 80ebeaa1073c · 2026-03-05T08:23:23.000-08:00
- Add a configurable `runner` input to nvidia_workflow.yml (defaults to the
  existing nvidia-docker-b200-8-x86-64 runner for backward compatibility)
- Add B200_Nebius GPU type mapped to nebius-b200-helion-runners
- Pass runner input for all GPU vendors (not just AMD)
- Bump NVIDIA workflow timeout from 10 to 20 min to allow for Helion JIT compilation
diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml
@@ -10,6 +10,11 @@ on:
         description: 'Content of the user submission, as json string'
         required: true
         type: string
+      runner:
+        description: 'NVIDIA runner to run workflow on'
+        required: true
+        default: "nvidia-docker-b200-8-x86-64"
+        type: string
       requirements:
         description: 'Contents for a requirements.txt file'
         required: false
@@ -19,8 +24,8 @@ run-name: 'NVIDIA Job - ${{ github.event.inputs.run_id }}'
 
 jobs:
   run:
-    runs-on: [nvidia-docker-b200-8-x86-64]
-    timeout-minutes: 10
+    runs-on: ${{ github.event.inputs.runner }}
+    timeout-minutes: 20
     steps:
     - uses: actions/checkout@v3
 
diff --git a/src/libkernelbot/consts.py b/src/libkernelbot/consts.py
@@ -18,6 +18,7 @@ class SchedulerType(Enum):
 
 class GitHubGPU(Enum):
     NVIDIA = "NVIDIA"
+    B200_Nebius = "B200_Nebius"
     MI300 = "MI300"
     MI250 = "MI250"
     MI300x8 = "MI300x8"
@@ -119,6 +120,7 @@ class RankCriterion(Enum):
     "H100": "90a",
     "B200": "100",
     "NVIDIA": None,
+    "B200_Nebius": "100",
     "MI300": None,
     "MI300x8": None,
     "MI250": None,
diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py
@@ -94,6 +94,7 @@ async def run_submission(  # noqa: C901
         self, config: dict, gpu_type: GPU, status: RunProgressReporter
     ) -> FullResult:
         gpu_vendor = None
+        runner_name = None
         if gpu_type.value in ["MI300", "MI250", "MI300x8", "MI355X"]:
             selected_workflow = "amd_workflow.yml"
             runner_name = {
@@ -104,8 +105,12 @@ async def run_submission(  # noqa: C901
             }[gpu_type.value]
             gpu_vendor = "AMD"
             requirements = AMD_REQUIREMENTS
-        elif gpu_type.value == "NVIDIA":
+        elif gpu_type.value in ["NVIDIA", "B200_Nebius"]:
             selected_workflow = "nvidia_workflow.yml"
+            runner_name = {
+                "NVIDIA": "nvidia-docker-b200-8-x86-64",
+                "B200_Nebius": "nebius-b200-helion-runners",
+            }[gpu_type.value]
             gpu_vendor = "NVIDIA"
             requirements = NVIDIA_REQUIREMENTS
         else:
@@ -129,7 +134,7 @@ async def run_submission(  # noqa: C901
         inputs = {"payload": payload}
         if lang == "py":
             inputs["requirements"] = requirements
-            if gpu_vendor == "AMD":
+            if runner_name:
                 inputs["runner"] = runner_name
 
         if not await run.trigger(inputs):