Skip to content

Commit 0fe59ab

Browse files
author
Mark Saroufim
committed
Add dedicated Helion workflow for Nebius B200 runners
The Helion runner image has no bare `pip` (only `python3 -m pip`) and needs `sudo` for `apt-get`, so this commit adds a dedicated helion_workflow.yml instead of reusing the NVIDIA workflow.
1 parent 80ebeaa commit 0fe59ab

File tree

2 files changed

+79
-5
lines changed

2 files changed

+79
-5
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
name: Helion Job
2+
on:
3+
workflow_dispatch:
4+
inputs:
5+
run_id:
6+
description: 'Unique identifier for this run'
7+
required: true
8+
type: string
9+
payload:
10+
description: 'Content of the user submission, as json string'
11+
required: true
12+
type: string
13+
runner:
14+
description: 'Helion runner to run workflow on'
15+
required: true
16+
default: "nebius-b200-helion-runners"
17+
type: string
18+
requirements:
19+
description: 'Contents for a requirements.txt file'
20+
required: false
21+
type: string
22+
23+
run-name: 'Helion Job - ${{ github.event.inputs.run_id }}'
24+
25+
jobs:
26+
run:
27+
runs-on: ${{ github.event.inputs.runner }}
28+
timeout-minutes: 20
29+
steps:
30+
- uses: actions/checkout@v3
31+
32+
- name: Create input files
33+
shell: bash
34+
run: |
35+
# Install jq, then extract the payload content without printing it
36+
sudo apt-get update && sudo apt-get install -y jq
37+
PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
38+
39+
# Apply mask to the extracted content
40+
echo "::add-mask::$PAYLOAD"
41+
42+
# Now write to file (won't be logged since it's masked)
43+
echo "$PAYLOAD" > payload.json
44+
45+
- name: Setup Virtual Environment and Install Dependencies
46+
shell: bash
47+
run: |
48+
python3 -m pip install --upgrade pip
49+
python3 -m pip install -r "requirements-dev.txt"
50+
python3 -m pip install -e .
51+
52+
- name: Run script
53+
shell: bash
54+
run: |
55+
python3 src/runners/github-runner.py
56+
57+
- name: Upload training artifacts
58+
uses: actions/upload-artifact@v4
59+
if: always()
60+
with:
61+
name: run-result
62+
path: result.json
63+
64+
- name: Upload profiling artifacts
65+
uses: actions/upload-artifact@v4
66+
if: always()
67+
with:
68+
name: profile-data
69+
path: profile_data/*
70+
retention-days: 1

src/libkernelbot/launchers/github.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,14 @@ async def run_submission( # noqa: C901
105105
}[gpu_type.value]
106106
gpu_vendor = "AMD"
107107
requirements = AMD_REQUIREMENTS
108-
elif gpu_type.value in ["NVIDIA", "B200_Nebius"]:
108+
elif gpu_type.value == "B200_Nebius":
109+
selected_workflow = "helion_workflow.yml"
110+
runner_name = "nebius-b200-helion-runners"
111+
gpu_vendor = "NVIDIA"
112+
requirements = NVIDIA_REQUIREMENTS
113+
elif gpu_type.value == "NVIDIA":
109114
selected_workflow = "nvidia_workflow.yml"
110-
runner_name = {
111-
"NVIDIA": "nvidia-docker-b200-8-x86-64",
112-
"B200_Nebius": "nebius-b200-helion-runners",
113-
}[gpu_type.value]
115+
runner_name = "nvidia-docker-b200-8-x86-64"
114116
gpu_vendor = "NVIDIA"
115117
requirements = NVIDIA_REQUIREMENTS
116118
else:
@@ -307,6 +309,8 @@ async def trigger(self, inputs: dict) -> bool:
307309
expected_run_name = f"AMD Job - {run_id}"
308310
elif self.workflow_file == "nvidia_workflow.yml":
309311
expected_run_name = f"NVIDIA Job - {run_id}"
312+
elif self.workflow_file == "helion_workflow.yml":
313+
expected_run_name = f"Helion Job - {run_id}"
310314
else:
311315
raise ValueError(f"Unknown workflow file: {self.workflow_file}")
312316

0 commit comments

Comments
 (0)