File tree Expand file tree Collapse file tree 2 files changed +79
-5
lines changed
src/libkernelbot/launchers Expand file tree Collapse file tree 2 files changed +79
-5
# Helion Job: manually dispatched workflow that writes the submitted payload
# to payload.json, runs src/runners/github-runner.py on the selected runner,
# and uploads result.json and profile_data/* as artifacts.
name: Helion Job

on:
  workflow_dispatch:
    inputs:
      run_id:
        description: 'Unique identifier for this run'
        required: true
        type: string
      payload:
        description: 'Content of the user submission, as json string'
        required: true
        type: string
      runner:
        description: 'Helion runner to run workflow on'
        required: true
        default: "nebius-b200-helion-runners"
        type: string
      # NOTE(review): no step in this workflow references this input directly;
      # confirm whether the runner script consumes it some other way.
      requirements:
        description: 'Contents for a requirements.txt file'
        required: false
        type: string

# Must stay byte-for-byte in sync with the "Helion Job - {run_id}" run name
# that the launcher expects for helion_workflow.yml.
run-name: 'Helion Job - ${{ github.event.inputs.run_id }}'

jobs:
  run:
    # The runner label is chosen by the caller via the `runner` input.
    runs-on: ${{ github.event.inputs.runner }}
    timeout-minutes: 20
    steps:
      # v4 runs on Node 20, matching the upload-artifact@v4 steps below.
      - uses: actions/checkout@v4

      - name: Create input files
        shell: bash
        run: |
          # Extract the payload content without printing it.
          # Install jq only when the runner does not already provide it.
          if ! command -v jq >/dev/null 2>&1; then
            sudo apt-get update && sudo apt-get install -y jq
          fi
          PAYLOAD=$(jq -r '.inputs.payload' "$GITHUB_EVENT_PATH")

          # Apply mask to the extracted content
          # NOTE(review): add-mask takes a single line; if the payload can
          # contain raw newlines only the first line would be masked.
          # Confirm the payload is always serialized as single-line JSON.
          echo "::add-mask::$PAYLOAD"

          # Now write to file (won't be logged since it's masked).
          # printf avoids echo misreading a payload that starts with "-n"/"-e".
          printf '%s\n' "$PAYLOAD" > payload.json

      - name: Setup Virtual Environment and Install Dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install -r "requirements-dev.txt"
          python3 -m pip install -e .

      - name: Run script
        shell: bash
        run: |
          python3 src/runners/github-runner.py

      # Uploads run even when an earlier step failed, so results of failed
      # runs are still collected.
      - name: Upload training artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: run-result
          path: result.json

      - name: Upload profiling artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: profile-data
          path: profile_data/*
          retention-days: 1
Original file line number Diff line number Diff line change @@ -105,12 +105,14 @@ async def run_submission( # noqa: C901
105105 }[gpu_type .value ]
106106 gpu_vendor = "AMD"
107107 requirements = AMD_REQUIREMENTS
108- elif gpu_type .value in ["NVIDIA" , "B200_Nebius" ]:
108+ elif gpu_type .value == "B200_Nebius" :
109+ selected_workflow = "helion_workflow.yml"
110+ runner_name = "nebius-b200-helion-runners"
111+ gpu_vendor = "NVIDIA"
112+ requirements = NVIDIA_REQUIREMENTS
113+ elif gpu_type .value == "NVIDIA" :
109114 selected_workflow = "nvidia_workflow.yml"
110- runner_name = {
111- "NVIDIA" : "nvidia-docker-b200-8-x86-64" ,
112- "B200_Nebius" : "nebius-b200-helion-runners" ,
113- }[gpu_type .value ]
115+ runner_name = "nvidia-docker-b200-8-x86-64"
114116 gpu_vendor = "NVIDIA"
115117 requirements = NVIDIA_REQUIREMENTS
116118 else :
@@ -307,6 +309,8 @@ async def trigger(self, inputs: dict) -> bool:
307309 expected_run_name = f"AMD Job - { run_id } "
308310 elif self .workflow_file == "nvidia_workflow.yml" :
309311 expected_run_name = f"NVIDIA Job - { run_id } "
312+ elif self .workflow_file == "helion_workflow.yml" :
313+ expected_run_name = f"Helion Job - { run_id } "
310314 else :
311315 raise ValueError (f"Unknown workflow file: { self .workflow_file } " )
312316
You can’t perform that action at this time.
0 commit comments