Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 135 additions & 73 deletions .github/workflows/pr-test-xpu.yml
Original file line number Diff line number Diff line change
@@ -1,73 +1,135 @@
# XPU CI workflow: builds the kernel image, starts a container from it, and
# runs the kernel tests and benchmarks inside that container.
name: PR Test (XPU)

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-and-test:
    # Pull requests run only when labelled 'run-ci'; other triggers always run.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    runs-on: sglang-pvc
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Build Docker image
        # The branch name and clone URL are attacker-controlled on pull
        # requests, so they reach the shell through env vars instead of being
        # pasted into the script text. The fallbacks cover workflow_dispatch,
        # where the pull_request context is empty.
        env:
          KERNEL_BRANCH: ${{ github.head_ref || github.ref_name }}
          KERNEL_REPO: ${{ github.event.pull_request.head.repo.clone_url || format('{0}/{1}.git', github.server_url, github.repository) }}
        run: |
          docker build \
            --build-arg SG_LANG_KERNEL_BRANCH="$KERNEL_BRANCH" \
            --build-arg SG_LANG_KERNEL_REPO="$KERNEL_REPO" \
            --no-cache --progress=plain -f Dockerfile.xpu_kernel -t xpu_sglang:kernel .

      - name: Run container
        run: |
          # A plain assignment fails the step if the token file is unreadable.
          HF_TOKEN="$(cat ~/huggingface_token.txt)"
          export HF_TOKEN
          # '-e HF_TOKEN' forwards the exported value without putting the
          # secret on the docker command line.
          docker run -dt \
            --device /dev/dri/ \
            --name ci_sglang_xpu \
            -e HF_TOKEN \
            xpu_sglang:kernel

      - name: Install Dependency
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install --upgrade pip
          docker exec ci_sglang_xpu /miniforge3/envs/py3.10/bin/python3 -m pip install pytest expecttest ray huggingface_hub
          # Single quotes: HF_TOKEN is expanded inside the container, not on the runner.
          docker exec ci_sglang_xpu /bin/bash -c '/miniforge3/envs/py3.10/bin/huggingface-cli login --token "${HF_TOKEN}"'
          docker exec ci_sglang_xpu /bin/bash -c "ln -sf /miniforge3/envs/py3.10/bin/python3 /usr/bin/python3"

      - name: Run Sglang Kernel Cases
        timeout-minutes: 20
        run: |
          docker exec -w /root/sglang ci_sglang_xpu \
            /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/tests && python3 run_suite.py --suite per-commit "

      - name: Run Sglang Kernel Benchmarks
        timeout-minutes: 20
        run: |
          docker exec -w /root/sglang ci_sglang_xpu \
            /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/benchmark && python3 bench_flash_attn.py && python3 bench_moe_topk_softmax.py && python3 bench_fused_moe.py "

      # Placeholder: only prints a marker line.
      - name: Run E2E Bfloat16 tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      # Placeholder: only prints a marker line.
      - name: Run E2E Quantization tests
        timeout-minutes: 20
        run: |
          echo "[PlaceHolder for E2E Test...]"

      # Runs even after a failure or cancellation so the named container does
      # not linger on the runner.
      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xpu || true
# XPU CI workflow: triggers, concurrency policy and the header of its single job.
name: PR Test (XPU)

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per ref; a newer run cancels the one still in progress.
concurrency:
  group: pr-test-xpu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-and-test:
    # Pull requests run only when labelled 'run-ci'; other triggers always run.
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
    # Least privilege instead of write-all: the steps of this job push a branch
    # and open a pull request; scopes not listed here are not granted.
    permissions:
      contents: write
      pull-requests: write
    runs-on: sglang-pvc
    steps:
# Checks out the repository; fetch-depth 0 asks actions/checkout for the full
# history rather than a shallow clone.
- name: Checkout code
  uses: actions/checkout@v4
  with:
    fetch-depth: 0

# Sets up Docker Buildx on the runner through docker/setup-buildx-action.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v2

# Builds the xpu_sglang:kernel image from Dockerfile.xpu_kernel, pointing the
# build at the branch and repository under test.
- name: Build Docker image
  # The branch name and clone URL are attacker-controlled on pull requests, so
  # they reach the shell through env vars instead of being pasted into the
  # script text. The fallbacks cover workflow_dispatch, where the pull_request
  # context is empty and the build args would otherwise be blank.
  env:
    KERNEL_BRANCH: ${{ github.head_ref || github.ref_name }}
    KERNEL_REPO: ${{ github.event.pull_request.head.repo.clone_url || format('{0}/{1}.git', github.server_url, github.repository) }}
  run: |
    docker build \
      --build-arg SG_LANG_KERNEL_BRANCH="$KERNEL_BRANCH" \
      --build-arg SG_LANG_KERNEL_REPO="$KERNEL_REPO" \
      --no-cache --progress=plain -f Dockerfile.xpu_kernel -t xpu_sglang:kernel .

# Starts the detached test container 'ci_sglang_xpu' from the freshly built
# image, with /dev/dri/ mapped in and the Hugging Face token in its environment.
- name: Run container
  run: |
    # A plain assignment fails the step if the token file is unreadable,
    # instead of silently starting the container with an empty token.
    HF_TOKEN="$(cat ~/huggingface_token.txt)"
    export HF_TOKEN
    # '-e HF_TOKEN' (name only) forwards the exported value, which keeps the
    # secret off the docker command line.
    docker run -dt \
      --device /dev/dri/ \
      --name ci_sglang_xpu \
      -e HF_TOKEN \
      xpu_sglang:kernel

# Prepares the Python environment inside the container: upgrades pip, installs
# the test packages, logs in to Hugging Face and links python3 into /usr/bin.
- name: Install Dependency
  timeout-minutes: 20
  run: |
    ENV_BIN=/miniforge3/envs/py3.10/bin
    docker exec ci_sglang_xpu "$ENV_BIN/python3" -m pip install --upgrade pip
    docker exec ci_sglang_xpu "$ENV_BIN/python3" -m pip install pytest expecttest ray huggingface_hub
    # Single quotes: HF_TOKEN is expanded inside the container, not on the runner.
    docker exec ci_sglang_xpu /bin/bash -c '/miniforge3/envs/py3.10/bin/huggingface-cli login --token ${HF_TOKEN} '
    docker exec ci_sglang_xpu /bin/bash -c "ln -sf $ENV_BIN/python3 /usr/bin/python3"

# Runs the 'per-commit' suite of run_suite.py from the tests directory inside
# the container.
- name: Run Sglang Kernel Cases
  timeout-minutes: 20
  run: >-
    docker exec -w /root/sglang ci_sglang_xpu
    /bin/bash -c "cd /root/sglang/sgl-kernel-xpu/tests && python3 run_suite.py --suite per-commit "

# Runs the flash-attention and MoE top-k softmax benchmarks inside the
# container, saving their output as flash.log and moe.log in the benchmark
# directory.
- name: Run Sglang Kernel Benchmarks
  timeout-minutes: 20
  run: |
    # pipefail: without it each pipeline reports tee's exit status, so a
    # crashing benchmark would still leave this step green.
    docker exec -w /root/sglang ci_sglang_xpu \
      /bin/bash -c "set -o pipefail; cd /root/sglang/sgl-kernel-xpu/benchmark && python3 bench_flash_attn.py 2>&1 | tee flash.log && python3 bench_moe_topk_softmax.py 2>&1 | tee moe.log"

# Despite its name, this step also gates on performance: it copies moe.log out
# of the container, compares the measured value with benchmark/baseline.json,
# fails when the value is above the baseline, and otherwise rewrites the
# baseline file with the new value.
- name: Copy logs from container
  timeout-minutes: 20
  # The numeric validation below relies on bash's [[ =~ ]].
  shell: bash
  run: |
    docker cp ci_sglang_xpu:/root/sglang/sgl-kernel-xpu/benchmark/moe.log ./moe.log
    # Field 7 of the second line after the 'topk-softmax-performance:' marker.
    LATEST=$(awk '/topk-softmax-performance:/ {getline; getline; print $7}' moe.log)
    echo "Latest=$LATEST"
    # Guard: with an empty or non-numeric value the comparison below would
    # error out, take the "Performance OK" branch, and float() with no
    # argument would store 0.0 as the new baseline.
    if ! [[ "$LATEST" =~ ^[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$ ]]; then
      echo "Could not parse a numeric benchmark result from moe.log"
      exit 1
    fi
    BASELINE=$(python3 -c "import json; print(json.load(open('benchmark/baseline.json'))['bench_moe_topk_softmax_baseline'])")
    echo "Baseline=$BASELINE"
    # Values are handed over with -v / argv instead of being spliced into the
    # awk and Python program text.
    if awk -v latest="$LATEST" -v baseline="$BASELINE" 'BEGIN { exit !(latest + 0 > baseline + 0) }'; then
      echo "Performance regression detected: $LATEST > $BASELINE"
      exit 1
    else
      echo "Performance OK: $LATEST <= $BASELINE"
      python3 -c "import json, sys; f=open('benchmark/baseline.json'); data=json.load(f); f.close(); data['bench_moe_topk_softmax_baseline']=float(sys.argv[1]); f=open('benchmark/baseline.json','w'); json.dump(data,f,indent=4); f.write('\n'); f.close()" "$LATEST"
      cat benchmark/baseline.json
      echo "Baseline updated to: $LATEST"
    fi

# Installs the gh command-line tool with apt, but only when the comma-joined
# pull-request label names contain the text 'perf'.
- name: Install GitHub CLI
  if: contains(join(github.event.pull_request.labels.*.name, ','), 'perf')
  run: sudo apt update && sudo apt install -y gh

- name: Auto PR for baseline.json update
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this part be separated into a standalone script? I think there might be more complex logic here.

# Commits the rewritten benchmark/baseline.json on a fresh branch, pushes it
# and opens a pull request against main. Runs only when every earlier step
# succeeded and the comma-joined PR label names contain the text 'perf'.
if: >
  success() &&
  contains(join(github.event.pull_request.labels.*.name, ','), 'perf')
env:
  # gh picks up GH_TOKEN by itself, so no 'gh auth login' is needed and the
  # token is neither pasted into the script text nor stored in .git/config or
  # the gh configuration on the runner.
  GH_TOKEN: ${{ secrets.PAT }}
run: |
  BRANCH="update-baseline-${GITHUB_RUN_ID}"
  git checkout -b "$BRANCH"
  git config user.name "github-actions[bot]"
  git config user.email "github-actions[bot]@users.noreply.github.com"

  git add benchmark/baseline.json
  if command -v pre-commit >/dev/null 2>&1; then
    # Best effort: hooks may reformat the file, so it is staged again afterwards.
    pre-commit run --files benchmark/baseline.json || true
    git add benchmark/baseline.json
  fi
  CHANGES=$(git diff --cached --numstat | wc -l)
  if [ "$CHANGES" -eq 0 ]; then
    echo "No changes in baseline.json, skipping PR creation."
    exit 0
  fi

  # Shell variables set in earlier steps are not visible in this one, so both
  # values are read again: HEAD still holds the previous baseline, the working
  # file holds the new one.
  read_baseline='import json, sys; print(json.load(sys.stdin)["bench_moe_topk_softmax_baseline"])'
  BASELINE=$(git show HEAD:benchmark/baseline.json | python3 -c "$read_baseline")
  LATEST=$(python3 -c "$read_baseline" < benchmark/baseline.json)

  # Report the staged diff before committing; after the commit it is empty.
  echo "Detected changes in baseline.json: $(git diff --cached --stat)"
  git commit -m "Update baseline.json from CI benchmark"

  git push -f "https://x-access-token:${GH_TOKEN}@github.com/sgl-project/sgl-kernel-xpu.git" "$BRANCH"

  PR_URL=$(gh pr create \
    --repo sgl-project/sgl-kernel-xpu \
    --title "CI: Update baseline.json" \
    --body "$(printf 'Latest: %s\nPrevious: %s' "$LATEST" "$BASELINE")" \
    --base main \
    --head "$BRANCH"
  )

  echo "PR created at $PR_URL"

# Placeholder step: it only prints a marker line.
- name: Run E2E Bfloat16 tests
  timeout-minutes: 20
  run: echo "[PlaceHolder for E2E Test...]"

# Placeholder step: it only prints a marker line.
- name: Run E2E Quantization tests
  timeout-minutes: 20
  run: |
    echo "[PlaceHolder for E2E Test...]"

# Always runs, whatever the outcome of earlier steps, and force-removes the
# test container; '|| true' keeps the step green when removal fails.
- name: Cleanup container
  if: always()
  run: docker rm -f ci_sglang_xpu || true
3 changes: 3 additions & 0 deletions benchmark/baseline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"bench_moe_topk_softmax_baseline": 30.01
}