From 9b1def10f707e55ec837e43c93bdf18b7bc1f055 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Thu, 23 Oct 2025 13:42:10 +0000
Subject: [PATCH 01/16] Fixed sglang testing, added pin, updated fix

---
 .github/workflows/sglang-tests.yml            | 100 --------
 .github/workflows/third-party-tests.yml       |  81 +++++-
 .../third_party/sglang/sglang-fix.patch       | 242 ++++++++++++++++--
 benchmarks/third_party/sglang/sglang-pin.txt  |   1 +
 scripts/test-triton.sh                        |  44 +++-
 5 files changed, 321 insertions(+), 147 deletions(-)
 delete mode 100644 .github/workflows/sglang-tests.yml
 create mode 100644 benchmarks/third_party/sglang/sglang-pin.txt

diff --git a/.github/workflows/sglang-tests.yml b/.github/workflows/sglang-tests.yml
deleted file mode 100644
index dc5cabc991..0000000000
--- a/.github/workflows/sglang-tests.yml
+++ /dev/null
@@ -1,100 +0,0 @@
-name: Third party SGLang tests
-
-on:
-  workflow_dispatch:
-    inputs:
-      runner_label:
-        description: Runner label, keep empty for default
-        type: string
-        default: ""
-      use_pyenv_python:
-        description: Use Python built with pyenv
-        type: boolean
-        default: false
-  schedule:
-    # About midnight PST Sunday (UTC-8)
-    - cron: "5 10 * * SUN"
-
-
-# Cancels in-progress PR runs when the PR is updated.  Manual runs are never cancelled.
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event_name == 'workflow_dispatch' && github.run_id || github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-permissions: read-all
-
-env:
-  PYTHON_VERSION: "3.10"
-  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
-
-jobs:
-  build:
-    name: SGLang tests
-    runs-on:
-      - linux
-      - ${{ inputs.runner_label || 'rolling' }}
-    timeout-minutes: 720
-    defaults:
-      run:
-        shell: bash -noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
-    steps:
-      - name: Print inputs
-        run: |
-          cat <<EOF
-          ${{ toJSON(inputs) }}
-          EOF
-
-      - name: Checkout repository
-        uses: actions/checkout@v5
-
-      - name: Install Python
-        if: ${{ !(inputs.use_pyenv_python || false) }}
-        uses: actions/setup-python@v6
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Install Python (from pyenv) ${{ inputs.python_version }}
-        if: ${{ inputs.use_pyenv_python }}
-        uses: ./.github/actions/setup-pyenv-python
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
-      - name: Identify Python version
-        run: |
-          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{ sys.version_info[1]}")')"
-          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
-
-      - name: Install Python build dependencies
-        run: |
-          pip install cmake
-
-      - name: Create reports dir
-        run: |
-          mkdir reports
-          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
-
-      - name: Install SGLang
-        id: install
-        run: |
-          git clone https://github.com/sgl-project/sglang.git
-          cd sglang
-          git apply ../benchmarks/third_party/sglang/sglang-fix.patch
-          pip install "./python[dev_xpu]"
-
-      - name: Setup PyTorch
-        uses: ./.github/actions/setup-pytorch
-
-      - name: Setup Triton
-        uses: ./.github/actions/setup-triton
-
-      - name: Run SGLANG tests
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
-        run: |
-          ./scripts/test-triton.sh --sglang --skip-pip-install --skip-pytorch-install
-
-      - name: Upload test report
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-reports
-          path: reports
diff --git a/.github/workflows/third-party-tests.yml b/.github/workflows/third-party-tests.yml
index 41a38b5c3f..8415d15b86 100644
--- a/.github/workflows/third-party-tests.yml
+++ b/.github/workflows/third-party-tests.yml
@@ -1,4 +1,4 @@
-name: Third party tests [liger-kernels, vllm]
+name: Third party tests [liger-kernels, vllm, sglang]
 
 on:
   workflow_dispatch:
@@ -28,12 +28,12 @@ env:
   TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
 
 jobs:
-  build:
-    name: Third party tests [liger-kernels, vllm]
+  small-tests:
+    name: Third party tests [vllm, sglang]
     runs-on:
       - linux
       - ${{ inputs.runner_label || 'max1550' }}
-    timeout-minutes: 720
+    timeout-minutes: 120
     defaults:
       run:
         shell: bash -noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
@@ -47,14 +47,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v5
 
-      - name: Install Python
-        if: ${{ !(inputs.use_pyenv_python || false) }}
-        uses: actions/setup-python@v6
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
       - name: Install Python (from pyenv) ${{ inputs.python_version }}
-        if: ${{ inputs.use_pyenv_python }}
         uses: ./.github/actions/setup-pyenv-python
         with:
           python-version: ${{ env.PYTHON_VERSION }}
@@ -86,13 +79,75 @@ jobs:
           mkdir reports
           echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
 
+      - name: Run SGLANG tests
+        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        run: |
+          ./scripts/test-triton.sh --sglang --skip-pip-install --skip-pytorch-install
+
       - name: Run VLLM tests
         if: ${{ steps.install.outcome == 'success' && !cancelled() }}
         run: |
           ./scripts/test-triton.sh --vllm --skip-pip-install --skip-pytorch-install
 
-      - name: Run Liger-Kernel tests
+      - name: Upload test report
         if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-main-reports
+          path: reports
+  # We run all tests for Liger, so it's slow and we test it separately
+  liger:
+    name: Liger testing
+    runs-on:
+      - linux
+      - ${{ inputs.runner_label || 'max1550' }}
+    timeout-minutes: 120
+    defaults:
+      run:
+        shell: bash -noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
+    steps:
+      - name: Print inputs
+        run: |
+          cat <<EOF
+          ${{ toJSON(inputs) }}
+          EOF
+
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Install Python (from pyenv) ${{ inputs.python_version }}
+        uses: ./.github/actions/setup-pyenv-python
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Identify Python version
+        run: |
+          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{ sys.version_info[1]}")')"
+          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
+
+      - name: Install Python build dependencies
+        run: |
+          pip install cmake
+
+      - name: Setup PyTorch
+        uses: ./.github/actions/setup-pytorch
+
+      - name: Build Triton wheels
+        uses: ./.github/actions/setup-triton
+        with:
+          command: DEBUG=1 python -m build --wheel --no-isolation
+
+      - name: Install Triton
+        id: install
+        run: |
+          pip install dist/*.whl
+
+      - name: Create reports dir
+        run: |
+          mkdir reports
+          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
+
+      - name: Run Liger-Kernel tests
         run: |
           ./scripts/test-triton.sh --liger --skip-pip-install --skip-pytorch-install
 
@@ -100,5 +155,5 @@ jobs:
         if: ${{ steps.install.outcome == 'success' && !cancelled() }}
         uses: actions/upload-artifact@v4
         with:
-          name: test-reports
+          name: test-liger-reports
           path: reports
diff --git a/benchmarks/third_party/sglang/sglang-fix.patch b/benchmarks/third_party/sglang/sglang-fix.patch
index 9b9d38dc43..b3769b6385 100644
--- a/benchmarks/third_party/sglang/sglang-fix.patch
+++ b/benchmarks/third_party/sglang/sglang-fix.patch
@@ -1,9 +1,9 @@
-diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
-index bc2affa1..8ef91e66 100644
---- a/python/sglang/srt/utils.py
-+++ b/python/sglang/srt/utils.py
-@@ -228,6 +228,22 @@ def is_flashinfer_available():
-     return importlib.util.find_spec("flashinfer") is not None and is_cuda()
+diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py
+index 7c2f573e4..8023cd6be 100644
+--- a/python/sglang/srt/utils/common.py
++++ b/python/sglang/srt/utils/common.py
+@@ -155,12 +155,44 @@ def is_cpu() -> bool:
+     return os.getenv("SGLANG_USE_CPU_ENGINE", "0") == "1" and is_host_cpu_x86()
  
  
 +def auto_detect_device():
@@ -22,26 +22,48 @@ index bc2affa1..8ef91e66 100644
 +        return "cpu"
 +
 +
- _ENABLE_TORCH_INFERENCE_MODE = get_bool_env_var(
-     "SGLANG_ENABLE_TORCH_INFERENCE_MODE", "false"
- )
+ def get_cuda_version():
+     if torch.version.cuda:
+         return tuple(map(int, torch.version.cuda.split(".")))
+     return (0, 0)
+ 
+ 
++def auto_detect_device():
++    """
++    Infer the device type based on the current environment.
++    """
++    if is_cuda_alike():
++        return "cuda"
++    elif is_xpu():
++        return "xpu"
++    elif is_hpu():
++        return "hpu"
++    elif is_npu():
++        return "npu"
++    else:
++        return "cpu"
++
++
+ def _check(cc_major):
+     if not is_cuda():
+         return False
 diff --git a/test/srt/test_triton_attention_kernels.py b/test/srt/test_triton_attention_kernels.py
-index 47eb16a9..cce70fb9 100644
+index 16c107006..03b9411fa 100644
 --- a/test/srt/test_triton_attention_kernels.py
 +++ b/test/srt/test_triton_attention_kernels.py
-@@ -16,8 +16,11 @@ from sglang.srt.layers.attention.triton_ops.prefill_attention import (
+@@ -18,8 +18,11 @@ from sglang.srt.layers.attention.triton_ops.extend_attention import (
+ from sglang.srt.layers.attention.triton_ops.prefill_attention import (
      context_attention_fwd,
  )
- from sglang.test.test_utils import CustomTestCase
 +from sglang.srt.utils import auto_detect_device
- 
+ from sglang.test.test_utils import CustomTestCase
  
 +device = auto_detect_device()
 +
- class TestTritonAttention(CustomTestCase):
  
-     def _set_all_seeds(self, seed):
-@@ -37,24 +40,24 @@ class TestTritonAttention(CustomTestCase):
+ def extend_attention_fwd_torch(
+     q: torch.Tensor,  # [extend_tokens, H_Q, D]
+@@ -114,24 +117,24 @@ class TestTritonAttention(CustomTestCase):
          dtype = torch.bfloat16
  
          b_seq_len_prefix = torch.randint(
@@ -73,7 +95,7 @@ index 47eb16a9..cce70fb9 100644
          )
  
          for i in range(B):
-@@ -65,15 +68,15 @@ class TestTritonAttention(CustomTestCase):
+@@ -142,15 +145,15 @@ class TestTritonAttention(CustomTestCase):
          total_token_num = torch.sum(b_seq_len).item()
          extend_token_num = torch.sum(b_seq_len_extend).item()
          k_buffer = torch.empty(
@@ -94,7 +116,7 @@ index 47eb16a9..cce70fb9 100644
          for i in range(B):
              extend_start_in_buffer = b_start_loc[i] + b_seq_len_prefix[i]
              extend_end_in_buffer = b_start_loc[i] + b_seq_len[i]
-@@ -86,20 +89,20 @@ class TestTritonAttention(CustomTestCase):
+@@ -163,20 +166,20 @@ class TestTritonAttention(CustomTestCase):
                  extend_start_in_buffer:extend_end_in_buffer
              ]
              q_extend[extend_start:extend_end] = torch.empty(
@@ -120,7 +142,7 @@ index 47eb16a9..cce70fb9 100644
          qo_indptr[1 : B + 1] = torch.cumsum(b_seq_len_extend[:B], dim=0)
  
          custom_mask = None
-@@ -123,9 +126,9 @@ class TestTritonAttention(CustomTestCase):
+@@ -200,9 +203,9 @@ class TestTritonAttention(CustomTestCase):
  
          b_seq_mask_len = b_seq_len_extend * b_seq_len
          custom_mask = torch.ones(
@@ -132,7 +154,81 @@ index 47eb16a9..cce70fb9 100644
          mask_indptr[1 : B + 1] = torch.cumsum(b_seq_mask_len[:B], dim=0)
          for i in range(B):
              causal_mask = (
-@@ -187,14 +190,14 @@ class TestTritonAttention(CustomTestCase):
+@@ -263,22 +266,22 @@ class TestTritonAttention(CustomTestCase):
+         dtype = torch.bfloat16
+ 
+         b_seq_len_prefix = torch.randint(
+-            1, N_CTX // 2, (B,), dtype=torch.int32, device="cuda"
++            1, N_CTX // 2, (B,), dtype=torch.int32, device=device
+         )
+         b_seq_len_extend = torch.randint(
+-            1, N_CTX // 2, (B,), dtype=torch.int32, device="cuda"
++            1, N_CTX // 2, (B,), dtype=torch.int32, device=device
+         )
+         b_seq_len = b_seq_len_prefix + b_seq_len_extend
+ 
+-        b_start_loc = torch.zeros((B,), dtype=torch.int32, device="cuda")
++        b_start_loc = torch.zeros((B,), dtype=torch.int32, device=device)
+         b_start_loc[1:] = torch.cumsum(b_seq_len[:-1], 0)
+-        b_start_loc_extend = torch.zeros((B,), dtype=torch.int32, device="cuda")
++        b_start_loc_extend = torch.zeros((B,), dtype=torch.int32, device=device)
+         b_start_loc_extend[1:] = torch.cumsum(b_seq_len_extend[:-1], 0)
+ 
+-        kv_indptr = torch.zeros((B + 1,), dtype=torch.int32, device="cuda")
++        kv_indptr = torch.zeros((B + 1,), dtype=torch.int32, device=device)
+         kv_indptr[1 : B + 1] = torch.cumsum(b_seq_len_prefix[:B], dim=0)
+         kv_indices = torch.zeros(
+-            (b_seq_len_prefix.sum().item(),), dtype=torch.int32, device="cuda"
++            (b_seq_len_prefix.sum().item(),), dtype=torch.int32, device=device
+         )
+ 
+         for i in range(B):
+@@ -289,15 +292,15 @@ class TestTritonAttention(CustomTestCase):
+         total_token_num = torch.sum(b_seq_len).item()
+         extend_token_num = torch.sum(b_seq_len_extend).item()
+         k_buffer = torch.empty(
+-            (total_token_num, H_KV, D), dtype=dtype, device="cuda"
++            (total_token_num, H_KV, D), dtype=dtype, device=device
+         ).normal_(mean=0.1, std=0.2)
+         v_buffer = torch.empty(
+-            (total_token_num, H_KV, D), dtype=dtype, device="cuda"
++            (total_token_num, H_KV, D), dtype=dtype, device=device
+         ).normal_(mean=0.1, std=0.2)
+ 
+-        k_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device="cuda")
+-        v_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device="cuda")
+-        q_extend = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device="cuda")
++        k_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device=device)
++        v_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device=device)
++        q_extend = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device=device)
+         for i in range(B):
+             extend_start_in_buffer = b_start_loc[i] + b_seq_len_prefix[i]
+             extend_end_in_buffer = b_start_loc[i] + b_seq_len[i]
+@@ -310,19 +313,19 @@ class TestTritonAttention(CustomTestCase):
+                 extend_start_in_buffer:extend_end_in_buffer
+             ]
+             q_extend[extend_start:extend_end] = torch.empty(
+-                (b_seq_len_extend[i], H_Q, D), dtype=dtype, device="cuda"
++                (b_seq_len_extend[i], H_Q, D), dtype=dtype, device=device
+             ).normal_(mean=0.1, std=0.2)
+ 
+         o_extend_triton = torch.empty(
+-            (extend_token_num, H_Q, D), dtype=dtype, device="cuda"
++            (extend_token_num, H_Q, D), dtype=dtype, device=device
+         )
+         o_extend_torch = torch.empty(
+-            (extend_token_num, H_Q, D), dtype=dtype, device="cuda"
++            (extend_token_num, H_Q, D), dtype=dtype, device=device
+         )
+ 
+         b_seq_len_extend = b_seq_len - b_seq_len_prefix
+         max_len_extend = torch.max(b_seq_len_extend, 0)[0].item()
+-        qo_indptr = torch.zeros((B + 1,), dtype=torch.int32, device="cuda")
++        qo_indptr = torch.zeros((B + 1,), dtype=torch.int32, device=device)
+         qo_indptr[1 : B + 1] = torch.cumsum(b_seq_len_extend[:B], dim=0)
+ 
+         extend_attention_fwd(
+@@ -373,14 +376,14 @@ class TestTritonAttention(CustomTestCase):
          max_seq_len = max(seq_lens)
  
          # Create random input tensors
@@ -153,7 +249,7 @@ index 47eb16a9..cce70fb9 100644
  
          context_attention_fwd(
              q, k, v, o, b_start_loc, b_seq_len, max_seq_len, is_causal=is_causal
-@@ -232,33 +235,33 @@ class TestTritonAttention(CustomTestCase):
+@@ -418,33 +421,33 @@ class TestTritonAttention(CustomTestCase):
          total_tokens = B * seq_len
          sm_scale = 1.0 / (D**0.5)
          max_kv_splits = 8
@@ -197,7 +293,7 @@ index 47eb16a9..cce70fb9 100644
          )
  
          decode_attention_fwd(
-@@ -296,34 +299,34 @@ class TestTritonAttention(CustomTestCase):
+@@ -482,34 +485,34 @@ class TestTritonAttention(CustomTestCase):
          total_tokens = B * seq_len
          sm_scale = 1.0 / (D**0.5)
          max_kv_splits = 8
@@ -243,7 +339,7 @@ index 47eb16a9..cce70fb9 100644
          )
  
          decode_attention_fwd_normal(
-@@ -343,12 +346,12 @@ class TestTritonAttention(CustomTestCase):
+@@ -529,12 +532,12 @@ class TestTritonAttention(CustomTestCase):
          attn_logits1 = torch.empty(
              (B, H_Q, max_kv_splits, D_V),
              dtype=torch.float32,
@@ -258,3 +354,103 @@ index 47eb16a9..cce70fb9 100644
          )
  
          decode_attention_fwd_grouped(
+@@ -578,23 +581,23 @@ class TestTritonAttention(CustomTestCase):
+         dtype = torch.bfloat16
+ 
+         b_seq_len_prefix = torch.randint(
+-            1, N_CTX // 2, (B,), dtype=torch.int32, device="cuda"
++            1, N_CTX // 2, (B,), dtype=torch.int32, device=device
+         )
+         b_seq_len_extend = torch.randint(
+-            1, N_CTX // 2, (B,), dtype=torch.int32, device="cuda"
++            1, N_CTX // 2, (B,), dtype=torch.int32, device=device
+         )
+         b_seq_len = b_seq_len_prefix + b_seq_len_extend
+ 
+-        b_start_loc = torch.zeros((B,), dtype=torch.int32, device="cuda")
++        b_start_loc = torch.zeros((B,), dtype=torch.int32, device=device)
+         b_start_loc[1:] = torch.cumsum(b_seq_len[:-1], 0)
+-        b_start_loc_extend = torch.zeros((B,), dtype=torch.int32, device="cuda")
++        b_start_loc_extend = torch.zeros((B,), dtype=torch.int32, device=device)
+         b_start_loc_extend[1:] = torch.cumsum(b_seq_len_extend[:-1], 0)
+ 
+         # Setup prefix KV indices
+-        kv_indptr = torch.zeros((B + 1,), dtype=torch.int32, device="cuda")
++        kv_indptr = torch.zeros((B + 1,), dtype=torch.int32, device=device)
+         kv_indptr[1 : B + 1] = torch.cumsum(b_seq_len_prefix[:B], dim=0)
+         kv_indices = torch.zeros(
+-            (b_seq_len_prefix.sum().item(),), dtype=torch.int64, device="cuda"
++            (b_seq_len_prefix.sum().item(),), dtype=torch.int64, device=device
+         )
+ 
+         for i in range(B):
+@@ -605,15 +608,15 @@ class TestTritonAttention(CustomTestCase):
+         total_token_num = torch.sum(b_seq_len).item()
+         extend_token_num = torch.sum(b_seq_len_extend).item()
+         k_buffer = torch.empty(
+-            (total_token_num, H_KV, D), dtype=dtype, device="cuda"
++            (total_token_num, H_KV, D), dtype=dtype, device=device
+         ).normal_(mean=0.1, std=0.2)
+         v_buffer = torch.empty(
+-            (total_token_num, H_KV, D), dtype=dtype, device="cuda"
++            (total_token_num, H_KV, D), dtype=dtype, device=device
+         ).normal_(mean=0.1, std=0.2)
+ 
+-        k_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device="cuda")
+-        v_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device="cuda")
+-        q_extend = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device="cuda")
++        k_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device=device)
++        v_extend = torch.empty((extend_token_num, H_KV, D), dtype=dtype, device=device)
++        q_extend = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device=device)
+ 
+         for i in range(B):
+             extend_start_in_buffer = b_start_loc[i] + b_seq_len_prefix[i]
+@@ -627,16 +630,16 @@ class TestTritonAttention(CustomTestCase):
+                 extend_start_in_buffer:extend_end_in_buffer
+             ]
+             q_extend[extend_start:extend_end] = torch.empty(
+-                (b_seq_len_extend[i], H_Q, D), dtype=dtype, device="cuda"
++                (b_seq_len_extend[i], H_Q, D), dtype=dtype, device=device
+             ).normal_(mean=0.1, std=0.2)
+ 
+         # Setup for extend attention
+         max_len_extend = torch.max(b_seq_len_extend, 0)[0].item()
+-        qo_indptr = torch.zeros((B + 1,), dtype=torch.int32, device="cuda")
++        qo_indptr = torch.zeros((B + 1,), dtype=torch.int32, device=device)
+         qo_indptr[1 : B + 1] = torch.cumsum(b_seq_len_extend[:B], dim=0)
+ 
+         # Run 2-stage kernel
+-        o_regular = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device="cuda")
++        o_regular = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device=device)
+         extend_attention_fwd(
+             q_extend,
+             k_extend,
+@@ -658,9 +661,9 @@ class TestTritonAttention(CustomTestCase):
+             total_token_num - extend_token_num,
+             total_token_num,
+             dtype=torch.int64,
+-            device="cuda",
++            device=device,
+         )
+-        extend_start_loc = torch.zeros((B,), dtype=torch.int32, device="cuda")
++        extend_start_loc = torch.zeros((B,), dtype=torch.int32, device=device)
+         extend_start_loc[1:] = torch.cumsum(b_seq_len_extend[:-1], 0)
+ 
+         unified_kv_indptr, unified_kv_indices, prefix_lens = build_unified_kv_indices(
+@@ -673,7 +676,7 @@ class TestTritonAttention(CustomTestCase):
+         )
+ 
+         # Run unified kernel
+-        o_unified = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device="cuda")
++        o_unified = torch.empty((extend_token_num, H_Q, D), dtype=dtype, device=device)
+         extend_attention_fwd_unified(
+             q_extend,
+             o_unified,
+@@ -716,7 +719,6 @@ class TestTritonAttention(CustomTestCase):
+         """Test build_unified_kv_indices correctness."""
+         B = 4
+         dtype = torch.int64
+-        device = "cuda"
+ 
+         # Setup test data
+         prefix_lens = torch.tensor([10, 20, 15, 25], dtype=torch.int32, device=device)
diff --git a/benchmarks/third_party/sglang/sglang-pin.txt b/benchmarks/third_party/sglang/sglang-pin.txt
new file mode 100644
index 0000000000..8f8517ba4b
--- /dev/null
+++ b/benchmarks/third_party/sglang/sglang-pin.txt
@@ -0,0 +1 @@
+d6fee73d1f593bd6754cd2550775fd2e54aeae60
diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 0bdc5de7ad..27ef883d99 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -30,6 +30,7 @@ TEST:
     --liger
     --vllm
     --install-vllm
+    --install-sglang
 
 OPTION:
     --unskip
@@ -74,6 +75,7 @@ TEST_SGLANG=false
 TEST_LIGER=false
 TEST_VLLM=false
 INSTALL_VLLM=false
+INSTALL_SGLANG=false
 TEST_TRITON_KERNELS=false
 VENV=false
 TRITON_TEST_REPORTS=false
@@ -190,6 +192,11 @@ while (( $# != 0 )); do
       TEST_DEFAULT=false
       shift
       ;;
+    --install-sglang)
+      INSTALL_SGLANG=true
+      TEST_DEFAULT=false
+      shift
+      ;;
     --sglang)
       TEST_SGLANG=true
       TEST_DEFAULT=false
@@ -589,26 +596,38 @@ run_inductor_tests() {
   grep AlbertForMaskedLM inductor_log.csv | grep -q ,pass,
 }
 
-run_sglang_tests() {
-  echo "***************************************************"
-  echo "******    Running SGLang Triton tests        ******"
-  echo "***************************************************"
+run_sglang_install() {
+  echo "************************************************"
+  echo "******    Installing SGLang                 ****"
+  echo "************************************************"
 
   if ! [ -d "./sglang" ]; then
     git clone https://github.com/sgl-project/sglang.git
   fi
-  cd sglang
 
   if ! pip list | grep "sglang" ; then
-    git apply $TRITON_PROJ/benchmarks/third_party/sglang/sglang-fix.patch
+    cd sglang
+    git checkout "$(<../benchmarks/third_party/sglang/sglang-pin.txt)"
+    git apply ../benchmarks/third_party/sglang/sglang-fix.patch
+
+    # That's how sglang assumes we'll pick out platform for now
+    cp python/pyproject_xpu.toml python/pyproject.toml
+    # We should remove all torch libraries from requirements to avoid reinstalling triton & torch
+    # We remove sgl kernel due to a bug in the current environment probably due to using newer torch
+    sed -i '/pytorch\|torch\|sgl-kernel/d' python/pyproject.toml
     pip install "./python[dev_xpu]"
-
-    # SGLang installation breaks the default PyTorch and Triton versions, so we need to reinstall them.
-    $SCRIPTS_DIR/install-pytorch.sh --force-reinstall
-    $SCRIPTS_DIR/compile-triton.sh --triton
+    cd ..
   fi
 
-  pip install pytest pytest-xdist
+  pip install pytest pytest-cov pytest-xdist
+}
+
+run_sglang_tests() {
+  echo "***************************************************"
+  echo "******    Running SGLang Triton tests        ******"
+  echo "***************************************************"
+
+  run_sglang_install
   run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} test/srt/test_triton_attention_kernels.py
 }
 
@@ -745,6 +764,9 @@ test_triton() {
   if [ "$TEST_INDUCTOR" == true ]; then
     run_inductor_tests
   fi
+  if [ "$INSTALL_SGLANG" == true ]; then
+    run_sglang_install
+  fi
   if [ "$TEST_SGLANG" == true ]; then
     run_sglang_tests
   fi

From 7875d12c46388b6abd70bd7dc45a5216ee73b6ca Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Thu, 23 Oct 2025 15:06:55 +0000
Subject: [PATCH 02/16] Debug: run SGLang tests from the sglang checkout, strip torch from vllm-xpu-kernels pyproject.toml

---
 scripts/test-triton.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 27ef883d99..ff1ac3f0e9 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -628,6 +628,7 @@ run_sglang_tests() {
   echo "***************************************************"
 
   run_sglang_install
+  cd sglang
   run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} test/srt/test_triton_attention_kernels.py
 }
 
@@ -667,8 +668,9 @@ run_vllm_install() {
     cd vllm-xpu-kernels
     git checkout "$(<../benchmarks/third_party/vllm/vllm-kernels-pin.txt)"
     sed -i '/pytorch\|torch/d' requirements.txt
+    sed -i '/pytorch\|torch/d' pyproject.toml
     pip install -r requirements.txt
-    VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation -e .
+    VLLM_TARGET_DEVICE=xpu pip install -vvv --no-build-isolation .
     cd ..
 
     VLLM_TARGET_DEVICE=xpu pip install --no-deps --no-build-isolation -e vllm
@@ -691,7 +693,7 @@ run_vllm_tests() {
 
 run_triton_kernels_tests() {
   echo "***************************************************"
-  echo "******    Running Triton Kernels tests      ******"
+  echo "******    Running Triton Kernels tests      ******"requirements.txpt
   echo "***************************************************"
   cd $TRITON_PROJ/python/triton_kernels/tests
 

From 40da9ae42ad09ee02a57ff349112019e0a615054 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Thu, 23 Oct 2025 15:52:57 +0000
Subject: [PATCH 03/16] Debug: use setup-triton directly in small-tests job, install SGLang editable without extras

---
 .github/workflows/third-party-tests.yml | 9 +--------
 scripts/test-triton.sh                  | 4 +++-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/third-party-tests.yml b/.github/workflows/third-party-tests.yml
index 8415d15b86..9fda3ed4f1 100644
--- a/.github/workflows/third-party-tests.yml
+++ b/.github/workflows/third-party-tests.yml
@@ -64,15 +64,8 @@ jobs:
       - name: Setup PyTorch
         uses: ./.github/actions/setup-pytorch
 
-      - name: Build Triton wheels
+      - name: Setup Triton
         uses: ./.github/actions/setup-triton
-        with:
-          command: DEBUG=1 python -m build --wheel --no-isolation
-
-      - name: Install Triton
-        id: install
-        run: |
-          pip install dist/*.whl
 
       - name: Create reports dir
         run: |
diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index ff1ac3f0e9..c588910bc8 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -615,7 +615,9 @@ run_sglang_install() {
     # We should remove all torch libraries from requirements to avoid reinstalling triton & torch
     # We remove sgl kernel due to a bug in the current environment probably due to using newer torch
     sed -i '/pytorch\|torch\|sgl-kernel/d' python/pyproject.toml
-    pip install "./python[dev_xpu]"
+    echo "pyproject.toml after modification:"
+    cat python/pyproject.toml
+    pip install -e "./python"
     cd ..
   fi
 

From a638d14ddf212148c708fd698642272a8628fb78 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Thu, 23 Oct 2025 16:08:39 +0000
Subject: [PATCH 04/16] Debug: restore `install` step id on Setup Triton, use setup-triton in liger job

---
 .github/workflows/third-party-tests.yml | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/third-party-tests.yml b/.github/workflows/third-party-tests.yml
index 9fda3ed4f1..2dac6d56ba 100644
--- a/.github/workflows/third-party-tests.yml
+++ b/.github/workflows/third-party-tests.yml
@@ -65,6 +65,7 @@ jobs:
         uses: ./.github/actions/setup-pytorch
 
       - name: Setup Triton
+        id: install
         uses: ./.github/actions/setup-triton
 
       - name: Create reports dir
@@ -125,15 +126,9 @@ jobs:
       - name: Setup PyTorch
         uses: ./.github/actions/setup-pytorch
 
-      - name: Build Triton wheels
-        uses: ./.github/actions/setup-triton
-        with:
-          command: DEBUG=1 python -m build --wheel --no-isolation
-
-      - name: Install Triton
+      - name: Setup Triton
         id: install
-        run: |
-          pip install dist/*.whl
+        uses: ./.github/actions/setup-triton
 
       - name: Create reports dir
         run: |

From 31f641d3393319bb71d80081f047e01d5bd91ede Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 07:58:11 +0000
Subject: [PATCH 05/16] Debug: verbose SGLang pip install, remove stray text from Triton Kernels banner

---
 scripts/test-triton.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index c588910bc8..437a3aaab5 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -617,7 +617,7 @@ run_sglang_install() {
     sed -i '/pytorch\|torch\|sgl-kernel/d' python/pyproject.toml
     echo "pyproject.toml after modification:"
     cat python/pyproject.toml
-    pip install -e "./python"
+    pip install -vvv -e "./python"
     cd ..
   fi
 
@@ -695,7 +695,7 @@ run_vllm_tests() {
 
 run_triton_kernels_tests() {
   echo "***************************************************"
-  echo "******    Running Triton Kernels tests      ******"requirements.txpt
+  echo "******    Running Triton Kernels tests      *******"
   echo "***************************************************"
   cd $TRITON_PROJ/python/triton_kernels/tests
 

From 4b961f05451c625397f09613708ddf53212477fb Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 07:59:19 +0000
Subject: [PATCH 06/16] Debug: print reverse dependency tree of torch after SGLang install

---
 scripts/test-triton.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 437a3aaab5..7f43a6aab3 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -618,6 +618,8 @@ run_sglang_install() {
     echo "pyproject.toml after modification:"
     cat python/pyproject.toml
     pip install -vvv -e "./python"
+    pip install pipdeptree
+    pipdeptree -r -p torch
     cd ..
   fi
 

From 7e60298605276b266c5a642628b01b96dd0435bf Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 08:24:09 +0000
Subject: [PATCH 07/16] Debug: lower Liger pytest and vLLM install verbosity

---
 scripts/test-triton.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 7f43a6aab3..f200468a76 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -649,7 +649,7 @@ run_liger_tests() {
     pip install pytest pytest-xdist pytest-cov transformers pandas pytest datasets -e Liger-Kernel
   fi
 
-  run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
+  run_pytest_command -vv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
 }
 
 run_vllm_install() {
@@ -674,7 +674,7 @@ run_vllm_install() {
     sed -i '/pytorch\|torch/d' requirements.txt
     sed -i '/pytorch\|torch/d' pyproject.toml
     pip install -r requirements.txt
-    VLLM_TARGET_DEVICE=xpu pip install -vvv --no-build-isolation .
+    VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation .
     cd ..
 
     VLLM_TARGET_DEVICE=xpu pip install --no-deps --no-build-isolation -e vllm

From 74fd84910a558f38492d5b31b5b416454546de05 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 08:56:03 +0000
Subject: [PATCH 08/16] Debug: note timm rationale, drop -vvv from SGLang install

---
 scripts/test-triton.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index f200468a76..9fb877d531 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -613,11 +613,12 @@ run_sglang_install() {
     # That's how sglang assumes we'll pick out platform for now
     cp python/pyproject_xpu.toml python/pyproject.toml
     # We should remove all torch libraries from requirements to avoid reinstalling triton & torch
-    # We remove sgl kernel due to a bug in the current environment probably due to using newer torch
+    # We remove sgl kernel due to a bug in the current environment probably due to using newer torch, we don't currently use it anyway
+    # We remove timm because it depends on torchvision, which depends on torch==2.9
     sed -i '/pytorch\|torch\|sgl-kernel/d' python/pyproject.toml
     echo "pyproject.toml after modification:"
     cat python/pyproject.toml
-    pip install -vvv -e "./python"
+    pip install -e "./python"
     pip install pipdeptree
     pipdeptree -r -p torch
     cd ..

From b2874409655a796c3c5118c35a6d84c76d33be23 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 09:21:27 +0000
Subject: [PATCH 09/16] Debug: strip timm from SGLang pyproject.toml

---
 scripts/test-triton.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 9fb877d531..fe278c4499 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -615,7 +615,7 @@ run_sglang_install() {
     # We should remove all torch libraries from requirements to avoid reinstalling triton & torch
     # We remove sgl kernel due to a bug in the current environment probably due to using newer torch, we don't currently use it anyway
     # We remove timm because it depends on torchvision, which depends on torch==2.9
-    sed -i '/pytorch\|torch\|sgl-kernel/d' python/pyproject.toml
+    sed -i '/pytorch\|torch\|sgl-kernel\|timm/d' python/pyproject.toml
     echo "pyproject.toml after modification:"
     cat python/pyproject.toml
     pip install -e "./python"

From 950d196cce2e8409ec2c580985023c8eaa5bd20d Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 09:54:38 +0000
Subject: [PATCH 10/16] Cleanup

---
 scripts/test-triton.sh | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index fe278c4499..575ca2461d 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -616,11 +616,8 @@ run_sglang_install() {
     # We remove sgl kernel due to a bug in the current environment probably due to using newer torch, we don't currently use it anyway
     # We remove timm because it depends on torchvision, which depends on torch==2.9
     sed -i '/pytorch\|torch\|sgl-kernel\|timm/d' python/pyproject.toml
-    echo "pyproject.toml after modification:"
     cat python/pyproject.toml
     pip install -e "./python"
-    pip install pipdeptree
-    pipdeptree -r -p torch
     cd ..
   fi
 

From 787f6fc4c29b792ac3d2893a5ed2f12e1a1ffeb1 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 10:21:12 +0000
Subject: [PATCH 11/16] Cleaned up

---
 scripts/test-triton.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 575ca2461d..bc347f4a00 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -647,7 +647,7 @@ run_liger_tests() {
     pip install pytest pytest-xdist pytest-cov transformers pandas pytest datasets -e Liger-Kernel
   fi
 
-  run_pytest_command -vv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
+  run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
 }
 
 run_vllm_install() {
@@ -672,7 +672,7 @@ run_vllm_install() {
     sed -i '/pytorch\|torch/d' requirements.txt
     sed -i '/pytorch\|torch/d' pyproject.toml
     pip install -r requirements.txt
-    VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation .
+    VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation -e .
     cd ..
 
     VLLM_TARGET_DEVICE=xpu pip install --no-deps --no-build-isolation -e vllm

From bf8bf04affbad9cc29497a54454af2a641eecf48 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 12:01:08 +0000
Subject: [PATCH 12/16] Added patch to liger kernels, refactor liger install

---
 .github/workflows/third-party-benchmarks.yml  |  9 ++---
 .../{liger_kernels => liger}/README.md        |  0
 .../run_benchmarks.sh                         |  0
 .../{liger_kernels => liger}/transform.py     |  0
 scripts/test-triton.sh                        | 40 ++++++++++++++-----
 5 files changed, 34 insertions(+), 15 deletions(-)
 rename benchmarks/third_party/{liger_kernels => liger}/README.md (100%)
 rename benchmarks/third_party/{liger_kernels => liger}/run_benchmarks.sh (100%)
 rename benchmarks/third_party/{liger_kernels => liger}/transform.py (100%)

diff --git a/.github/workflows/third-party-benchmarks.yml b/.github/workflows/third-party-benchmarks.yml
index 6612f937f5..866ec24cc7 100644
--- a/.github/workflows/third-party-benchmarks.yml
+++ b/.github/workflows/third-party-benchmarks.yml
@@ -108,17 +108,14 @@ jobs:
         run: |
           source ./scripts/capture-hw-details.sh
 
-          cd benchmarks/third_party/liger_kernels
-
-          git clone https://github.com/linkedin/Liger-Kernel
-          pip install -e Liger-Kernel
+          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
 
           # To remember return code, but still copy results
           RET_CODE=0
-          bash ./run_benchmarks.sh || RET_CODE=$?
+          bash benchmarks/third_party/liger/run_benchmarks.sh || RET_CODE=$?
 
           cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
-          python transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
+          python benchmarks/third_party/liger/transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
 
           # Return the captured return code at the end
           exit "$RET_CODE"
diff --git a/benchmarks/third_party/liger_kernels/README.md b/benchmarks/third_party/liger/README.md
similarity index 100%
rename from benchmarks/third_party/liger_kernels/README.md
rename to benchmarks/third_party/liger/README.md
diff --git a/benchmarks/third_party/liger_kernels/run_benchmarks.sh b/benchmarks/third_party/liger/run_benchmarks.sh
similarity index 100%
rename from benchmarks/third_party/liger_kernels/run_benchmarks.sh
rename to benchmarks/third_party/liger/run_benchmarks.sh
diff --git a/benchmarks/third_party/liger_kernels/transform.py b/benchmarks/third_party/liger/transform.py
similarity index 100%
rename from benchmarks/third_party/liger_kernels/transform.py
rename to benchmarks/third_party/liger/transform.py
diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index bc347f4a00..148354e981 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -596,6 +596,10 @@ run_inductor_tests() {
   grep AlbertForMaskedLM inductor_log.csv | grep -q ,pass,
 }
 
+run_test_deps_install() {
+  pip install pytest pytest-cov pytest-xdist
+}
+
 run_sglang_install() {
   echo "************************************************"
   echo "******    Installing SGLang                 ****"
@@ -620,8 +624,6 @@ run_sglang_install() {
     pip install -e "./python"
     cd ..
   fi
-
-  pip install pytest pytest-cov pytest-xdist
 }
 
 run_sglang_tests() {
@@ -630,23 +632,39 @@ run_sglang_tests() {
   echo "***************************************************"
 
   run_sglang_install
+  run_test_deps_install
   cd sglang
   run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} test/srt/test_triton_attention_kernels.py
 }
 
-run_liger_tests() {
+run_liger_install() {
   echo "************************************************"
-  echo "******    Running Liger Triton tests      ******"
+  echo "******    Installing Liger-Kernel         ******"
   echo "************************************************"
 
   if ! [ -d "./Liger-Kernel" ]; then
     git clone https://github.com/linkedin/Liger-Kernel
+
+    # There is probably an issue with cache
+    # Will try to upstream the patch here:
+    # https://github.com/linkedin/Liger-Kernel/pull/917
+    # After merging we can remove this patch application
+    git apply benchmarks/third_party/liger/liger-fix.patch
   fi
 
   if ! pip list | grep "liger_kernel" ; then
-    pip install pytest pytest-xdist pytest-cov transformers pandas pytest datasets -e Liger-Kernel
+    pip install transformers pandas datasets -e Liger-Kernel
   fi
+}
+
+
+run_liger_tests() {
+  echo "************************************************"
+  echo "******    Running Liger-Kernel tests      ******"
+  echo "************************************************"
 
+  run_liger_install
+  run_test_deps_install
   run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
 }
 
@@ -678,7 +696,7 @@ run_vllm_install() {
     VLLM_TARGET_DEVICE=xpu pip install --no-deps --no-build-isolation -e vllm
   fi
 
-  pip install pytest pytest-cov pytest-xdist cachetools cbor2 blake3 pybase64 openai_harmony tblib
+  pip install cachetools cbor2 blake3 pybase64 openai_harmony tblib
 }
 
 
@@ -688,6 +706,7 @@ run_vllm_tests() {
   echo "************************************************"
 
   run_vllm_install
+  run_test_deps_install
 
   cd vllm
   run_pytest_command -vvv tests/kernels/moe/test_batched_moe.py tests/kernels/attention/test_triton_unified_attention.py
@@ -774,15 +793,18 @@ test_triton() {
   if [ "$TEST_SGLANG" == true ]; then
     run_sglang_tests
   fi
+  if [ "$INSTALL_LIGER" == true ]; then
+    run_liger_install
+  fi
   if [ "$TEST_LIGER" == true ]; then
     run_liger_tests
   fi
-  if [ "$TEST_VLLM" == true ]; then
-    run_vllm_tests
-  fi
   if [ "$INSTALL_VLLM" == true ]; then
     run_vllm_install
   fi
+  if [ "$TEST_VLLM" == true ]; then
+    run_vllm_tests
+  fi
   if [ "$TEST_TRITON_KERNELS" == true ]; then
     run_triton_kernels_tests
   fi

From cc48db6a8ca6ad06d68e5c94590c02c48b973a16 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 13:44:11 +0000
Subject: [PATCH 13/16] Fixes: add --install-liger option to test-triton.sh

---
 scripts/test-triton.sh | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 148354e981..5899628fdf 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -26,11 +26,12 @@ TEST:
     --flex-attention
     --instrumentation
     --inductor
-    --sglang
-    --liger
     --vllm
     --install-vllm
+    --sglang
     --install-sglang
+    --liger
+    --install-liger
 
 OPTION:
     --unskip
@@ -72,10 +73,11 @@ TEST_BENCHMARK_FLEX_ATTENTION=false
 TEST_INSTRUMENTATION=false
 TEST_INDUCTOR=false
 TEST_SGLANG=false
+INSTALL_SGLANG=false
 TEST_LIGER=false
+INSTALL_LIGER=false
 TEST_VLLM=false
 INSTALL_VLLM=false
-INSTALL_SGLANG=false
 TEST_TRITON_KERNELS=false
 VENV=false
 TRITON_TEST_REPORTS=false
@@ -192,13 +194,13 @@ while (( $# != 0 )); do
       TEST_DEFAULT=false
       shift
       ;;
-    --install-sglang)
-      INSTALL_SGLANG=true
+    --sglang)
+      TEST_SGLANG=true
       TEST_DEFAULT=false
       shift
       ;;
-    --sglang)
-      TEST_SGLANG=true
+    --install-sglang)
+      INSTALL_SGLANG=true
       TEST_DEFAULT=false
       shift
       ;;
@@ -207,6 +209,11 @@ while (( $# != 0 )); do
       TEST_DEFAULT=false
       shift
       ;;
+    --install-liger)
+      INSTALL_LIGER=true
+      TEST_DEFAULT=false
+      shift
+      ;;
     --vllm)
       TEST_VLLM=true
       TEST_DEFAULT=false

From aedede21e23b70dda0e000f2bc51bc8075b01882 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 14:27:12 +0000
Subject: [PATCH 14/16] Removed liger patch

---
 scripts/test-triton.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 5899628fdf..8fc375c548 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -651,12 +651,6 @@ run_liger_install() {
 
   if ! [ -d "./Liger-Kernel" ]; then
     git clone https://github.com/linkedin/Liger-Kernel
-
-    # There is probably an issue with cache
-    # Will try to upstream the patch here:
-    # https://github.com/linkedin/Liger-Kernel/pull/917
-    # After merging we can remove this patch application
-    git apply benchmarks/third_party/liger/liger-fix.patch
   fi
 
   if ! pip list | grep "liger_kernel" ; then

From b3680ad06996801489e8febbcfa632cc40444c5f Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Fri, 24 Oct 2025 15:52:53 +0000
Subject: [PATCH 15/16] Debug: run Liger tests without pytest -n

---
 scripts/test-triton.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/test-triton.sh b/scripts/test-triton.sh
index 8fc375c548..84811ae6a1 100755
--- a/scripts/test-triton.sh
+++ b/scripts/test-triton.sh
@@ -666,7 +666,7 @@ run_liger_tests() {
 
   run_liger_install
   run_test_deps_install
-  run_pytest_command -vvv -n ${PYTEST_MAX_PROCESSES:-4} Liger-Kernel/test/
+  run_pytest_command -vvv Liger-Kernel/test/
 }
 
 run_vllm_install() {

From 051a83d2c543f6e83c476fc1c7c8c46d90069da8 Mon Sep 17 00:00:00 2001
From: Egor Krivov <egor.krivov@intel.com>
Date: Mon, 27 Oct 2025 11:42:01 +0000
Subject: [PATCH 16/16] Fixed installation

---
 .github/workflows/third-party-benchmarks.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/third-party-benchmarks.yml b/.github/workflows/third-party-benchmarks.yml
index 866ec24cc7..59a1a9158f 100644
--- a/.github/workflows/third-party-benchmarks.yml
+++ b/.github/workflows/third-party-benchmarks.yml
@@ -104,11 +104,11 @@ jobs:
           python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-fp8-report.csv --tag $TAG --benchmark moe-fp8-benchmark
 
       - name: Run Liger-Kernel benchmarks
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger-kernel')) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger')) }}
         run: |
           source ./scripts/capture-hw-details.sh
 
-          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
+          ./scripts/test-triton.sh --install-liger --skip-pip-install --skip-pytorch-install
 
           # To remember return code, but still copy results
           RET_CODE=0