vllm-project · MrZ20 · Oct 25, 2025 · gemini-code-assist · Oct 25, 2025
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
@@ -59,6 +59,8 @@ jobs:
             model_name: DeepSeek-V2-Lite
           - runner: a2-4
             model_name: Qwen3-Next-80B-A3B-Instruct
+          - runner: a2-1
+            model_name: Phi-4-mini-instruct
       fail-fast: false
     # test will be triggered when tag 'accuracy-test' & 'ready-for-test'
     if:  >-

diff --git a/tests/e2e/models/configs/Phi-4-mini-instruct.yaml b/tests/e2e/models/configs/Phi-4-mini-instruct.yaml
@@ -0,0 +1,17 @@
+# Accuracy-test config for Phi-4-mini-instruct; registered in accuracy.txt.
+# NOTE(review): the metric values below are presumably the expected gsm8k
+# baselines the test compares against -- confirm with the harness that loads this.
+model_name: "LLM-Research/Phi-4-mini-instruct"
+runner: "linux-aarch64-a2-1"
+hardware: "Atlas A2 Series"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.81
+  - name: "exact_match,flexible-extract"
+    value: 0.81
+trust_remote_code: true
+num_fewshot: 5
+batch_size: 32
+gpu_memory_utilization: 0.8
diff --git a/tests/e2e/models/configs/accuracy.txt b/tests/e2e/models/configs/accuracy.txt
@@ -6,3 +6,4 @@ Qwen2-7B.yaml
 Qwen2-VL-7B-Instruct.yaml
 Qwen2-Audio-7B-Instruct.yaml
 Qwen3-VL-30B-A3B-Instruct.yaml
+Phi-4-mini-instruct.yaml