@@ -29,7 +29,7 @@
 from vllm.utils.network_utils import get_open_port
 
 MODELS = [
-    "Qwen/Qwen3-30B-A3B",
+    "Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"
 ]
 
 DATA_PARALLELS = [2]
@@ -52,12 +52,20 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
5252 "TASK_QUEUE_ENABLE" : "1" ,
5353 "HCCL_OP_EXPANSION_MODE" : "AIV" ,
5454 }
55- server_args = [
55+ if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8" :
56+ server_args = [
5657 "--no-enable-prefix-caching" , "--tensor-parallel-size" , "1" ,
57- "--data-parallel-size" ,
58+ "--data-parallel-size" , "quantization" , "ascend" ,
5859 str (dp_size ), "--port" ,
5960 str (port ), "--trust-remote-code" , "--gpu-memory-utilization" , "0.9"
6061 ]
62+ else :
63+ server_args = [
64+ "--no-enable-prefix-caching" , "--tensor-parallel-size" , "1" ,
65+ "--data-parallel-size" ,
66+ str (dp_size ), "--port" ,
67+ str (port ), "--trust-remote-code" , "--gpu-memory-utilization" , "0.9"
68+ ]
6169 request_keyword_args : dict [str , Any ] = {
6270 ** api_keyword_args ,
6371 }
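
Note on the change above: as written in the original patch, `"quantization", "ascend"` was inserted between `--data-parallel-size` and its value `str(dp_size)`, which would make the server reject the argument list; the diff above moves it to a proper `--quantization ascend` flag pair. A possible simplification (a sketch, not part of this diff): since the two branches differ only in those flags, the shared arguments could be built once and the quantization flags appended conditionally.

```python
# Sketch: build the shared server args once, then append the
# Ascend quantization flags only for the quantized model.
server_args = [
    "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
    "--data-parallel-size", str(dp_size),
    "--port", str(port),
    "--trust-remote-code", "--gpu-memory-utilization", "0.9",
]
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
    server_args += ["--quantization", "ascend"]
```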