Skip to content

Commit 55cc947

Browse files
committed
Add a new test model for the aclgraph single-request e2e test (v0.11.0)
Signed-off-by: lilinsiman <[email protected]>
1 parent 29bd923 commit 55cc947

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

tests/e2e/multicard/test_single_request_aclgraph.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323

2424
from tests.e2e.conftest import RemoteOpenAIServer
2525

26-
MODELS = [
27-
"Qwen/Qwen3-30B-A3B",
28-
]
26+
MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"]
2927

3028
DATA_PARALLELS = [2]
3129

@@ -47,12 +45,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
4745
"TASK_QUEUE_ENABLE": "1",
4846
"HCCL_OP_EXPANSION_MODE": "AIV",
4947
}
50-
server_args = [
51-
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
52-
"--data-parallel-size",
53-
str(dp_size), "--port",
54-
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
55-
]
48+
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
49+
server_args = [
50+
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
51+
"--data-parallel-size",
52+
str(dp_size), "--quantization", "ascend", "--max-model-len",
53+
"1024", "--port",
54+
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
55+
]
56+
else:
57+
server_args = [
58+
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
59+
"--data-parallel-size",
60+
str(dp_size), "--port",
61+
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
62+
]
5663
request_keyword_args: dict[str, Any] = {
5764
**api_keyword_args,
5865
}

0 commit comments

Comments (0)