Commit e1d34cb

add new test model for aclgraph single_request
Signed-off-by: lilinsiman <[email protected]>
1 parent: cba69e1

File tree

1 file changed (+16, -9)

tests/e2e/multicard/test_single_request_aclgraph.py

Lines changed: 16 additions & 9 deletions
@@ -28,9 +28,7 @@
 else:
     from vllm.utils.network_utils import get_open_port
 
-MODELS = [
-    "Qwen/Qwen3-30B-A3B",
-]
+MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"]
 
 DATA_PARALLELS = [2]
 
@@ -52,12 +50,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
         "TASK_QUEUE_ENABLE": "1",
         "HCCL_OP_EXPANSION_MODE": "AIV",
     }
-    server_args = [
-        "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
-        "--data-parallel-size",
-        str(dp_size), "--port",
-        str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
-    ]
+    if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
+        server_args = [
+            "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
+            "--data-parallel-size",
+            str(dp_size), "--quantization", "ascend", "--max-model-len",
+            "1024", "--port",
+            str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
+        ]
+    else:
+        server_args = [
+            "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
+            "--data-parallel-size",
+            str(dp_size), "--port",
+            str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
+        ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
     }
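For reference only, and not part of this commit: a minimal sketch of how the two near-identical server_args branches could be collapsed by extending a shared base list. The helper name build_server_args is hypothetical; the flag values and the model string are taken from the diff above, and the same dp_size/port values the test already computes would be passed in.

# Sketch only (not in the commit): the per-model argument logic expressed as a
# helper that extends one base list instead of duplicating the whole list.
def build_server_args(model: str, dp_size: int, port: int) -> list[str]:
    args = [
        "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
        "--data-parallel-size", str(dp_size),
    ]
    if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
        # Per the diff above, the W8A8 checkpoint additionally needs Ascend
        # quantization and a capped max model length.
        args += ["--quantization", "ascend", "--max-model-len", "1024"]
    args += [
        "--port", str(port),
        "--trust-remote-code", "--gpu-memory-utilization", "0.9",
    ]
    return args

For example, build_server_args("Qwen/Qwen3-30B-A3B", dp_size=2, port=8000) produces the same flag sequence as the else branch in the diff, while the DeepSeek-V2-Lite-W8A8 model gets the extra quantization flags inserted before "--port".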
