
Commit bfaf977

add new test model for aclgraph single_request
Signed-off-by: lilinsiman <[email protected]>
1 parent cba69e1 commit bfaf977


1 file changed: +11 -3 lines changed


tests/e2e/multicard/test_single_request_aclgraph.py

Lines changed: 11 additions & 3 deletions
@@ -29,7 +29,7 @@
 from vllm.utils.network_utils import get_open_port

 MODELS = [
-    "Qwen/Qwen3-30B-A3B",
+    "Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"
 ]

 DATA_PARALLELS = [2]
@@ -52,12 +52,20 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
         "TASK_QUEUE_ENABLE": "1",
         "HCCL_OP_EXPANSION_MODE": "AIV",
     }
-    server_args = [
+    if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
+        server_args = [
         "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
-        "--data-parallel-size",
+        "--data-parallel-size", "quantization", "ascend",
         str(dp_size), "--port",
         str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
     ]
+    else:
+        server_args = [
+            "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
+            "--data-parallel-size",
+            str(dp_size), "--port",
+            str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
+        ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
     }
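The new branch differs from the default path only in how quantization is requested for the W8A8 checkpoint. For comparison, below is a minimal sketch (not the committed code) that builds the shared flags once and appends the quantization options per model; the `--quantization ascend` spelling and the `build_server_args` helper are illustrative assumptions.

# Sketch only -- not the code added in this commit. It assumes vLLM's
# `--quantization` serve flag with the "ascend" backend and a hypothetical
# build_server_args() helper, and shows one way to avoid duplicating the
# flags shared by the two branches.
QUANTIZED_MODELS = {"vllm-ascend/DeepSeek-V2-Lite-W8A8"}


def build_server_args(model: str, dp_size: int, port: int) -> list[str]:
    # Flags common to every model under test.
    args = [
        "--no-enable-prefix-caching",
        "--tensor-parallel-size", "1",
        "--data-parallel-size", str(dp_size),
        "--port", str(port),
        "--trust-remote-code",
        "--gpu-memory-utilization", "0.9",
    ]
    if model in QUANTIZED_MODELS:
        # Quantized checkpoints additionally select the quantization backend.
        args += ["--quantization", "ascend"]
    return args

The test body would then call `server_args = build_server_args(model, dp_size, port)` before launching the server, keeping the per-model difference in one place.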
