Commit 14ca1e5

[CI] Fix OOM of deepseek-eplb nightly test (#3884)
### What this PR does / why we need it?

Fix OOM of the deepseek-eplb nightly test.

- vLLM version: v0.11.0rc3
- vLLM main: vllm-project/vllm@83f478b

---------

Signed-off-by: offline0806 <[email protected]>
Co-authored-by: offline0806 <[email protected]>
1 parent dc960e7 · commit 14ca1e5

4 files changed: +4 -4 lines changed


tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -85,7 +85,7 @@ async def test_models(model: str, tp_size: int, dp_size: int) -> None:
         "--quantization", "ascend", "--gpu-memory-utilization", "0.9",
         "--additional-config", '{"enable_weight_nz_layout":true, '
         '"torch_air_graph_config":{"enabled": true, "enable_multistream_mla": true, "graph_batch_size": [16], "use_cached_graph": true},'
-        '"dynamic_eplb": true, "num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200, "init_redundancy_expert": 16}'
+        '"dynamic_eplb": true, "num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200}'
     ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
```
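
For reference, a minimal sketch (not part of the commit) showing that the adjacent string literals above still concatenate into one valid JSON document once `init_redundancy_expert` is dropped; the values are copied from the diff:

```python
import json

# Adjacent string literals passed to --additional-config concatenate into one JSON string.
additional_config = (
    '{"enable_weight_nz_layout":true, '
    '"torch_air_graph_config":{"enabled": true, "enable_multistream_mla": true, '
    '"graph_batch_size": [16], "use_cached_graph": true},'
    '"dynamic_eplb": true, "num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200}'
)

config = json.loads(additional_config)
assert "init_redundancy_expert" not in config  # removed by this commit to reduce memory use
print(config["num_iterations_eplb_update"], config["num_wait_worker_iterations"])  # 1000 200
```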

tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py

Lines changed: 1 addition & 2 deletions
```diff
@@ -82,8 +82,7 @@ async def test_models(model: str, tp_size: int) -> None:
         "--quantization", "ascend", "--gpu-memory-utilization", "0.9",
         "--additional-config",
         '{"enable_weight_nz_layout":true, "dynamic_eplb": true, '
-        '"num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200, '
-        '"init_redundancy_expert": 16}'
+        '"num_iterations_eplb_update": 1000, "num_wait_worker_iterations": 200}'
     ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
```
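
Why dropping `init_redundancy_expert` helps with the OOM, as a rough back-of-the-envelope sketch (my illustration, not from the commit): assuming `init_redundancy_expert` adds that many redundant expert slots on top of the model's routed experts and the slots are spread evenly across the expert-parallel ranks, each rank must hold extra expert weights. The expert count and EP size below are illustrative placeholders, not measurements:

```python
# Hypothetical illustration: extra expert slots per rank when redundancy is enabled.
# All numbers below are assumptions for illustration, not taken from the commit.
def experts_per_rank(num_routed_experts: int, redundancy: int, ep_size: int) -> int:
    """Expert slots each EP rank hosts, assuming slots are spread evenly."""
    total_slots = num_routed_experts + redundancy
    return -(-total_slots // ep_size)  # ceiling division

EP_SIZE = 16                # assumed expert-parallel world size
NUM_ROUTED_EXPERTS = 256    # assumed routed-expert count for the MoE model

without_redundancy = experts_per_rank(NUM_ROUTED_EXPERTS, 0, EP_SIZE)   # 16 slots per rank
with_redundancy = experts_per_rank(NUM_ROUTED_EXPERTS, 16, EP_SIZE)     # 17 slots per rank
print(with_redundancy - without_redundancy, "extra expert slot(s) of weights per rank")
```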

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -126,7 +126,7 @@ def update_expert_map_and_weight(self, reqs):
                 local_expert_to_replace,
                 buffer_tensor_id)

-        logger.info(
+        logger.debug(
             f"[EPLB] finished update expert weight for layer: {self.layer_id}")

         self.recv_expert_list = []
```
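
With this change the per-layer "finished update expert weight" message is no longer emitted at the default INFO level. If you still want to see it while debugging, one option (a sketch assuming the loader logs through a standard Python logger; the exact logger name is an assumption) is to raise the relevant logger to DEBUG:

```python
import logging

# Sketch only: the exact logger name depends on how vllm_ascend wires its logging
# (it may route through vLLM's "vllm" logger). Adjust the names if needed.
for name in ("vllm", "vllm_ascend"):
    logging.getLogger(name).setLevel(logging.DEBUG)

# Alternatively, setting VLLM_LOGGING_LEVEL=DEBUG in the environment before
# starting the server raises vLLM's log level globally.
```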

vllm_ascend/eplb/eplb_updator.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -77,6 +77,7 @@ def update_iteration(self):
         self.cur_iterations += 1
         if self.cur_iterations == (self.num_iterations_eplb_update + \
                 self.num_wait_worker_iterations + self.num_moe_layers):
+            logger.info("Finish expert parallel load balancing.")
             if self.expert_map_record_path is not None:
                 self.adaptor._export_tensor_to_file(
                     self.shared_dict["expert_maps"],
```
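
To see when the new completion log fires, a small worked example (the MoE layer count is my assumption, not from the commit): with the nightly-test settings `num_iterations_eplb_update=1000` and `num_wait_worker_iterations=200`, the message is logged at the iteration where the counter reaches the sum of the three terms:

```python
# Illustration of the threshold checked in update_iteration().
num_iterations_eplb_update = 1000   # from the nightly-test config above
num_wait_worker_iterations = 200    # from the nightly-test config above
num_moe_layers = 58                 # assumed MoE layer count, for illustration only

finish_iteration = (num_iterations_eplb_update
                    + num_wait_worker_iterations
                    + num_moe_layers)
print(finish_iteration)  # 1258 -> "Finish expert parallel load balancing." is logged here
```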
