Skip to content

Commit de00b2f

Browse files
[Bugfix] Fix num_hidden_layers resolution when using Qwen2-Audio-7B
Signed-off-by: hfadzxy <[email protected]>
1 parent 787010a commit de00b2f

File tree

3 files changed

+25
-6
lines changed

3 files changed

+25
-6
lines changed

docs/source/tutorials/single_npu_audio.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,7 @@ def main(audio_count: int):
9090
llm = LLM(model="Qwen/Qwen2-Audio-7B-Instruct",
9191
max_model_len=4096,
9292
max_num_seqs=5,
93-
limit_mm_per_prompt={"audio": audio_count},
94-
enforce_eager=True)
93+
limit_mm_per_prompt={"audio": audio_count})
9594

9695
inputs = prepare_inputs(audio_count)
9796

docs/source/tutorials/single_npu_multimodal.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ llm = LLM(
5757
model=MODEL_PATH,
5858
max_model_len=16384,
5959
limit_mm_per_prompt={"image": 10},
60-
enforce_eager=True,
6160
)
6261

6362
sampling_params = SamplingParams(
@@ -146,8 +145,7 @@ docker run --rm \
146145
vllm serve Qwen/Qwen2.5-VL-7B-Instruct \
147146
--dtype bfloat16 \
148147
--max_model_len 16384 \
149-
--max-num-batched-tokens 16384 \
150-
--enforce-eager
148+
--max-num-batched-tokens 16384
151149
```
152150
153151
:::{note}

vllm_ascend/utils.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,24 @@ def vllm_version_is(target_vllm_version: str):
296296
"format of x.y.z.")
297297

298298

299+
def get_max_hidden_layers(hf_config) -> int:
    """Return the largest ``num_hidden_layers`` found anywhere in *hf_config*.

    Multimodal models (e.g. Qwen2-Audio) have no top-level
    ``num_hidden_layers``; instead each nested sub-model config (audio tower,
    text decoder, ...) carries its own. The maximum across all of them is the
    conservative value to use for sizing.

    Args:
        hf_config: A HuggingFace config object exposing ``to_dict()``.

    Returns:
        The maximum integer ``num_hidden_layers`` value found in the
        (possibly nested) config dict.

    Raises:
        ValueError: If no ``num_hidden_layers`` key exists anywhere in the
            config.
    """
    cfg_dict = hf_config.to_dict()
    layer_counts = []

    def _rec_find(d):
        # Recurse through dicts AND lists: some configs keep sub-configs
        # inside list values, which a dict-only walk would silently skip.
        if isinstance(d, dict):
            for k, v in d.items():
                if k == "num_hidden_layers" and isinstance(v, int):
                    layer_counts.append(v)
                else:
                    _rec_find(v)
        elif isinstance(d, list):
            for item in d:
                _rec_find(item)

    _rec_find(cfg_dict)
    if not layer_counts:
        raise ValueError("Not found num_hidden_layers in model config.")
    return max(layer_counts)
315+
316+
299317
def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
300318
"""Update ACL graph capture sizes based on hardware limitations"""
301319
# Store original configuration and temporarily clear it
@@ -304,7 +322,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
304322
compilation_config.cudagraph_capture_sizes, None
305323

306324
# Calculate parallel configuration factor
307-
num_hidden_layers = vllm_config.model_config.hf_config.num_hidden_layers
325+
hf_config = vllm_config.model_config.hf_config
326+
if hasattr(hf_config, 'num_hidden_layers'):
327+
num_hidden_layers = hf_config.num_hidden_layers
328+
else:
329+
num_hidden_layers = get_max_hidden_layers(hf_config)
308330
parallel_config = vllm_config.parallel_config
309331

310332
# TODO: Find out whether we need to take into account the pp_size

0 commit comments

Comments
 (0)