[Bugfix] Fix num_hidden_layers lookup for Qwen2-Audio 7B

zhangxinyuehfad · zhangxinyuehfad · commit de00b2f8e683 · 2025-07-17T10:39:26.000+08:00
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
diff --git a/docs/source/tutorials/single_npu_audio.md b/docs/source/tutorials/single_npu_audio.md
@@ -90,8 +90,7 @@ def main(audio_count: int):
     llm = LLM(model="Qwen/Qwen2-Audio-7B-Instruct",
               max_model_len=4096,
               max_num_seqs=5,
-              limit_mm_per_prompt={"audio": audio_count},
-              enforce_eager=True)
+              limit_mm_per_prompt={"audio": audio_count})
 
     inputs = prepare_inputs(audio_count)
 
diff --git a/docs/source/tutorials/single_npu_multimodal.md b/docs/source/tutorials/single_npu_multimodal.md
@@ -57,7 +57,6 @@ llm = LLM(
     model=MODEL_PATH,
     max_model_len=16384,
     limit_mm_per_prompt={"image": 10},
-    enforce_eager=True,
 )
 
 sampling_params = SamplingParams(
@@ -146,8 +145,7 @@ docker run --rm \
 vllm serve Qwen/Qwen2.5-VL-7B-Instruct \
 --dtype bfloat16 \
 --max_model_len 16384 \
---max-num-batched-tokens 16384 \
---enforce-eager
+--max-num-batched-tokens 16384 
 ```
 
 :::{note}
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
@@ -296,6 +296,24 @@ def vllm_version_is(target_vllm_version: str):
             "format of x.y.z.")
 
 
+def get_max_hidden_layers(hf_config) -> int:
+    cfg_dict = hf_config.to_dict()
+    layer_counts = []
+
+    def _rec_find(d):
+        if isinstance(d, dict):
+            for k, v in d.items():
+                if k == "num_hidden_layers" and isinstance(v, int):
+                    layer_counts.append(v)
+                else:
+                    _rec_find(v)
+
+    _rec_find(cfg_dict)
+    if not layer_counts:
+        raise ValueError("Not found num_hidden_layers in model config.")
+    return max(layer_counts)
+
+
 def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
     """Update ACL graph capture sizes based on hardware limitations"""
     # Store original configuration and temporarily clear it
@@ -304,7 +322,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
         compilation_config.cudagraph_capture_sizes, None
 
     # Calculate parallel configuration factor
-    num_hidden_layers = vllm_config.model_config.hf_config.num_hidden_layers
+    hf_config = vllm_config.model_config.hf_config
+    if hasattr(hf_config, 'num_hidden_layers'):
+        num_hidden_layers = hf_config.num_hidden_layers
+    else:
+        num_hidden_layers = get_max_hidden_layers(hf_config)
     parallel_config = vllm_config.parallel_config
 
     # TODO: Find out whether we need to take into account the pp_size