File tree Expand file tree Collapse file tree 3 files changed +25
-6
lines changed Expand file tree Collapse file tree 3 files changed +25
-6
lines changed Original file line number Diff line number Diff line change @@ -90,8 +90,7 @@ def main(audio_count: int):
90
90
llm = LLM(model = " Qwen/Qwen2-Audio-7B-Instruct" ,
91
91
max_model_len = 4096 ,
92
92
max_num_seqs = 5 ,
93
- limit_mm_per_prompt = {" audio" : audio_count},
94
- enforce_eager = True )
93
+ limit_mm_per_prompt = {" audio" : audio_count})
95
94
96
95
inputs = prepare_inputs(audio_count)
97
96
Original file line number Diff line number Diff line change @@ -57,7 +57,6 @@ llm = LLM(
57
57
model = MODEL_PATH ,
58
58
max_model_len = 16384 ,
59
59
limit_mm_per_prompt = {" image" : 10 },
60
- enforce_eager = True ,
61
60
)
62
61
63
62
sampling_params = SamplingParams(
@@ -146,8 +145,7 @@ docker run --rm \
146
145
vllm serve Qwen/Qwen2.5-VL-7B-Instruct \
147
146
--dtype bfloat16 \
148
147
--max_model_len 16384 \
149
- --max-num-batched-tokens 16384 \
150
- --enforce-eager
148
+ --max-num-batched-tokens 16384
151
149
```
152
150
153
151
:::{note}
Original file line number Diff line number Diff line change @@ -296,6 +296,24 @@ def vllm_version_is(target_vllm_version: str):
296
296
"format of x.y.z." )
297
297
298
298
299
def get_max_hidden_layers(hf_config) -> int:
    """Return the largest ``num_hidden_layers`` value found in a model config.

    The config is converted with ``hf_config.to_dict()`` and searched
    recursively, so a value stored in a nested sub-config is found even
    when the top-level config does not carry the key itself.

    Args:
        hf_config: Any object exposing ``to_dict()`` that returns a
            (possibly nested) dictionary.

    Returns:
        The maximum integer ``num_hidden_layers`` found anywhere in the
        config dictionary.

    Raises:
        ValueError: If no integer ``num_hidden_layers`` entry exists.
    """
    layer_counts = []

    def _rec_find(node) -> None:
        if isinstance(node, dict):
            for key, value in node.items():
                # bool is a subclass of int; a True/False flag stored under
                # this key is not a layer count and must not be collected.
                if (key == "num_hidden_layers" and isinstance(value, int)
                        and not isinstance(value, bool)):
                    layer_counts.append(value)
                else:
                    _rec_find(value)
        elif isinstance(node, (list, tuple)):
            # Sub-configs may be stored in sequences; search them as well.
            for item in node:
                _rec_find(item)

    _rec_find(hf_config.to_dict())
    if not layer_counts:
        raise ValueError("Not found num_hidden_layers in model config.")
    return max(layer_counts)
315
+
316
+
299
317
def update_aclgraph_sizes (vllm_config : VllmConfig ) -> None :
300
318
"""Update ACL graph capture sizes based on hardware limitations"""
301
319
# Store original configuration and temporarily clear it
@@ -304,7 +322,11 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
304
322
compilation_config .cudagraph_capture_sizes , None
305
323
306
324
# Calculate parallel configuration factor
307
- num_hidden_layers = vllm_config .model_config .hf_config .num_hidden_layers
325
+ hf_config = vllm_config .model_config .hf_config
326
+ if hasattr (hf_config , 'num_hidden_layers' ):
327
+ num_hidden_layers = hf_config .num_hidden_layers
328
+ else :
329
+ num_hidden_layers = get_max_hidden_layers (hf_config )
308
330
parallel_config = vllm_config .parallel_config
309
331
310
332
# TODO: Find out whether we need to take into account the pp_size
You can’t perform that action at this time.
0 commit comments