We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 940d345, commit de4d673 (Copy full SHA for de4d673)
vllm/v1/worker/gpu_model_runner.py
@@ -911,7 +911,7 @@ def _dummy_blk_table_and_slot_mapping():
911
dtype=torch.int32,
912
device="cpu")
913
# NOTE - using max_encoder_len is whisper specific
914
- total_num_scheduled_tokens_arg = self.max_encoder_len
+ total_num_scheduled_tokens_arg = num_encoder_tokens
915
max_num_scheduled_tokens_arg = self.max_encoder_len
916
max_seq_len_arg = self.max_encoder_len
917
elif isinstance(kv_cache_group_spec.kv_cache_spec,
0 commit comments