
Commit 1345abc

WoosukKwon authored and MatthewBonanni committed
[Misc] Fix seq_lens for graph capture (vllm-project#23175)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent 1169a87 commit 1345abc

File tree

1 file changed (+2, -4 lines)


vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 4 deletions
@@ -2317,15 +2317,13 @@ def _dummy_run(
         # If force_attention is True, we always capture attention. Otherwise,
         # it only happens for cudagraph_runtime_mode=FULL.
-        if force_attention or cudagraph_runtime_mode == \
-                CUDAGraphMode.FULL:
+        if force_attention or cudagraph_runtime_mode == CUDAGraphMode.FULL:
             attn_metadata = {}

             # Make sure max_model_len is used at the graph capture time.
             self.seq_lens_np[:num_reqs] = self.max_model_len
             self.seq_lens_np[num_reqs:] = 0
-            self.seq_lens[:num_reqs].copy_(self.seq_lens_cpu[:num_reqs],
-                                           non_blocking=True)
+            self.seq_lens.copy_(self.seq_lens_cpu, non_blocking=True)

             for kv_cache_group_id, kv_cache_group_spec in enumerate(
                     self.kv_cache_config.kv_cache_groups):
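
Context for the change: the old code copied only the first num_reqs entries to the GPU, so seq_lens[num_reqs:] could still hold stale values from earlier runs when the CUDA graph was captured; copying the whole buffer, whose tail was just zeroed on the CPU side, keeps the captured state consistent. Below is a minimal sketch of that pinned-buffer-plus-full-copy pattern, not vLLM's actual implementation; the buffer sizes are assumed for illustration, a CUDA device is assumed, and only the names seq_lens / seq_lens_cpu / seq_lens_np come from the diff.

    import torch

    max_num_reqs = 8       # assumed capacity of the persistent buffer
    max_model_len = 4096   # assumed model context length
    num_reqs = 3           # requests active in the dummy run

    # Pinned CPU tensor plus a NumPy view over the same storage; writes
    # through the view are visible to the subsequent copy.
    seq_lens_cpu = torch.zeros(max_num_reqs, dtype=torch.int32,
                               pin_memory=True)
    seq_lens_np = seq_lens_cpu.numpy()
    seq_lens = torch.zeros(max_num_reqs, dtype=torch.int32, device="cuda")

    # Mirror the fixed logic: fill active slots with max_model_len, zero
    # the tail, then copy the *entire* buffer so no stale entries remain
    # on the GPU side at graph-capture time.
    seq_lens_np[:num_reqs] = max_model_len
    seq_lens_np[num_reqs:] = 0
    seq_lens.copy_(seq_lens_cpu, non_blocking=True)

The non_blocking=True copy is safe here because the source is pinned host memory, and a full-tensor copy is no more expensive than the sliced one while removing the stale-tail hazard.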
