Skip to content

Commit 6464c15

Browse files
committed
Move fast prefill layer init to initialize_kv_cache
Signed-off-by: Yong Hoon Shin <[email protected]>
1 parent bcf331a commit 6464c15

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3201,6 +3201,8 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
3201 3201
kv_cache_config: Configuration for the KV cache, including the KV
3202 3202
cache size of each layer
3203 3203
"""
3204+
attn_layers = get_layers_from_vllm_config(self.vllm_config, Attention)
3205+
self.maybe_add_kv_sharing_fast_prefill_layers(attn_layers)
3204 3206
self.kv_cache_config = kv_cache_config
3205 3207
self.may_reinitialize_input_batch(kv_cache_config)
3206 3208
self.maybe_add_kv_sharing_layers_to_kv_cache_groups(kv_cache_config)
@@ -3323,8 +3325,6 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]:
3323 3325
page_size_padded=page_size_padded,
3324 3326
mamba_type=mamba_module.mamba_type)
3325 3327

3326-
self.maybe_add_kv_sharing_fast_prefill_layers(attn_layers)
3327-
3328 3328
return kv_cache_spec
3329 3329

3330 3330
def _build_encoder_only_attn_metadata(

0 commit comments

Comments
 (0)