
Commit ccdb632

david6666666 authored and diegocastanibm committed

[BugFix] Fix shared storage connector load kv only load attention layer (vllm-project#21428)
Signed-off-by: David Chen <[email protected]>
Signed-off-by: Diego-Castan <[email protected]>

1 parent ec31708 commit ccdb632

File tree

1 file changed: 10 additions, 2 deletions


vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py

Lines changed: 10 additions & 2 deletions
@@ -156,8 +156,16 @@ def inject_kv_into_layer(
             logger.info("Inject KV cache of %d tokens to the paged memory",
                         len(request.slot_mapping))
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[\
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have kv_cache
+                # attribute (attention layers). Skip non-attention
+                # layers like FusedMoE/MLP etc.
+                kv_cache_attr = getattr(layer, 'kv_cache', None)
+                if kv_cache_attr is None:
+                    continue
+
+                kv_cache_layer = kv_cache_attr[ \
                     forward_context.virtual_engine]
 
                 filename = self._generate_filename_debug(
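
Below is a minimal, self-contained sketch of the guard this change introduces: only layers that expose a kv_cache attribute (attention layers) receive injected KV, while FusedMoE/MLP and other non-attention layers are skipped. The stand-in classes, layer names, and placeholder values are illustrative only, not vLLM's real types.

# Sketch of the getattr-based filter added in this commit.
# AttentionLayer/FusedMoELayer are hypothetical stand-ins.

class AttentionLayer:
    def __init__(self):
        # one KV-cache entry per virtual engine (placeholder values)
        self.kv_cache = ["kv_engine_0", "kv_engine_1"]

class FusedMoELayer:
    pass  # no kv_cache attribute, so it must be skipped

no_compile_layers = {"attn.0": AttentionLayer(), "moe.0": FusedMoELayer()}
virtual_engine = 0

for layer_name, layer in no_compile_layers.items():
    kv_cache_attr = getattr(layer, "kv_cache", None)
    if kv_cache_attr is None:
        continue  # non-attention layer (e.g. FusedMoE/MLP): nothing to inject
    kv_cache_layer = kv_cache_attr[virtual_engine]
    print(f"inject KV into {layer_name}: {kv_cache_layer}")

Running this sketch prints an injection line only for "attn.0", mirroring how the patched loop no longer assumes every no-compile layer is an attention layer.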
