Commit e18f085

skip fusedmoe layer for start_load_kv (#21378)
Signed-off-by: calvin chen <[email protected]>
1 parent afa2607 commit e18f085

File tree

1 file changed: +10 −2 lines


vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py

Lines changed: 10 additions & 2 deletions
@@ -192,8 +192,16 @@ def inject_kv_into_layer(
         # Load the KV for each request each layer
         for request in metadata.requests:
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[ \
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have kv_cache
+                # attribute (attention layers) Skip non-attention
+                # layers like FusedMoE
+                kv_cache = getattr(layer, 'kv_cache', None)
+                if kv_cache is None:
+                    continue
+
+                kv_cache_layer = kv_cache[ \
                     forward_context.virtual_engine]
 
                 kv_cache = self.p2p_nccl_engine.recv_tensor(
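
For context, a minimal standalone sketch of the guard this commit adds (the AttentionLayer and FusedMoELayer classes below are hypothetical stand-ins, not vLLM's actual modules): forward_context.no_compile_layers can contain non-attention modules such as FusedMoE that have no kv_cache attribute, so reading layer.kv_cache unconditionally would raise AttributeError; getattr with a None default lets those layers be skipped.

class AttentionLayer:
    """Hypothetical attention layer with one KV-cache slot per virtual engine."""
    def __init__(self, num_virtual_engines: int = 1):
        self.kv_cache = [f"kv_tensor_engine_{i}" for i in range(num_virtual_engines)]

class FusedMoELayer:
    """Hypothetical fused mixture-of-experts layer: deliberately has no kv_cache."""
    pass

# Stand-in for forward_context.no_compile_layers, which mixes layer types.
no_compile_layers = {
    "model.layers.0.self_attn.attn": AttentionLayer(),
    "model.layers.0.mlp.experts": FusedMoELayer(),
}

virtual_engine = 0
for layer_name in no_compile_layers:
    layer = no_compile_layers[layer_name]
    kv_cache = getattr(layer, 'kv_cache', None)
    if kv_cache is None:
        continue  # skip non-attention layers such as FusedMoE
    kv_cache_layer = kv_cache[virtual_engine]
    print(f"{layer_name}: would inject received KV into {kv_cache_layer}")

Running this prints only the attention layer's entry; the FusedMoE entry is skipped instead of raising AttributeError, which is the failure the commit fixes.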
