File tree Expand file tree Collapse file tree 1 file changed +10
-2
lines changed
vllm/distributed/kv_transfer/kv_connector/v1/p2p Expand file tree Collapse file tree 1 file changed +10
-2
lines changed Original file line number Diff line number Diff line change @@ -192,8 +192,16 @@ def inject_kv_into_layer(
192
192
# Load the KV for each request each layer
193
193
for request in metadata .requests :
194
194
for layer_name in forward_context .no_compile_layers :
195
- attn_layer = forward_context .no_compile_layers [layer_name ]
196
- kv_cache_layer = attn_layer .kv_cache [ \
195
+ layer = forward_context .no_compile_layers [layer_name ]
196
+
197
+ # Only process layers that have a kv_cache
198
+ # attribute (attention layers). Skip non-attention
199
+ # layers like FusedMoE.
200
+ kv_cache = getattr (layer , 'kv_cache' , None )
201
+ if kv_cache is None :
202
+ continue
203
+
204
+ kv_cache_layer = kv_cache [ \
197
205
forward_context .virtual_engine ]
198
206
199
207
kv_cache = self .p2p_nccl_engine .recv_tensor (
You can’t perform that action at this time.
0 commit comments