diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py
index 576977b00e61..aa176ef05fcc 100644
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -16,7 +16,10 @@
     ParallelLMHead,
     VocabParallelEmbedding,
 )
-from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.model_loader.weight_utils import (
+    default_weight_loader,
+    maybe_remap_kv_scale_name,
+)
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 
@@ -278,6 +281,10 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             if name.endswith(".bias") and name not in params_dict:
                 continue
+            name = maybe_remap_kv_scale_name(name, params_dict)
+            if name is None:
+                continue
+
             # According to DeepSeek-V3 Technical Report, MTP modules
             # shares embedding layer. We only load the first weights.
             if (
                 spec_layer != self.model.mtp_start_layer_idx