@@ -6578,6 +6578,177 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("Glm4MoeForCausalLM")
+class Glm4MoeModel(TextModel):
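+    """Conversion for GLM-4 MoE checkpoints (Glm4MoeForCausalLM).
+
+    Converts the HF vocab and hparams to GGUF and merges each layer's
+    per-expert MLP weights into single stacked 3D tensors.
+    """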
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.dir_model, trust_remote_code=True
+        )
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
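+        # Override special tokens via the added vocab: <|endoftext|> doubles as
+        # eos/unk/bos, and <|user|> marks end-of-turn (eot), presumably so chat
+        # generation stops before emitting the next user turn.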
+        special_vocab._set_special_token(
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token(
+            "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token(
+            "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = (
+                self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+            )
+        self.gguf_writer.add_rope_dimension_count(
+            int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))
+        )
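+        # Illustrative numbers (assumed, not read from any checkpoint): with
+        # hidden_size=4096 and num_attention_heads=32, rope_dim is 128, and the
+        # default partial_rotary_factor of 0.5 applies RoPE to only 64 dims.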
+
+        # MoE parameters
+        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        # Note: expert_used_count is already set by the parent class from num_experts_per_tok
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
+            self.gguf_writer.add_expert_shared_count(n_shared_experts)
+        if (first_k_dense_replace := self.hparams.get("first_k_dense_replace")) is not None:
+            self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
+
+        # Expert gating function (sigmoid for GLM4_MOE)
+        self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
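+        # Sigmoid scoring with a per-expert bias (e_score_correction_bias,
+        # handled in modify_tensors below) resembles DeepSeek-V3-style routing.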
+
+        # Routed scaling factor
+        if (routed_scaling_factor := self.hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        # Normalise top-k probabilities
+        if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
+    _experts: list[dict[str, Tensor]] | None = None
+    _shared_experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(
+        self, data_torch: Tensor, name: str, bid: int | None
+    ) -> Iterable[tuple[str, Tensor]]:
+        # Layer 46 is the nextn prediction layer - skip all of its tensors
+        # (note: the index is hardcoded for this particular checkpoint)
+        if bid is not None and bid == 46:
+            return []
+
+        if name.startswith("model.visual."):  # ignore visual part
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for multimodal variants
+
+        # Handle main token embedding; map_tensor_name() also accepts GGUF-style
+        # names (TensorNameMap maps each GGUF name to itself), so the
+        # already-converted name can be passed through here
+        if name == "model.embed_tokens.weight":
+            return [(self.map_tensor_name("token_embd.weight"), data_torch)]
+
+        # Handle routed experts
+        if "mlp.experts" in name and "shared_experts" not in name:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
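+            # Each layer contributes n_experts gate/up/down tensors; buffer them
+            # and emit nothing until the full set for this layer has arrived.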
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
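+                    # data_torch now has shape (n_experts, out_features, in_features)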
+                    # Generate GGUF tensor names for the merged experts
+                    if w_name == "down_proj":
+                        new_name = f"blk.{bid}.ffn_down_exps.weight"
+                    elif w_name == "gate_proj":
+                        new_name = f"blk.{bid}.ffn_gate_exps.weight"
+                    elif w_name == "up_proj":
+                        new_name = f"blk.{bid}.ffn_up_exps.weight"
+                    else:  # fallback via the generic map (unreachable for the names above)
+                        merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                        new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        # Handle the expert routing-score bias (mapped onto the gate input bias)
+        if ".mlp.gate.e_score_correction_bias" in name:
+            new_name = name.replace("model.layers.", "blk.").replace(
+                ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
+            )
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Handle shared expert tensors
+        if ".mlp.ffn_" in name and "_shexp" in name:
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        # Handle regular dense FFN layers (for hybrid dense/MoE architecture)
+        if ".mlp." in name and "experts" not in name and "_shexp" not in name:
+            if "gate_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.gate_proj.weight", ".ffn_gate.weight"
+                )
+            elif "up_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.up_proj.weight", ".ffn_up.weight"
+                )
+            elif "down_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.down_proj.weight", ".ffn_down.weight"
+                )
+            else:
+                new_name = name
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Skip any remaining nextn-prediction tensors (embeddings, shared head,
+        # eh_proj, and the e/h norms)
+        if (
+            ".embed_tokens." in name
+            or ".shared_head." in name
+            or ".eh_proj." in name
+            or ".enorm." in name
+            or ".hnorm." in name
+        ):
+            return []
+
+        return super().modify_tensors(data_torch, name, bid)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
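+        # Sanity check: every buffered expert tensor should have been merged
+        # and emitted by modify_tensors; leftovers indicate missing weights.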
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM