@@ -6578,6 +6578,179 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("Glm4MoeForCausalLM")
+class Glm4MoeModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.GLM4_MOE
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+
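+        # trust_remote_code allows any custom tokenizer code bundled with the
+        # checkpoint to be loaded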
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.dir_model, trust_remote_code=True
+        )
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
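+        # GLM-4.5 reuses <|endoftext|> for eos/unk/bos, while <|user|> marks
+        # the end of an assistant turn (eot)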
+        special_vocab._set_special_token(
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        special_vocab._set_special_token(
+            "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab._set_special_token(
+            "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
+        )
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
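+        # GLM4_MOE applies rotary embeddings to only part of each head;
+        # default to a partial_rotary_factor of 0.5 when the config omits it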
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = (
+                self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+            )
+        self.gguf_writer.add_rope_dimension_count(
+            int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))
+        )
+
+        # MoE parameters
+        if (n_experts := self.hparams.get("n_routed_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        # Note: expert_used_count is already set by the parent class using num_experts_per_tok
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
+            self.gguf_writer.add_expert_shared_count(n_shared_experts)
+        if (first_k_dense_replace := self.hparams.get("first_k_dense_replace")) is not None:
+            self.gguf_writer.add_leading_dense_block_count(first_k_dense_replace)
+
+        # Expert gating function (sigmoid for GLM4_MOE)
+        self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+
+        # Routed scaling factor
+        if (routed_scaling_factor := self.hparams.get("routed_scaling_factor")) is not None:
+            self.gguf_writer.add_expert_weights_scale(routed_scaling_factor)
+
+        # Normalise top-k probabilities
+        if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
+            self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
+
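+    # Per-layer buffers that accumulate individual expert tensors until a
+    # block's full set can be stacked into a single 3D tensor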
+    _experts: list[dict[str, Tensor]] | None = None
+    _shared_experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(
+        self, data_torch: Tensor, name: str, bid: int | None
+    ) -> Iterable[tuple[str, Tensor]]:
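+        # NOTE: bid == 46 is the NextN/MTP layer, one past the last regular
+        # transformer block; the hardcoded index assumes a 46-layer GLM-4.5
+        # style config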
+        # Handle layer 46 tensors - preserve all for future MTP support
+        if bid is not None and bid == 46:
+            # Convert layer 46 tensors to GGUF naming but don't try to map them
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        if name.startswith("model.visual."):  # ignore visual part
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for multimodal variants
+
+        # Handle main token embedding
+        if name == "model.embed_tokens.weight":
+            return [(self.map_tensor_name("token_embd.weight"), data_torch)]
+
+        # Handle routed experts (skip for NextN layer 46)
+        if name.find("mlp.experts") != -1 and "shared_experts" not in name and bid != 46:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
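+            # each expert has three projections (gate/up/down); merge once the
+            # full set for this block has been collected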
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+                    # Generate GGUF tensor names for merged experts
+                    if w_name == "down_proj":
+                        new_name = f"blk.{bid}.ffn_down_exps.weight"
+                    elif w_name == "gate_proj":
+                        new_name = f"blk.{bid}.ffn_gate_exps.weight"
+                    elif w_name == "up_proj":
+                        new_name = f"blk.{bid}.ffn_up_exps.weight"
+                    else:
+                        merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                        new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        # Handle expert gating input (routing gate)
+        if ".mlp.gate.e_score_correction_bias" in name:
+            new_name = name.replace("model.layers.", "blk.").replace(
+                ".mlp.gate.e_score_correction_bias", ".ffn_gate_inp.bias"
+            )
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Pass through shared-expert tensors that already use GGUF naming
+        if ".mlp.ffn_" in name and "_shexp" in name:
+            new_name = name.replace("model.layers.", "blk.")
+            return [(new_name, data_torch)]
+
+        # Handle regular dense FFN layers (for hybrid dense/MoE architecture)
+        if ".mlp." in name and "experts" not in name and "_shexp" not in name:
+            if "gate_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.gate_proj.weight", ".ffn_gate.weight"
+                )
+            elif "up_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.up_proj.weight", ".ffn_up.weight"
+                )
+            elif "down_proj" in name:
+                new_name = name.replace("model.layers.", "blk.").replace(
+                    ".mlp.down_proj.weight", ".ffn_down.weight"
+                )
+            else:
+                new_name = name
+            return [(self.map_tensor_name(new_name), data_torch)]
+
+        # Handle special NextN tensors - preserve for future MTP support
+        if (
+            ".embed_tokens." in name
+            or ".shared_head." in name
+            or ".eh_proj." in name
+            or ".enorm." in name
+            or ".hnorm." in name
+        ):
+            # For NextN tensors, convert to GGUF naming convention
+            new_name = name.replace("model.layers.", "blk.").replace("model.", "")
+            return [(new_name, data_torch)]
+
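+        # Everything else (attention, layer norms, output head) falls through
+        # to the standard tensor-name mapping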
+        return super().modify_tensors(data_torch, name, bid)
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CHATGLM