@@ -1216,6 +1216,55 @@ def _try_set_pooling_type(self) -> None:
                 raise NotImplementedError("Only MEAN, CLS, and LAST pooling types supported")
             self.gguf_writer.add_pooling_type(pooling_type)

+    def _set_vocab_interns1(self):
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
+        vocab_size = self.hparams.get("vocab_size", len(vocab))
+        assert max(vocab.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
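+        # ids absent from the tokenizer are filled with [PAD{id}] placeholders typed as UNUSED,
+        # so the token table is dense up to vocab_size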
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            else:
+                token: str = reverse_vocab[i]
+                if token in added_vocab:
+                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
+                    # To avoid unexpected issues, we make sure to normalize non-normalized tokens here.
+                    if not added_tokens_decoder[i].normalized:
+                        previous_token = token
+                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+                        if previous_token != token:
+                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
+
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    toktypes.append(gguf.TokenType.NORMAL)
+                tokens.append(token)
+
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
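+        # 151643 is "<|endoftext|>" in the Qwen-family vocab that these checkpoints build on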
+        special_vocab._set_special_token("bos", 151643)
+        special_vocab.add_to_gguf(self.gguf_writer)
+

 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
@@ -2932,7 +2981,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if "language_model." in name:
             name = name.replace("language_model.", "")  # for InternVL
         if name.startswith("mlp") or name.startswith("multi_modal_projector") \
-                or name.startswith("vision_model") or name.startswith("audio_tower"):
+                or name.startswith("vision_model") or name.startswith("audio_tower") \
+                or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector"):
             # skip vision and audio tensors
             return []
         yield from super().modify_tensors(data_torch, name, bid)
@@ -3109,7 +3159,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         yield from super().modify_tensors(data_torch, name, bid)


-@ModelBase.register("Ernie4_5_ForCausalLM")
+@ModelBase.register("Ernie4_5_ForCausalLM", "Ernie4_5ForCausalLM")
 class Ernie4_5Model(TextModel):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5

@@ -3604,6 +3654,19 @@ def prepare_tensors(self):
 class Qwen3Model(Qwen2Model):
     model_arch = gguf.MODEL_ARCH.QWEN3

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
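+        # re-read config.json directly: for multimodal checkpoints self.hparams may already have
+        # been remapped to the text sub-config, while the top-level "architectures" names the wrapper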
+        hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
+        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+
+    def set_vocab(self):
+        # Intern-S1-mini ships as InternS1ForConditionalGeneration but wraps a Qwen3 text model
+        if self.origin_hf_arch == 'InternS1ForConditionalGeneration':
+            self._set_vocab_interns1()
+            return
+
+        super().set_vocab()
+

 @ModelBase.register("Qwen3MoeForCausalLM")
 class Qwen3MoeModel(Qwen2MoeModel):
@@ -3620,73 +3683,7 @@ def set_vocab(self):
             self._set_vocab_interns1()
             return

-        try:
-            self._set_vocab_sentencepiece()
-        except FileNotFoundError:
-            self._set_vocab_gpt2()
-
-    def _set_vocab_interns1(self):
-        tokens: list[str] = []
-        toktypes: list[int] = []
-
-        from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
-        vocab_size = self.hparams.get("vocab_size", len(vocab))
-        assert max(vocab.values()) < vocab_size
-
-        tokpre = self.get_vocab_base_pre(tokenizer)
-
-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
-        added_vocab = tokenizer.get_added_vocab()
-
-        added_tokens_decoder = tokenizer.added_tokens_decoder
-
-        for i in range(vocab_size):
-            if i not in reverse_vocab:
-                tokens.append(f"[PAD{i}]")
-                toktypes.append(gguf.TokenType.UNUSED)
-            else:
-                token: str = reverse_vocab[i]
-                if token in added_vocab:
-                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
-                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not added_tokens_decoder[i].normalized:
-                        previous_token = token
-                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
-                        if previous_token != token:
-                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
-
-                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
-                        toktypes.append(gguf.TokenType.CONTROL)
-                    else:
-                        toktypes.append(gguf.TokenType.USER_DEFINED)
-                else:
-                    toktypes.append(gguf.TokenType.NORMAL)
-                tokens.append(token)
-
-        self.gguf_writer.add_tokenizer_model("gpt2")
-        self.gguf_writer.add_tokenizer_pre(tokpre)
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
-        special_tokens_map_file = self.dir_model / 'special_tokens_map.json'
-        additional_special_tokens = []
-        if special_tokens_map_file.is_file():
-            with open(special_tokens_map_file, encoding='utf-8') as f:
-                additional_special_tokens = json.load(f).get('additional_special_tokens', [])
-        tokenizer_cfg_file = self.dir_model / 'special_tokens_map.json'
-        if tokenizer_cfg_file.is_file():
-            with open(tokenizer_cfg_file, encoding='utf-8') as f:
-                added_tokens_decoder = json.load(f).get('added_tokens_decoder', {})
-                token2ids_map = {data['content']: int(token) for token, data in added_tokens_decoder.items() if data['special']}
-                for token in additional_special_tokens:
-                    if token in token2ids_map:
-                        special_vocab._set_special_token(token, token2ids_map[token])
-        special_vocab._set_special_token('eos', 151645)
-        special_vocab._set_special_token("bos", 151643)
-        special_vocab.add_to_gguf(self.gguf_writer)
+        super().set_vocab()


 @ModelBase.register("GPT2LMHeadModel")
@@ -6257,9 +6254,11 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")


-@ModelBase.register("DeepseekV2ForCausalLM")
-@ModelBase.register("DeepseekV3ForCausalLM")
-@ModelBase.register("KimiVLForConditionalGeneration")
+@ModelBase.register(
+    "DeepseekV2ForCausalLM",
+    "DeepseekV3ForCausalLM",
+    "KimiVLForConditionalGeneration",
+)
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2

@@ -8510,6 +8509,43 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
             return "mm.2.weight"
         return super().map_tensor_name(name, try_suffixes)

+
+@ModelBase.register("KimiVLForConditionalGeneration")
+class KimiVLModel(MmprojModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert self.hparams_vision is not None
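+        # the vision tower is resolution-agnostic, so the checkpoint carries no fixed image_size;
+        # record a nominal 64 patches x 14 px = 896 in the GGUF metadata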
+        self.hparams_vision["image_size"] = 64 * 14  # for compatibility
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.KIMIVL)
+        self.gguf_writer.add_vision_use_gelu(True)
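+        # scale factor 2 corresponds to the projector's 2x2 pixel-shuffle patch merge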
+        self.gguf_writer.add_vision_projector_scale_factor(2)
+        # the default eps matches PyTorch's LayerNorm default
+        assert self.hparams_vision is not None
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-5))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
+
+        if is_vision_tensor:
+            if "pos_emb.weight" in name:
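+                # flatten the 2-D positional-embedding grid (H, W, C) into (H*W, C)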
+                data_torch = data_torch.view(data_torch.shape[0] * data_torch.shape[1], data_torch.shape[2])
+            elif "wqkv" in name:
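+                # fused QKV projection: q, k and v are stacked along the output dim
+                # (dim 0 for 2-D weights, the only dim for 1-D biases)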
+                split_dim = 0 if "weight" in name else -1
+                wq, wk, wv = data_torch.chunk(3, dim=split_dim)
+                return [
+                    (self.map_tensor_name(name.replace("wqkv", "wq")), wq),
+                    (self.map_tensor_name(name.replace("wqkv", "wk")), wk),
+                    (self.map_tensor_name(name.replace("wqkv", "wv")), wv)
+                ]
+
+            return [(self.map_tensor_name(name), data_torch)]
+
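+        # text tensors are skipped here; DeepseekV2Model, also registered for
+        # KimiVLForConditionalGeneration above, converts them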
+        return []  # skip other tensors
+
 ###### CONVERSION LOGIC ######

