@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # ref: https://huggingface.co/tencent/Hunyuan-4B-Instruct
+            res = "hunyuan-dense"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
@@ -7553,11 +7556,6 @@ def set_gguf_parameters(self):
 class HunYuanMoEModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # For handling tied embeddings
-        self._tok_embd = None
-
     def set_vocab(self):
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
@@ -7651,9 +7649,6 @@ def set_gguf_parameters(self):
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        if name == "model.embed_tokens.weight":
-            self._tok_embd = data_torch.clone()
-
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
@@ -7698,6 +7693,98 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+            # 1. Get the pre-tokenizer identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
+
+            # 2. Reverse-engineer the merges list from mergeable_ranks
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # 3. Generate the tokens and toktypes lists
+            vocab_size = self.hparams["vocab_size"]
+            assert tokenizer.vocab_size == vocab_size
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            # 4. Write all vocab-related fields to the GGUF writer
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            # 5. Add special tokens and chat templates
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        # FIX for BOS token: overwrite the incorrect id read from config.json
+        if self.hparams['hidden_size'] == 4096:
+            self.gguf_writer.add_bos_token_id(127958)  # only for the 7B dense model, fix <|bos|> token
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK-aware alpha-based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # alpha = 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = hparams["head_dim"]
+            scaled_base = base * (alpha ** (dim / (dim - 2)))
+            self.gguf_writer.add_rope_freq_base(scaled_base)
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert base == 10000.0 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
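For reference, a worked instance of the NTK-aware alpha scaling that set_gguf_parameters above derives. The concrete values (alpha = 1000, head_dim = 128, rope_theta = 10000) are illustrative numbers typical of HunYuan configs, not read from a specific checkpoint; with them the effective frequency base comes out around 1.1e7.

    # Sketch: effective RoPE frequency base under NTK-aware alpha scaling.
    base = 10000.0                                    # rope_theta
    alpha = 1000.0                                    # rope_scaling["alpha"]
    dim = 128                                         # head_dim
    scaled_base = base * (alpha ** (dim / (dim - 2)))
    print(f"{scaled_base:.4g}")                       # ~1.116e+07
    # Baking the scaling into rope_freq_base is why the converter writes
    # RopeScalingType.NONE and a fixed 256k context length.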
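The merges reconstruction in set_vocab works by re-running byte-level BPE on each multi-byte token while refusing to apply any merge whose rank is not lower than the token's own rank; whatever two parts remain are exactly the pair whose merge produces that token. A standalone sketch of the idea (the helper name here is made up; the converter uses QwenModel.bpe for this):

    def bpe_parents(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
        # Re-run BPE on the bytes of `token`, but never apply a merge whose
        # rank is >= max_rank (the rank of `token` itself).
        parts = [bytes([b]) for b in token]
        while True:
            best_idx, best_rank = None, None
            for i in range(len(parts) - 1):
                rank = mergeable_ranks.get(parts[i] + parts[i + 1])
                if rank is not None and (best_rank is None or rank < best_rank):
                    best_idx, best_rank = i, rank
            if best_rank is None or best_rank >= max_rank:
                break
            parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
        return parts

    # For a token of rank r, the two returned parts become one "left right"
    # entry in the GGUF merges list.
    ranks = {b"a": 0, b"b": 1, b"c": 2, b"ab": 3, b"abc": 4}
    assert bpe_parents(ranks, b"abc", max_rank=4) == [b"ab", b"c"]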