Commit 7a5747d

Fix README and convert_hf_to_gguf
1 parent 5644f2f commit 7a5747d

File tree

2 files changed: +2 -28 lines

convert_hf_to_gguf.py

Lines changed: 0 additions & 26 deletions
@@ -2858,9 +2858,6 @@ class LLaDAModel(TextModel):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # fix for SmolVLM2, missing `num_attention_heads` in config.json
-        if self.hf_arch == "VLlama3ForCausalLM":
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
 
     def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         tokens: list[str] = []
@@ -2909,29 +2906,6 @@ def set_vocab(self):
                 # Llama 3
                 self._set_vocab_gpt2()
 
-        # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
-        if self.hparams.get("vocab_size", 32000) == 32016:
-            special_vocab = gguf.SpecialVocab(
-                self.dir_model, load_merges=False,
-                special_token_types = ['prefix', 'suffix', 'middle', 'eot']
-            )
-            special_vocab._set_special_token("prefix", 32007)
-            special_vocab._set_special_token("suffix", 32008)
-            special_vocab._set_special_token("middle", 32009)
-            special_vocab._set_special_token("eot", 32010)
-            special_vocab.add_to_gguf(self.gguf_writer)
-
-        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
-        if tokenizer_config_file.is_file():
-            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
-                tokenizer_config_json = json.load(f)
-            if "add_prefix_space" in tokenizer_config_json:
-                self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
-
-        # Apply to granite small models only
-        if self.hparams.get("vocab_size", 32000) == 49152:
-            self.gguf_writer.add_add_bos_token(False)
-
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self._try_set_pooling_type()
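
With these Llama-only special cases (SmolVLM2 head-count fallback, CodeLlama FIM tokens, granite BOS handling) removed from LLaDAModel, LLaDA checkpoints are converted the same way as other models supported by the script. A typical invocation might look like the following; the model directory and output filename are placeholders:

    python convert_hf_to_gguf.py /path/to/LLaDA-8B-Base --outfile llada-8b-base-f16.gguf --outtype f16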

examples/diffusion/README.md

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@ This directory contains implementations for diffusion-based text generation usin
 - https://huggingface.co/Dream-org/Dream-v0-Base-7B
 - Original PR - https://github.com/ggml-org/llama.cpp/pull/14644
 
-The Dream model supports four different sampling algorithms controlled by the `--diffusion-alg` parameter:
+The Dream model supports four different sampling algorithms controlled by the `--diffusion-algorithm` parameter:
 
 1. **ORIGIN (0)** - Original diffusion algorithm
    - Uses probability transfer based on timestep ratios
@@ -30,7 +30,7 @@ The Dream model supports four different sampling algorithms controlled by the `-
 
 ### LLaDA Model Remasking Strategies
 
-The LLaDA model uses two remasking approaches controlled by the `--diffusion-alg` parameter:
+The LLaDA model uses two remasking approaches controlled by the `--diffusion-algorithm` parameter:
 
 1. **REMASKING_LOW_CONFIDENCE (0)** - Default strategy
    - Remasks tokens with lowest confidence scores
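
The renamed `--diffusion-algorithm` flag takes the numeric codes shown in parentheses above (e.g. 0 selects ORIGIN for Dream and REMASKING_LOW_CONFIDENCE for LLaDA). As a usage sketch, assuming the example builds to a `llama-diffusion-cli` binary and uses the standard `-m`/`-p` model and prompt options, with placeholder file and prompt values:

    llama-diffusion-cli -m dream-v0-base-7b-f16.gguf -p "Once upon a time" --diffusion-algorithm 0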

0 commit comments
