
Commit 0acdb78

Fix broken rebase
1 parent f53b313 commit 0acdb78

4 files changed, +36 -55 lines changed

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 3 deletions
@@ -428,9 +428,9 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w1", # arctic
             "transformer.h.{bid}.mlp.c_fc_0", # exaone
             "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
-            "model.layers.{bid}.block_sparse_moe.gate", # smallthinker
-            "model.transformer.blocks.{bid}.ff_proj", # llada
-            "layers.{bid}.mlp.gate_proj", # qwen3-embedding
+            "model.layers.{bid}.block_sparse_moe.gate", # smallthinker
+            "model.transformer.blocks.{bid}.ff_proj", # llada
+            "layers.{bid}.mlp.gate_proj", # qwen3-embedding
             "model.layers.{bid}.mlp.language_mlp.gate_proj", # cogvlm
         ),
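
The hunk above appears to adjust only the alignment of these FFN gate entries: several per-architecture Hugging Face tensor-name templates, each with a {bid} block-index placeholder, all resolve to one canonical GGUF tensor. A minimal C++ sketch of that substitution idea follows; the real implementation is the Python TensorNameMap in gguf-py, and the canonical target name printed here is an illustrative assumption.

// Sketch only: several source-model tensor-name templates map onto one
// canonical GGUF name once the "{bid}" placeholder is filled in.
// The real logic is Python (gguf-py/gguf/tensor_mapping.py).
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

int main() {
    // a few of the FFN gate templates listed in the hunk above
    const std::vector<std::string> gate_templates = {
        "model.layers.{bid}.feed_forward.gate_proj",  // llama4 jamba granite-hybrid
        "model.layers.{bid}.block_sparse_moe.gate",   // smallthinker
        "layers.{bid}.mlp.gate_proj",                 // qwen3-embedding
    };

    const int bid = 3; // block (layer) index to substitute
    for (const auto & tmpl : gate_templates) {
        // fill the "{bid}" placeholder with the concrete block index
        const std::string name =
            std::regex_replace(tmpl, std::regex("\\{bid\\}"), std::to_string(bid));
        // every source-model variant maps onto the same canonical tensor
        std::printf("%-45s -> blk.%d.ffn_gate\n", name.c_str(), bid);
    }
    return 0;
}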

src/llama-arch.cpp

Lines changed: 24 additions & 28 deletions
@@ -1980,27 +1980,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
     },
     {
         LLM_ARCH_DREAM,
-        {
-            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
-            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
-            { LLM_TENSOR_OUTPUT,         "output" },
-            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
-            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
-            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
-            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
-            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
-            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
-            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
-            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
-            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
-            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
-            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
-            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
-            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" }
-        },
-    },
-    {
-        LLM_ARCH_COGVLM,
         {
             { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
@@ -2014,13 +1993,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
             { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
-            { LLM_TENSOR_VISEXP_ATTN_WQ,  "blk.%d.vis_attn_q" },
-            { LLM_TENSOR_VISEXP_ATTN_WK,  "blk.%d.vis_attn_k" },
-            { LLM_TENSOR_VISEXP_ATTN_WV,  "blk.%d.vis_attn_v" },
-            { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
-            { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
-            { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
-            { LLM_TENSOR_VISEXP_FFN_UP,   "blk.%d.vis_up" },
         },
     },
     {
@@ -2040,6 +2012,30 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_COGVLM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+            { LLM_TENSOR_VISEXP_ATTN_WQ,  "blk.%d.vis_attn_q" },
+            { LLM_TENSOR_VISEXP_ATTN_WK,  "blk.%d.vis_attn_k" },
+            { LLM_TENSOR_VISEXP_ATTN_WV,  "blk.%d.vis_attn_v" },
+            { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
+            { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
+            { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
+            { LLM_TENSOR_VISEXP_FFN_UP,   "blk.%d.vis_up" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
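
This file restores the per-architecture tensor-name tables after the rebase: LLM_ARCH_DREAM keeps its plain table, and the LLM_ARCH_COGVLM entry (including its vis_* vision-expert tensors) is re-added as its own block. Each entry pairs an llm_tensor id with a printf-style template where %d is the layer index. Below is a small self-contained C++ sketch of how such a template expands into concrete per-layer names; the enum and helper are simplified stand-ins, not the real llm_arch machinery.

// Sketch only: expands "blk.%d...." templates from a per-architecture map
// into concrete per-layer tensor names.
#include <cstdio>
#include <map>
#include <string>

enum llm_tensor_sketch {
    TENSOR_ATTN_Q,
    TENSOR_FFN_GATE,
    TENSOR_VISEXP_FFN_UP,
};

static const std::map<llm_tensor_sketch, const char *> COGVLM_TENSOR_NAMES = {
    { TENSOR_ATTN_Q,        "blk.%d.attn_q"   },
    { TENSOR_FFN_GATE,      "blk.%d.ffn_gate" },
    { TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up"   }, // vision-expert tensor specific to cogvlm
};

static std::string tensor_name(llm_tensor_sketch t, int layer) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), COGVLM_TENSOR_NAMES.at(t), layer);
    return buf;
}

int main() {
    for (int il = 0; il < 2; ++il) {
        std::printf("%s  %s  %s\n",
                    tensor_name(TENSOR_ATTN_Q,        il).c_str(),
                    tensor_name(TENSOR_FFN_GATE,      il).c_str(),
                    tensor_name(TENSOR_VISEXP_FFN_UP, il).c_str());
    }
    return 0;
}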

src/llama-model.cpp

Lines changed: 1 addition & 13 deletions
@@ -5353,12 +5353,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

            // output
            output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-           output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
-
-           // if output is NULL, init from the input tok embed
-           if (output == NULL) {
-               output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
-           }
+           output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);

            for (int i = 0; i < n_layer; ++i) {
                auto & layer = layers[i];
@@ -17661,7 +17656,6 @@ struct llm_build_cogvlm : public llm_graph_context {
        Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
        Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

-       // TODO: Check Rope because this might not be the same as cogvlm
        Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);
        Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);

@@ -17675,12 +17669,6 @@
        cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
        cb(cur, "ffn_norm", il);

-       // Make a standard ffn without the build_ffn function
-       //ggml_tensor * tmp = build_lora_mm(ffn_up, cur);
-       //ggml_tensor * gate = build_lora_mm(ffn_gate, cur);
-       //gate = ggml_silu(ctx0, gate);
-       //cur = ggml_mul(ctx0, gate, tmp);
-       //cur = build_lora_mm(ffn_down, cur);
        cur = build_ffn(cur,
            ffn_up, NULL, NULL,
            ffn_gate, NULL, NULL,
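
Two things change in this file: the cogvlm graph drops stale comments, and the output projection is now loaded with flag 0 (required) instead of TENSOR_NOT_REQUIRED plus a fallback that reused the token-embedding matrix. A self-contained C++ sketch of that optional-versus-required loading pattern follows; find_tensor() and the in-memory "file" are hypothetical stand-ins for the real create_tensor()/tn() API.

// Sketch only: the "tied embeddings" fallback that the removed lines expressed,
// versus simply requiring the tensor as the new code does.
#include <cstdio>
#include <map>
#include <string>

// pretend model file: contains token embeddings but no separate output matrix
static const std::map<std::string, int> file_tensors = {
    { "token_embd.weight", 0 },
};

static const int * find_tensor(const std::string & name) {
    auto it = file_tensors.find(name);
    return it == file_tensors.end() ? nullptr : &it->second;
}

int main() {
    // optional lookup: before the commit, a missing output tensor fell back
    // to the token-embedding matrix (weight tying)
    const int * output = find_tensor("output.weight");
    if (output == nullptr) {
        output = find_tensor("token_embd.weight");
        std::printf("output.weight missing, tying to token_embd.weight\n");
    }

    // required lookup: after the commit, the cogvlm loader simply demands the tensor
    if (find_tensor("output.weight") == nullptr) {
        std::printf("required tensor output.weight not found -> load error\n");
    }
    return 0;
}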

tools/mtmd/clip.cpp

Lines changed: 8 additions & 11 deletions
@@ -1669,11 +1669,8 @@ struct clip_graph {
        auto & layer = model.layers[il];
        ggml_tensor * cur = inpL; // inpL = residual, cur = hidden_states

-       // Check if this is COGVLM projector type for post-norm layernorm order
-       const bool is_cogvlm = ctx->proj_type() == PROJECTOR_TYPE_COGVLM;
-
-       // layernorm1 (only for non-COGVLM)
-       if (!is_cogvlm) {
+       // layernorm1
+       if (ctx->proj_type() != PROJECTOR_TYPE_COGVLM) {
            cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, norm_t, eps, il);
            cb(cur, "layer_inp_normed", il);
        }
@@ -1730,8 +1727,8 @@
            cb(cur, "attn_out_scaled", il);
        }

-       // Apply layernorm AFTER attention for COGVLM (post-norm)
-       if (is_cogvlm) {
+       // Apply layernorm after attention for cogvlm
+       if (ctx->proj_type() == PROJECTOR_TYPE_COGVLM) {
            cur = build_norm(cur, layer.ln_1_w, layer.ln_1_b, norm_t, eps, il);
            cb(cur, "attn_post_norm", il);
        }
@@ -1743,8 +1740,8 @@

        cb(cur, "ffn_inp", il);

-       // layernorm2 (only for non-COGVLM)
-       if (!is_cogvlm) {
+       // layernorm2
+       if (ctx->proj_type() != PROJECTOR_TYPE_COGVLM) {
            cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, norm_t, eps, il);
            cb(cur, "ffn_inp_normed", il);
        }
@@ -1763,8 +1760,8 @@
            cb(cur, "ffn_out_scaled", il);
        }

-       // Apply layernorm AFTER MLP for COGVLM (post-norm)
-       if (is_cogvlm) {
+       // Apply layernorm after mlp for cogvlm
+       if (ctx->proj_type() == PROJECTOR_TYPE_COGVLM) {
            cur = build_norm(cur, layer.ln_2_w, layer.ln_2_b, norm_t, eps, il);
            cb(cur, "ffn_post_norm", il);
        }
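
The clip.cpp change simplifies the branches that give CogVLM its post-norm layer order: the usual ViT block normalizes before attention and before the MLP, while the PROJECTOR_TYPE_COGVLM path normalizes after each. Below is a self-contained C++ sketch contrasting the two orderings; the scalar norm/attn/mlp functions are placeholders for the real ggml graphs (build_norm and friends), not the actual implementation.

// Sketch only: pre-norm vs post-norm residual blocks, which is what the
// PROJECTOR_TYPE_COGVLM branches above switch between.
#include <cstdio>

static float norm(float x) { return x * 0.5f; } // stand-in for layernorm
static float attn(float x) { return x + 1.0f; } // stand-in for self-attention
static float mlp(float x)  { return x * 2.0f; } // stand-in for the feed-forward block

static float block(float x, bool post_norm /* true for cogvlm */) {
    float cur = x;
    if (!post_norm) cur = norm(cur);   // layernorm1 before attention (pre-norm ViT)
    cur = attn(cur);
    if (post_norm)  cur = norm(cur);   // layernorm after attention (cogvlm)
    float ffn_inp = x + cur;           // residual add

    cur = ffn_inp;
    if (!post_norm) cur = norm(cur);   // layernorm2 before the MLP (pre-norm ViT)
    cur = mlp(cur);
    if (post_norm)  cur = norm(cur);   // layernorm after the MLP (cogvlm)
    return ffn_inp + cur;              // second residual add
}

int main() {
    std::printf("pre-norm : %f\n", block(1.0f, false));
    std::printf("post-norm: %f\n", block(1.0f, true));
    return 0;
}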

0 commit comments

Comments
 (0)