@@ -18137,9 +18137,8 @@ struct llm_build_cogvlm : public llm_graph_context {
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

- // TODO: Check Rope because this might not be the same as cogvlm
- Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);
- Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, GGML_ROPE_TYPE_NEOX);
+ Qcur = ggml_rope(ctx0, Qcur, inp_pos, n_embd_head, rope_type);
+ Kcur = ggml_rope(ctx0, Kcur, inp_pos, n_embd_head, rope_type);

cur = build_attn(inp_attn, wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_out", il);
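Reviewer note (not part of the patch): the hardcoded GGML_ROPE_TYPE_NEOX is replaced with the graph's rope_type, which is presumably derived from the same per-architecture table that llama_model_rope_type() implements (edited in the hunks below). Since those hunks move LLM_ARCH_COGVLM from the NORM to the NEOX bucket, the effective rotation stays NEOX, but it is now driven by one source of truth. A minimal sketch of that invariant, assuming llama_model_rope_type() here is the public llama.h entry point:

```cpp
#include "llama.h"
#include <cassert>

// Sketch only: after this patch, a loaded CogVLM model should report NEOX,
// matching the rotation the graph code used to hardcode.
void check_cogvlm_rope(const llama_model * model) {
    assert(llama_model_rope_type(model) == LLAMA_ROPE_TYPE_NEOX);
}
```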
@@ -18151,12 +18150,6 @@ struct llm_build_cogvlm : public llm_graph_context {
cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
cb(cur, "ffn_norm", il);

- // Make a standard ffn without the build_ffn function
- //ggml_tensor * tmp = build_lora_mm(ffn_up, cur);
- //ggml_tensor * gate = build_lora_mm(ffn_gate, cur);
- //gate = ggml_silu(ctx0, gate);
- //cur = ggml_mul(ctx0, gate, tmp);
- //cur = build_lora_mm(ffn_down, cur);
cur = build_ffn(cur,
    ffn_up, NULL, NULL,
    ffn_gate, NULL, NULL,
@@ -18812,7 +18805,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_ARCEE:
case LLM_ARCH_ERNIE4_5:
case LLM_ARCH_ERNIE4_5_MOE:
- case LLM_ARCH_COGVLM:
    return LLAMA_ROPE_TYPE_NORM;

// the pairs of head values are offset by n_rot/2
@@ -18858,6 +18850,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
case LLM_ARCH_LFM2:
case LLM_ARCH_SMALLTHINKER:
case LLM_ARCH_GLM4_MOE:
+ case LLM_ARCH_COGVLM:
    return LLAMA_ROPE_TYPE_NEOX;

case LLM_ARCH_QWEN2VL:
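Reviewer note (not part of the patch): moving LLM_ARCH_COGVLM here changes the reported pairing from NORM (adjacent dimensions 2i and 2i+1 rotated together) to NEOX (dimensions i and i + n_rot/2 rotated together, per the "offset by n_rot/2" comment above), which matches the NEOX rotation the graph already applied. A small illustrative sketch of the two pairings on one head; angle(i) stands in for the usual pos * theta^(-i/n_rot) schedule, which is left out here:

```cpp
#include <cmath>
#include <cstddef>

// Sketch only: how the two rope modes pair up the dimensions of one head.
static void rope_norm(float * x, std::size_t n_rot, float (*angle)(std::size_t)) {
    for (std::size_t i = 0; i < n_rot; i += 2) {
        const float c = std::cos(angle(i)), s = std::sin(angle(i));
        const float x0 = x[i], x1 = x[i + 1];         // adjacent pair (2i, 2i+1)
        x[i]     = x0 * c - x1 * s;
        x[i + 1] = x0 * s + x1 * c;
    }
}

static void rope_neox(float * x, std::size_t n_rot, float (*angle)(std::size_t)) {
    for (std::size_t i = 0; i < n_rot / 2; ++i) {
        const float c = std::cos(angle(2 * i)), s = std::sin(angle(2 * i));
        const float x0 = x[i], x1 = x[i + n_rot / 2]; // pair offset by n_rot/2
        x[i]             = x0 * c - x1 * s;
        x[i + n_rot / 2] = x0 * s + x1 * c;
    }
}
```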