@@ -7843,6 +7843,8 @@ struct llm_build_bert : public llm_graph_context {
7843
7843
}
7844
7844
7845
7845
if (model.layers[il].attn_q_norm) {
7846
+ Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
7847
+
7846
7848
Qcur = build_norm(Qcur,
7847
7849
model.layers[il].attn_q_norm,
7848
7850
model.layers[il].attn_q_norm_b,
@@ -7852,6 +7854,8 @@ struct llm_build_bert : public llm_graph_context {
7852
7854
}
7853
7855
7854
7856
if (model.layers[il].attn_k_norm) {
7857
+ Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
7858
+
7855
7859
Kcur = build_norm(Kcur,
7856
7860
model.layers[il].attn_k_norm,
7857
7861
model.layers[il].attn_k_norm_b,
@@ -8234,6 +8238,9 @@ struct llm_build_mpt : public llm_graph_context {
8234
8238
8235
8239
// Q/K Layernorm
8236
8240
if (model.layers[il].attn_q_norm) {
8241
+ Qcur = ggml_reshape_2d(ctx0, Qcur, n_embd_head*n_head, n_tokens);
8242
+ Kcur = ggml_reshape_2d(ctx0, Kcur, n_embd_head*n_head_kv, n_tokens);
8243
+
8237
8244
Qcur = build_norm(Qcur,
8238
8245
model.layers[il].attn_q_norm,
8239
8246
model.layers[il].attn_q_norm_b,
0 commit comments