@@ -111,6 +111,8 @@ const char * llm_type_name(llm_type type) {
         case LLM_TYPE_30B_A3B:    return "30B.A3B";
         case LLM_TYPE_235B_A22B:  return "235B.A22B";
         case LLM_TYPE_300B_A47B:  return "300B.A47B";
+        case LLM_TYPE_9B_A2B:     return "9B.A2B";
+        case LLM_TYPE_32B_A7B:    return "32B.A7B";
         case LLM_TYPE_E2B:        return "E2B";
         case LLM_TYPE_E4B:        return "E4B";
         default:                  return "?B";
@@ -1435,8 +1437,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 }

                 switch (hparams.n_layer) {
-                    case 46: type = LLM_TYPE_12B;     break; // GLM-4.5-Air
-                    case 93: type = LLM_TYPE_32B;     break; // GLM-4.5
+                    case 46: type = LLM_TYPE_9B_A2B;  break; // GLM-4.5-Air
+                    case 93: type = LLM_TYPE_32B_A7B; break; // GLM-4.5
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
@@ -4393,9 +4395,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
                     layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_k_gqa }, 0);
                     layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_v_gqa }, 0);
-                    layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd }, TENSOR_NOT_REQUIRED);
-                    layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_gqa }, TENSOR_NOT_REQUIRED);
-                    layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_gqa }, TENSOR_NOT_REQUIRED);
+                    layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), { n_embd_head_k * n_head }, TENSOR_NOT_REQUIRED);
+                    layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), { n_embd_k_gqa }, TENSOR_NOT_REQUIRED);
+                    layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), { n_embd_v_gqa }, TENSOR_NOT_REQUIRED);

                     layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd, n_embd }, 0);
                     layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), { n_embd }, 0);
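
The hunk above keys the Q/K/V bias shapes to the projections' output widths rather than to n_embd / n_embd_gqa. A minimal standalone C++ sketch of that shape invariant follows; it is not part of this change, and the numeric values and main() wrapper are illustrative placeholders (only the dimension names mirror the hyperparameters used in the diff).

    // Sketch: a projection bias has one entry per output channel of its weight,
    // so bq/bk/bv must follow the Q/K/V output widths, not n_embd / n_embd_gqa.
    // All concrete values below are placeholders for illustration only.
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t n_embd        = 4096; // model width (placeholder)
        const int64_t n_head        = 96;   // query heads (placeholder)
        const int64_t n_head_kv     = 8;    // key/value heads under GQA (placeholder)
        const int64_t n_embd_head_k = 128;  // per-head K/Q width (placeholder)
        const int64_t n_embd_head_v = 128;  // per-head V width (placeholder)

        // Output widths of the Q/K/V projections, matching the weight shapes in the hunk:
        const int64_t q_out = n_embd_head_k * n_head;     // wq output -> length of bq
        const int64_t k_out = n_embd_head_k * n_head_kv;  // n_embd_k_gqa -> length of bk
        const int64_t v_out = n_embd_head_v * n_head_kv;  // n_embd_v_gqa -> length of bv

        // When n_embd_head_k * n_head != n_embd (as with these placeholder values),
        // a bias of size n_embd would not line up with wq's output channels.
        printf("bq: %lld  bk: %lld  bv: %lld  (n_embd: %lld)\n",
               (long long) q_out, (long long) k_out, (long long) v_out,
               (long long) n_embd);
        return 0;
    }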