@@ -154,7 +154,7 @@ class tensor_traits : public ggml::cpu::tensor_traits {
154
154
if (dst->src [0 ]->type == GGML_TYPE_Q4_0) {
155
155
return compute_forward_q4_0 (params, dst);
156
156
} else if (dst->src [0 ]->type == GGML_TYPE_F16) {
157
- return compute_forward_kv_cache (params, dst);
157
+ return compute_forward_fp16 (params, dst);
158
158
}
159
159
} else if (dst->op == GGML_OP_GET_ROWS) {
160
160
if (dst->src [0 ]->type == GGML_TYPE_Q4_0) {
@@ -164,7 +164,7 @@ class tensor_traits : public ggml::cpu::tensor_traits {
164
164
return false ;
165
165
}
166
166
167
- bool compute_forward_kv_cache (ggml_compute_params * params, struct ggml_tensor * dst) {
167
+ bool compute_forward_fp16 (ggml_compute_params * params, struct ggml_tensor * dst) {
168
168
static std::atomic_flag first_to_arrive = ATOMIC_FLAG_INIT;
169
169
170
170
const ggml_tensor * src0 = dst->src [0 ];
@@ -534,13 +534,8 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
534
534
if (op->src [0 ]->buffer && op->src [0 ]->buffer ->buft == ggml_backend_cpu_kleidiai_buffer_type ()) {
535
535
return (ggml::cpu::tensor_traits *) op->src [0 ]->extra ;
536
536
}
537
- else if (ggml_kleidiai_select_kernels (ctx.features , op) &&
538
- op->src [0 ]->op == GGML_OP_VIEW &&
539
- (op->src [1 ]->op == GGML_OP_PERMUTE || op->src [1 ]->op == GGML_OP_SOFT_MAX) &&
540
- op->src [1 ]->ne [1 ] > 1 ) {
541
- if ((op->src [0 ]->nb [0 ] != 2 ) ||
542
- (op->src [1 ]->nb [0 ] != 4 ) ||
543
- (op->src [0 ]->nb [1 ] * op->src [0 ]->ne [1 ] != op->src [0 ]->nb [2 ]) ||
537
+ else if (ggml_kleidiai_select_kernels (ctx.features , op) && op->src [1 ]->ne [1 ] > 1 ) {
538
+ if ((op->src [0 ]->nb [1 ] * op->src [0 ]->ne [1 ] != op->src [0 ]->nb [2 ]) ||
544
539
(op->src [1 ]->nb [1 ] * op->src [1 ]->ne [1 ] != op->src [1 ]->nb [2 ])) {
545
540
return nullptr ;
546
541
}
0 commit comments