Skip to content

Commit 41366a4

Browse files
committed
graph : fix can_reuse() checks when flash-attention is disabled
1 parent a872790 commit 41366a4

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

src/llama-graph.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ bool llm_graph_input_attn_kv_unified::can_reuse(const llm_graph_params & params)
320320
bool res = true;
321321

322322
res &= self_k_idxs->ne[0] == params.ubatch.n_tokens;
323-
res &= self_v_idxs->ne[0] == params.ubatch.n_tokens;
323+
//res &= self_v_idxs->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there
324324

325325
res &= self_kq_mask->ne[0] == mctx->get_n_kv();
326326
res &= self_kq_mask->ne[1] == GGML_PAD(params.ubatch.n_tokens, GGML_KQ_MASK_PAD);
@@ -350,10 +350,10 @@ bool llm_graph_input_attn_kv_unified_iswa::can_reuse(const llm_graph_params & pa
350350
bool res = true;
351351

352352
res &= self_k_idxs->ne[0] == params.ubatch.n_tokens;
353-
res &= self_v_idxs->ne[0] == params.ubatch.n_tokens;
353+
//res &= self_v_idxs->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there
354354

355355
res &= self_k_idxs_swa->ne[0] == params.ubatch.n_tokens;
356-
res &= self_v_idxs_swa->ne[0] == params.ubatch.n_tokens;
356+
//res &= self_v_idxs_swa->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there
357357

358358
res &= self_kq_mask->ne[0] == mctx->get_base()->get_n_kv();
359359
res &= self_kq_mask->ne[1] == GGML_PAD(params.ubatch.n_tokens, GGML_KQ_MASK_PAD);

0 commit comments

Comments (0)