@@ -83,7 +83,7 @@ class llm_graph_input_i {
83
83
84
84
// return true if the resulting input tensors using the provided graph parameters would be
85
85
// the same as the previous input tensors that we have currently stored in the object
86
- virtual bool update (const llm_graph_params & params) {
86
+ virtual bool can_reuse (const llm_graph_params & params) {
87
87
// returning false here by default prevents the graph from being reused if the check
88
88
// for the input type has not been implemented yet
89
89
GGML_UNUSED (params);
@@ -100,7 +100,7 @@ class llm_graph_input_embd : public llm_graph_input_i {
100
100
101
101
void set_input (const llama_ubatch * ubatch) override ;
102
102
103
- bool update (const llm_graph_params & params) override ;
103
+ bool can_reuse (const llm_graph_params & params) override ;
104
104
105
105
ggml_tensor * tokens = nullptr ; // I32 [n_batch]
106
106
ggml_tensor * embd = nullptr ; // F32 [n_embd, n_batch]
@@ -113,7 +113,7 @@ class llm_graph_input_pos : public llm_graph_input_i {
113
113
114
114
void set_input (const llama_ubatch * ubatch) override ;
115
115
116
- bool update (const llm_graph_params & params) override ;
116
+ bool can_reuse (const llm_graph_params & params) override ;
117
117
118
118
ggml_tensor * pos = nullptr ; // I32 [n_batch]
119
119
@@ -173,7 +173,7 @@ class llm_graph_input_out_ids : public llm_graph_input_i {
173
173
174
174
void set_input (const llama_ubatch * ubatch) override ;
175
175
176
- bool update (const llm_graph_params & params) override ;
176
+ bool can_reuse (const llm_graph_params & params) override ;
177
177
178
178
ggml_tensor * out_ids; // I32 [n_outputs]
179
179
@@ -265,7 +265,7 @@ class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
265
265
266
266
void set_input (const llama_ubatch * ubatch) override ;
267
267
268
- bool update (const llm_graph_params & params) override ;
268
+ bool can_reuse (const llm_graph_params & params) override ;
269
269
270
270
ggml_tensor * get_k_idxs () const { return self_k_idxs; }
271
271
ggml_tensor * get_v_idxs () const { return self_v_idxs; }
@@ -298,7 +298,7 @@ class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i {
298
298
299
299
void set_input (const llama_ubatch * ubatch) override ;
300
300
301
- bool update (const llm_graph_params & params) override ;
301
+ bool can_reuse (const llm_graph_params & params) override ;
302
302
303
303
ggml_tensor * get_k_idxs () const { return self_k_idxs; }
304
304
ggml_tensor * get_v_idxs () const { return self_v_idxs; }
@@ -388,7 +388,7 @@ class llm_graph_result_i {
388
388
389
389
virtual void set_inputs (const llama_ubatch * ubatch) = 0;
390
390
391
- virtual bool update (const llm_graph_params & params) = 0;
391
+ virtual bool can_reuse (const llm_graph_params & params) = 0;
392
392
};
393
393
394
394
using llm_graph_result_ptr = std::unique_ptr<llm_graph_result_i>;
@@ -482,20 +482,20 @@ class llm_graph_result : public llm_graph_result_i {
482
482
}
483
483
}
484
484
485
- // try to update the existing graph result using the new graph parameters
485
+ // try to update the existing graph result using the new graph parameters in order to reuse it
486
486
// this can only be done if we determine that the resulting graph using the new graph parameters
487
487
// would be identical to the existing graph. in that case, we simply have to update the memory
488
488
// contexts of the input tensors of the graph and we can reuse it for another computation
489
489
// return true if the graph was updated and can be reused
490
- bool update (const llm_graph_params & params) override {
490
+ bool can_reuse (const llm_graph_params & params) override {
491
491
if (!this ->params .is_same (params)) {
492
492
return false ;
493
493
}
494
494
495
495
bool res = true ;
496
496
497
497
for (auto & input : inputs) {
498
- res &= input->update (params);
498
+ res &= input->can_reuse (params);
499
499
}
500
500
501
501
return res;
0 commit comments