
Commit c7ccf38

graph : move llm_graph_result impl in source file + debug env
ggml-ci
1 parent 41366a4 commit c7ccf38
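
The new LLAMA_GRAPH_RESULT_DEBUG environment variable controls logging in llm_graph_result::can_reuse: a value above 0 logs the final reuse decision, and a value above 1 additionally logs incompatible-parameter failures and the start of the per-input compatibility checks. When unset it defaults to 0.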

File tree

2 files changed (+86, -48 lines)


src/llama-graph.cpp

Lines changed: 77 additions & 0 deletions

@@ -407,6 +407,83 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     inp_rs->set_input(ubatch);
 }
 
+//
+// llm_graph_result
+//
+
+llm_graph_result::llm_graph_result(int64_t max_nodes) : max_nodes(max_nodes) {
+    reset();
+
+    const char * LLAMA_GRAPH_RESULT_DEBUG = getenv("LLAMA_GRAPH_RESULT_DEBUG");
+    debug = LLAMA_GRAPH_RESULT_DEBUG ? atoi(LLAMA_GRAPH_RESULT_DEBUG) : 0;
+}
+
+int64_t llm_graph_result::get_max_nodes() const {
+    return max_nodes;
+}
+
+void llm_graph_result::reset() {
+    t_tokens      = nullptr;
+    t_logits      = nullptr;
+    t_embd        = nullptr;
+    t_embd_pooled = nullptr;
+
+    inputs.clear();
+
+    buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false));
+
+    ggml_init_params params = {
+        /*.mem_size   =*/ buf_compute_meta.size(),
+        /*.mem_buffer =*/ buf_compute_meta.data(),
+        /*.no_alloc   =*/ true,
+    };
+
+    ctx_compute.reset(ggml_init(params));
+
+    gf = ggml_new_graph_custom(ctx_compute.get(), max_nodes, false);
+}
+
+void llm_graph_result::set_inputs(const llama_ubatch * ubatch) {
+    for (auto & input : inputs) {
+        input->set_input(ubatch);
+    }
+}
+
+bool llm_graph_result::can_reuse(const llm_graph_params & params) {
+    if (!this->params.allow_reuse(params)) {
+        if (debug > 1) {
+            LLAMA_LOG_DEBUG("%s: cannot reuse graph due to incompatible graph parameters\n", __func__);
+        }
+
+        return false;
+    }
+
+    if (debug > 1) {
+        LLAMA_LOG_DEBUG("%s: checking compatibility of %d inputs:\n", __func__, (int) inputs.size());
+    }
+
+    bool res = true;
+
+    for (auto & input : inputs) {
+        const bool cur = input->can_reuse(params);
+
+        LLAMA_LOG_DEBUG(" %s: can_reuse = %d\n", "placeholder", cur);
+
+        res = res && cur;
+    }
+
+    if (debug > 0) {
+        LLAMA_LOG_DEBUG("%s: can reuse graph = %d\n", __func__, res);
+    }
+
+    return res;
+}
+
+llm_graph_input_i * llm_graph_result::add_input(llm_graph_input_ptr input) {
+    inputs.emplace_back(std::move(input));
+    return inputs.back().get();
+}
+
 //
 // llm_graph_context
 //
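
For reference, the constructor's env handling reduces to the getenv/atoi pattern sketched below. This is an illustrative stand-in, not code from the commit: env_debug_level and the main harness are hypothetical.

#include <cstdio>
#include <cstdlib>

// Stand-in for the constructor's env lookup: parse an integer debug level
// from the named environment variable, defaulting to 0 when it is unset.
static int env_debug_level(const char * name) {
    const char * val = std::getenv(name);
    return val ? std::atoi(val) : 0;
}

int main() {
    // e.g. run with: LLAMA_GRAPH_RESULT_DEBUG=2 ./a.out
    const int debug = env_debug_level("LLAMA_GRAPH_RESULT_DEBUG");
    std::printf("debug level = %d\n", debug);
    return 0;
}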

src/llama-graph.h

Lines changed: 9 additions & 48 deletions

@@ -466,9 +466,7 @@ struct llm_graph_params {
 
 class llm_graph_result : public llm_graph_result_i {
 public:
-    llm_graph_result(int64_t max_nodes) : max_nodes(max_nodes) {
-        reset();
-    }
+    llm_graph_result(int64_t max_nodes);
 
     virtual ~llm_graph_result() = default;
 
@@ -480,60 +478,20 @@ class llm_graph_result : public llm_graph_result_i {
     ggml_cgraph * get_gf() override { return gf; }
     ggml_context * get_ctx() override { return ctx_compute.get(); }
 
-    int64_t get_max_nodes() const {
-        return max_nodes;
-    }
-
-    void reset() override {
-        t_tokens      = nullptr;
-        t_logits      = nullptr;
-        t_embd        = nullptr;
-        t_embd_pooled = nullptr;
-
-        inputs.clear();
-
-        buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false));
+    int64_t get_max_nodes() const;
 
-        ggml_init_params params = {
-            /*.mem_size   =*/ buf_compute_meta.size(),
-            /*.mem_buffer =*/ buf_compute_meta.data(),
-            /*.no_alloc   =*/ true,
-        };
+    void reset() override;
 
-        ctx_compute.reset(ggml_init(params));
-
-        gf = ggml_new_graph_custom(ctx_compute.get(), max_nodes, false);
-    }
-
-    void set_inputs(const llama_ubatch * ubatch) override {
-        for (auto & input : inputs) {
-            input->set_input(ubatch);
-        }
-    }
+    void set_inputs(const llama_ubatch * ubatch) override;
 
     // try to update the existing graph result using the new graph parameters in order to reuse it
     // this can only be done if we determine that the resulting graph using the new graph parameters
     // would be identical to the existing graph. in that case, we simply have to update the memory
     // contexts of the input tensors of the graph and we can reuse it for another computation
     // return true if the graph was updated and can be reused
-    bool can_reuse(const llm_graph_params & params) override {
-        if (!this->params.allow_reuse(params)) {
-            return false;
-        }
-
-        bool res = true;
-
-        for (auto & input : inputs) {
-            res &= input->can_reuse(params);
-        }
+    bool can_reuse(const llm_graph_params & params) override;
 
-        return res;
-    }
-
-    llm_graph_input_i * add_input(llm_graph_input_ptr input) {
-        inputs.emplace_back(std::move(input));
-        return inputs.back().get();
-    }
+    llm_graph_input_i * add_input(llm_graph_input_ptr input);
 
     // important graph nodes
     ggml_tensor * t_tokens = nullptr;
@@ -556,6 +514,9 @@ class llm_graph_result : public llm_graph_result_i {
     // we will use this to determine whether the graph can be reused by comparing them with the new parameters
     // note: these are updated after constructing the new graph
     llm_graph_params params;
+
+    // env: LLAMA_GRAPH_RESULT_DEBUG
+    int debug = 0;
 };
 
 //
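
To make the reuse semantics concrete, here is a minimal self-contained sketch of the check that can_reuse() performs: parameter compatibility first, then unanimous agreement from every input. The params_t, input_i, and input_even types and the compatibility rule are toy stand-ins, not the real llama.cpp definitions.

#include <cstdio>
#include <memory>
#include <vector>

struct params_t {
    int n_tokens;
    bool allow_reuse(const params_t & other) const {
        return n_tokens == other.n_tokens; // placeholder compatibility rule
    }
};

struct input_i {
    virtual ~input_i() = default;
    virtual bool can_reuse(const params_t & p) const = 0;
};

struct input_even : input_i {
    // toy input: only reusable for an even token count
    bool can_reuse(const params_t & p) const override { return p.n_tokens % 2 == 0; }
};

static bool graph_can_reuse(const params_t & cur, const params_t & next,
                            const std::vector<std::unique_ptr<input_i>> & inputs) {
    if (!cur.allow_reuse(next)) {
        return false; // incompatible parameters: the graph must be rebuilt
    }
    bool res = true;
    for (const auto & input : inputs) {
        res = res && input->can_reuse(next); // every input must agree
    }
    return res;
}

int main() {
    std::vector<std::unique_ptr<input_i>> inputs;
    inputs.emplace_back(std::make_unique<input_even>());

    const params_t cur  = { 8 };
    const params_t next = { 8 };
    std::printf("can reuse = %d\n", graph_can_reuse(cur, next, inputs));
    return 0;
}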
