From 5ac42c3b82be0cfe275b4a060ff93dff2643dd76 Mon Sep 17 00:00:00 2001 From: noemotiovon <757486878@qq.com> Date: Fri, 5 Sep 2025 09:17:22 +0000 Subject: [PATCH 1/5] CANN: implement LRU cache for ACL graphs in CANN backend - Introduce ggml_cann_graph_lru_cache to store multiple ggml_cann_graph objects. - Graphs are loaded on demand and evicted using LRU policy when capacity is exceeded. - Updated push, move_to_front, and clear methods to manage cached graphs efficiently. - Ensures reuse of graphs, reducing graph reconstruction overhead in CANN backend. --- ggml/src/ggml-cann/common.h | 54 +++++++++++++++++++- ggml/src/ggml-cann/ggml-cann.cpp | 88 ++++++++++++++++++-------------- 2 files changed, 103 insertions(+), 39 deletions(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index e295f4ab47210..5ec58491bb949 100755 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -38,6 +38,7 @@ #include #include #include +#include #include "../include/ggml-cann.h" #include "../include/ggml.h" @@ -358,6 +359,57 @@ struct ggml_cann_graph { std::vector ggml_graph_properties; }; + +/** + * @brief LRU cache for managing ggml_cann_graph objects. + * + * This class maintains a list of shared_ptr to ggml_cann_graph objects + * and enforces a maximum capacity. It provides methods to push new graphs, + * move existing graphs to the front (most recently used), and clear the cache. + */ +struct ggml_cann_graph_lru_cache { + size_t capacity = 12; /**< Maximum number of graphs in the cache. */ + + std::list> cache_list; /**< List storing cached graphs. */ + + std::shared_ptr matched_graph = nullptr; /**< Pointer to a recently matched graph. */ + + /** + * @brief Push a new graph to the front of the cache. + * If the cache exceeds capacity, the least recently used graph is removed. + * @param new_node Shared pointer to the new ggml_cann_graph to cache. 
+ */ + void push(std::shared_ptr new_node) { + if (cache_list.size() >= capacity) { + cache_list.pop_back(); + } + + cache_list.push_front(new_node); + } + + /** + * @brief Move an existing graph to the front of the cache. + * @param node Shared pointer to the ggml_cann_graph to move. + */ + void move_to_front(std::shared_ptr node) { + cache_list.remove(node); + cache_list.push_front(node); + } + + /** + * @brief Clear all graphs from the cache. + */ + void clear() { + cache_list.clear(); + } + + /** + * @brief Destructor that clears the cache upon object destruction. + */ + ~ggml_cann_graph_lru_cache() { + clear(); + } +}; #endif // USE_ACL_GRAPH struct ggml_cann_rope_cache { @@ -394,7 +446,7 @@ struct ggml_backend_cann_context { aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ #ifdef USE_ACL_GRAPH /// Cached CANN ACL graph used for executing the current ggml computation graph. - std::unique_ptr cann_graph; + ggml_cann_graph_lru_cache graph_lru_cache; bool acl_graph_mode = true; #endif cann_task_queue task_queue; diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 2f9f373f54077..686af24a86d5d 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2140,21 +2140,31 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) { * @param cgraph The ggml computational graph. 
*/ static void set_ggml_graph_node_properties(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { - for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) { - ggml_tensor * node = cgraph->nodes[node_idx]; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_address = node->data; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_op = node->op; - - for (int dim = 0; dim < GGML_MAX_DIMS; dim++) { - cann_ctx->cann_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim]; - cann_ctx->cann_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim]; - } - for (int src = 0; src < GGML_MAX_SRC; src++) { - cann_ctx->cann_graph->ggml_graph_properties[node_idx].src_address[src] = - node->src[src] ? node->src[src]->data : nullptr; + std::shared_ptr &matched_graph = cann_ctx->graph_lru_cache.matched_graph; + if (!matched_graph) { + matched_graph.reset(new ggml_cann_graph()); + matched_graph->ggml_graph_properties.resize(cgraph->n_nodes); + for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) { + ggml_tensor * node = cgraph->nodes[node_idx]; + matched_graph->ggml_graph_properties[node_idx].node_address = node->data; + matched_graph->ggml_graph_properties[node_idx].node_op = node->op; + + for (int dim = 0; dim < GGML_MAX_DIMS; dim++) { + matched_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim]; + matched_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim]; + } + for (int src = 0; src < GGML_MAX_SRC; src++) { + matched_graph->ggml_graph_properties[node_idx].src_address[src] = + node->src[src] ? 
node->src[src]->data : nullptr; + } + memcpy(matched_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS); } - memcpy(cann_ctx->cann_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS); + + cann_ctx->graph_lru_cache.push(matched_graph); + } else { + cann_ctx->graph_lru_cache.move_to_front(matched_graph); } + return; } /** @@ -2209,21 +2219,29 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra * @return true if an update is required; false otherwise. */ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { - // The number of nodes is different, so the graph needs to be reconstructed. - if (cann_ctx->cann_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) { - cann_ctx->cann_graph->ggml_graph_properties.resize(cgraph->n_nodes); - return true; - } - - // The number of nodes is the same; iterate over each node to check whether they match. - for (int i = 0; i < cgraph->n_nodes; i++) { - bool has_matching_properties = ggml_graph_node_has_matching_properties( - cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]); - if(!has_matching_properties) { - return true; + ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache; + for (auto &graph_ptr : lru_cache.cache_list) { + // The number of nodes is different, so the graph needs to be reconstructed. + if (graph_ptr->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) { + continue; + } + // The number of nodes is the same; iterate over each node to check whether they match. 
+ bool all_match = true; + for (int i = 0; i < cgraph->n_nodes; i++) { + bool has_matching_properties = ggml_graph_node_has_matching_properties( + cgraph->nodes[i], &graph_ptr->ggml_graph_properties[i]); + if(!has_matching_properties) { + all_match = false; + break; + } } + if (all_match) { + lru_cache.matched_graph = graph_ptr; + return false; + } } - return false; + lru_cache.matched_graph = nullptr; + return true; } #endif // USE_ACL_GRAPH @@ -2244,14 +2262,13 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx bool & use_cann_graph, bool & cann_graph_update_required) { #ifdef USE_ACL_GRAPH if (use_cann_graph && cann_graph_update_required) { - if (cann_ctx->cann_graph->graph != nullptr) { - ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph)); - cann_ctx->cann_graph->graph = nullptr; + if (cann_ctx->graph_lru_cache.matched_graph->graph != nullptr) { + ACL_CHECK(aclmdlRIDestroy(cann_ctx->graph_lru_cache.matched_graph->graph)); + cann_ctx->graph_lru_cache.matched_graph->graph = nullptr; } ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL)); } #endif // USE_ACL_GRAPH - // Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph. // With the use of CANN graphs, the execution will be performed by the graph launch. 
if (!use_cann_graph || cann_graph_update_required) { @@ -2272,12 +2289,12 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx #ifdef USE_ACL_GRAPH if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture - ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->cann_graph->graph)); + ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->graph_lru_cache.matched_graph->graph)); } if (use_cann_graph) { // Execute graph - ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->cann_graph->graph, cann_ctx->stream())); + ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->graph_lru_cache.matched_graph->graph, cann_ctx->stream())); } #endif // USE_ACL_GRAPH } @@ -2311,11 +2328,7 @@ static enum ggml_status ggml_backend_cann_graph_compute( } if (use_cann_graph) { - if (cann_ctx->cann_graph == nullptr) { - cann_ctx->cann_graph.reset(new ggml_cann_graph()); - cann_graph_update_required = true; - } - + // TODO: refactor to lru_cache cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph); set_ggml_graph_node_properties(cann_ctx, cgraph); } @@ -2323,7 +2336,6 @@ static enum ggml_status ggml_backend_cann_graph_compute( bool use_cann_graph = false; bool cann_graph_update_required = false; #endif // USE_ACL_GRAPH - evaluate_and_capture_cann_graph( cann_ctx, cgraph, From 4c9b10a45b505c099710995dafd79c1e5e6c7e7f Mon Sep 17 00:00:00 2001 From: noemotiovon <757486878@qq.com> Date: Fri, 5 Sep 2025 09:22:04 +0000 Subject: [PATCH 2/5] fix typo --- ggml/src/ggml-cann/ggml-cann.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 686af24a86d5d..19b3bd0b06bc5 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2238,7 +2238,7 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, if (all_match) { lru_cache.matched_graph = graph_ptr; return false; - } + } } lru_cache.matched_graph = 
nullptr; return true; From 15b4ff7a067bc581c1ead8218e7fafa604afbfa4 Mon Sep 17 00:00:00 2001 From: noemotiovon <757486878@qq.com> Date: Mon, 8 Sep 2025 02:17:11 +0000 Subject: [PATCH 3/5] The LRU cache capacity can be configured via an env variable Signed-off-by: noemotiovon <757486878@qq.com> --- docs/backend/CANN.md | 4 ++++ ggml/src/ggml-cann/common.h | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/backend/CANN.md b/docs/backend/CANN.md index 357253f43a0ce..35b189bb9558f 100755 --- a/docs/backend/CANN.md +++ b/docs/backend/CANN.md @@ -314,3 +314,7 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable ### GGML_CANN_ACL_GRAPH Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default. + +### GGML_CANN_GRAPH_CACHE_CAPACITY + +Maximum number of compiled CANN graphs kept in the LRU cache, default is 12. When the number of cached graphs exceeds this capacity, the least recently used graph will be evicted. diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 5ec58491bb949..6c4c07f15a4e3 100755 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -368,12 +368,21 @@ struct ggml_cann_graph { * move existing graphs to the front (most recently used), and clear the cache. */ struct ggml_cann_graph_lru_cache { - size_t capacity = 12; /**< Maximum number of graphs in the cache. */ + size_t capacity; /**< Maximum number of graphs in the cache. */ std::list> cache_list; /**< List storing cached graphs. */ std::shared_ptr matched_graph = nullptr; /**< Pointer to a recently matched graph. */ + ggml_cann_graph_lru_cache() { + std::string env_val = get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"); + try { + capacity = std::stoul(env_val); + } catch (...) { + capacity = 12; // fallback to default if invalid + } + } + /** * @brief Push a new graph to the front of the cache. 
* If the cache exceeds capacity, the least recently used graph is removed. From 81aa674052e1495a223cf4e53a7892b6fc40a0f8 Mon Sep 17 00:00:00 2001 From: noemotiovon <757486878@qq.com> Date: Tue, 9 Sep 2025 03:37:28 +0000 Subject: [PATCH 4/5] refactor acl graph --- ggml/src/ggml-cann/common.h | 35 ++++---- ggml/src/ggml-cann/ggml-cann.cpp | 135 ++++++++++++++++++++----------- 2 files changed, 105 insertions(+), 65 deletions(-) diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 6c4c07f15a4e3..17d7dbc75c244 100755 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -107,6 +107,7 @@ int32_t ggml_cann_get_device(); std::optional get_env(const std::string& name); bool parse_bool(const std::string& value); +int parse_integer(const std::string& value); /** * @brief Abstract base class for memory pools used by CANN. @@ -351,7 +352,7 @@ struct ggml_graph_node_properties { struct ggml_cann_graph { ~ggml_cann_graph() { if (graph != nullptr) { - aclmdlRIDestroy(graph); + ACL_CHECK(aclmdlRIDestroy(graph)); } } @@ -370,50 +371,48 @@ struct ggml_cann_graph { struct ggml_cann_graph_lru_cache { size_t capacity; /**< Maximum number of graphs in the cache. */ - std::list> cache_list; /**< List storing cached graphs. */ - - std::shared_ptr matched_graph = nullptr; /**< Pointer to a recently matched graph. */ + std::list cache_list; /**< List storing cached graphs as raw pointers. */ ggml_cann_graph_lru_cache() { - std::string env_val = get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"); - try { - capacity = std::stoul(env_val); - } catch (...) { - capacity = 12; // fallback to default if invalid - } + capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); } /** * @brief Push a new graph to the front of the cache. - * If the cache exceeds capacity, the least recently used graph is removed. - * @param new_node Shared pointer to the new ggml_cann_graph to cache.
+ * If the cache exceeds capacity, the least recently used graph is deleted. + * @param new_node Pointer to the new ggml_cann_graph to cache. + * Ownership is transferred to the cache (cache will delete it). */ - void push(std::shared_ptr new_node) { + void push(ggml_cann_graph* new_node) { if (cache_list.size() >= capacity) { + ggml_cann_graph* old = cache_list.back(); cache_list.pop_back(); + delete old; // free the old graph } - cache_list.push_front(new_node); } /** * @brief Move an existing graph to the front of the cache. - * @param node Shared pointer to the ggml_cann_graph to move. + * @param node Pointer to the ggml_cann_graph to move. */ - void move_to_front(std::shared_ptr node) { + void move_to_front(ggml_cann_graph* node) { cache_list.remove(node); cache_list.push_front(node); } /** - * @brief Clear all graphs from the cache. + * @brief Clear all graphs from the cache (also frees memory). */ void clear() { + for (auto ptr : cache_list) { + delete ptr; + } cache_list.clear(); } /** - * @brief Destructor that clears the cache upon object destruction. + * @brief Destructor that clears the cache and frees all cached graphs. */ ~ggml_cann_graph_lru_cache() { clear(); diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 19b3bd0b06bc5..c14bf84d1b882 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -116,6 +116,24 @@ bool parse_bool(const std::string& value) { return valid_values.find(value) != valid_values.end(); } +/** + * @brief Parse a string as an integer, returning 0 if invalid. + * + * This function attempts to convert the input string `value` to an `int`. + * If the string is not a valid integer or is out of the `int` range, + * it returns 0. + * + * @param value The string to parse. + * @return The parsed integer, or 0 if conversion fails. + */ +int parse_integer(const std::string& value) { + try { + return std::stoi(value); + } catch (...) 
{ + return 0; + } +} + /** * @brief Initialize the CANN device information. * @@ -2134,37 +2152,56 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) { * @brief Populate the internal CANN graph node properties from the ggml computation graph. * * This function copies all node attributes (operation type, dimensions, strides, input sources, - * and operation parameters) into the cached CANN graph structure for later reuse or comparison. + * and operation parameters) into a CANN graph structure. The graph is then managed by the + * LRU cache in the CANN context. * - * @param cann_ctx The CANN backend context. - * @param cgraph The ggml computational graph. + * Key behavior: + * - If `matched_graph` is nullptr, a new ggml_cann_graph is created, initialized with + * the properties of the current ggml graph, and pushed into the cache. + * - If `matched_graph` is not nullptr, it is moved to the front of the cache. + * + * This ensures that the **first element of the cache list always points to the current + * matched graph or the newly created graph**, allowing quick access and reuse. + * + * @param cann_ctx The CANN backend context containing the graph cache. + * @param cgraph The current ggml computation graph. + * @param matched_graph A pointer to the matched CANN graph. If null, a new graph will + * be created and pushed into the cache. The cache takes ownership + * and will delete the graph when evicted. 
*/ -static void set_ggml_graph_node_properties(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { - std::shared_ptr &matched_graph = cann_ctx->graph_lru_cache.matched_graph; +static void set_lru_matched_graph_node_properties( + ggml_backend_cann_context * cann_ctx, + ggml_cgraph * cgraph, + ggml_cann_graph * matched_graph) { + if (!matched_graph) { - matched_graph.reset(new ggml_cann_graph()); - matched_graph->ggml_graph_properties.resize(cgraph->n_nodes); - for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) { + // Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache). + ggml_cann_graph * new_graph = new ggml_cann_graph(); + new_graph->ggml_graph_properties.resize(cgraph->n_nodes); + + for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) { ggml_tensor * node = cgraph->nodes[node_idx]; - matched_graph->ggml_graph_properties[node_idx].node_address = node->data; - matched_graph->ggml_graph_properties[node_idx].node_op = node->op; + auto & prop = new_graph->ggml_graph_properties[node_idx]; - for (int dim = 0; dim < GGML_MAX_DIMS; dim++) { - matched_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim]; - matched_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim]; - } - for (int src = 0; src < GGML_MAX_SRC; src++) { - matched_graph->ggml_graph_properties[node_idx].src_address[src] = - node->src[src] ? node->src[src]->data : nullptr; + prop.node_address = node->data; + prop.node_op = node->op; + + std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne); + std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb); + + for (int src = 0; src < GGML_MAX_SRC; ++src) { + prop.src_address[src] = node->src[src] ? 
node->src[src]->data : nullptr; } - memcpy(matched_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS); + + memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS); } - cann_ctx->graph_lru_cache.push(matched_graph); + // Insert into the LRU cache (cache takes ownership and will delete it when evicted). + cann_ctx->graph_lru_cache.push(new_graph); } else { + // Cast to the proper type and move to the front of the cache. cann_ctx->graph_lru_cache.move_to_front(matched_graph); } - return; } /** @@ -2209,39 +2246,43 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra } /** - * @brief Determine if the CANN graph needs to be rebuilt due to graph changes. + * @brief Find a cached CANN graph that matches the current ggml graph. + * + * This function checks the cached CANN graphs in the LRU cache to determine + * whether there is an existing graph whose node properties match the current + * ggml computation graph. If a matching graph is found, it can be reused + * without rebuilding; otherwise, a new CANN graph must be captured. * - * This checks whether the number or properties of ggml graph nodes have changed - * compared to the last captured CANN graph. If so, the CANN graph must be re-captured. + * The comparison includes the number of nodes and their properties, such as + * operation type, dimensions, strides, inputs, and operation parameters. * - * @param cann_ctx The CANN backend context. + * @param cann_ctx The CANN backend context containing the graph cache. * @param cgraph The current ggml computation graph. - * @return true if an update is required; false otherwise. + * @return Pointer to a matching ggml_cann_graph if found; nullptr otherwise. 
*/ -static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { +static ggml_cann_graph* get_matched_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache; for (auto &graph_ptr : lru_cache.cache_list) { - // The number of nodes is different, so the graph needs to be reconstructed. - if (graph_ptr->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) { + // Skip graphs with a different number of nodes. + if (graph_ptr->ggml_graph_properties.size() != static_cast(cgraph->n_nodes)) { continue; } - // The number of nodes is the same; iterate over each node to check whether they match. + + // Check if all nodes match. bool all_match = true; - for (int i = 0; i < cgraph->n_nodes; i++) { - bool has_matching_properties = ggml_graph_node_has_matching_properties( - cgraph->nodes[i], &graph_ptr->ggml_graph_properties[i]); - if(!has_matching_properties) { + for (int i = 0; i < cgraph->n_nodes; ++i) { + if (!ggml_graph_node_has_matching_properties(cgraph->nodes[i], &graph_ptr->ggml_graph_properties[i])) { all_match = false; break; } } + if (all_match) { - lru_cache.matched_graph = graph_ptr; - return false; + return graph_ptr; } } - lru_cache.matched_graph = nullptr; - return true; + + return nullptr; } #endif // USE_ACL_GRAPH @@ -2259,13 +2300,10 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, * @param cann_graph_update_required Whether graph capture is needed due to graph changes. 
*/ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph, - bool & use_cann_graph, bool & cann_graph_update_required) { + bool & use_cann_graph, bool & cann_graph_update_required) { #ifdef USE_ACL_GRAPH + ggml_cann_graph* matched_graph = cann_ctx->graph_lru_cache.cache_list.front(); if (use_cann_graph && cann_graph_update_required) { - if (cann_ctx->graph_lru_cache.matched_graph->graph != nullptr) { - ACL_CHECK(aclmdlRIDestroy(cann_ctx->graph_lru_cache.matched_graph->graph)); - cann_ctx->graph_lru_cache.matched_graph->graph = nullptr; - } ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL)); } #endif // USE_ACL_GRAPH @@ -2289,12 +2327,12 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx #ifdef USE_ACL_GRAPH if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture - ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->graph_lru_cache.matched_graph->graph)); + ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &matched_graph->graph)); } if (use_cann_graph) { // Execute graph - ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->graph_lru_cache.matched_graph->graph, cann_ctx->stream())); + ACL_CHECK(aclmdlRIExecuteAsync(matched_graph->graph, cann_ctx->stream())); } #endif // USE_ACL_GRAPH } @@ -2322,15 +2360,18 @@ static enum ggml_status ggml_backend_cann_graph_compute( #ifdef USE_ACL_GRAPH bool use_cann_graph = true; bool cann_graph_update_required = false; + ggml_cann_graph * matched_graph = nullptr; if (!cann_ctx->acl_graph_mode) { use_cann_graph = false; } if (use_cann_graph) { - // TODO: refactor to lru_cache - cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph); - set_ggml_graph_node_properties(cann_ctx, cgraph); + matched_graph = get_matched_graph(cann_ctx, cgraph); + if (!matched_graph) { + cann_graph_update_required = true; + } + set_lru_matched_graph_node_properties(cann_ctx, cgraph, matched_graph); } 
#else bool use_cann_graph = false; From d91cefcf140f453a732eec07a8d5e684d8ebdce6 Mon Sep 17 00:00:00 2001 From: noemotiovon <757486878@qq.com> Date: Wed, 10 Sep 2025 06:27:17 +0000 Subject: [PATCH 5/5] refactor && fix review comments Signed-off-by: noemotiovon <757486878@qq.com> --- ggml/src/ggml-cann/ggml-cann.cpp | 108 +++++++++++++++---------------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index c14bf84d1b882..d3e3f07f09cb6 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2149,59 +2149,52 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) { #ifdef USE_ACL_GRAPH /** - * @brief Populate the internal CANN graph node properties from the ggml computation graph. + * @brief Add a new CANN graph to the LRU cache by populating node properties from the ggml graph. * - * This function copies all node attributes (operation type, dimensions, strides, input sources, - * and operation parameters) into a CANN graph structure. The graph is then managed by the - * LRU cache in the CANN context. + * This function creates a new ggml_cann_graph object and fills its node properties + * (operation type, dimensions, strides, input sources, and operation parameters) + * based on the current ggml computation graph. * - * Key behavior: - * - If `matched_graph` is nullptr, a new ggml_cann_graph is created, initialized with - * the properties of the current ggml graph, and pushed into the cache. - * - If `matched_graph` is not nullptr, it is moved to the front of the cache.
+ * Each node in the ggml graph is mapped to a property entry in the new CANN graph: + * - node address + * - operation type + * - shape (ne) and strides (nb) + * - source tensor addresses + * - operation parameters * - * This ensures that the **first element of the cache list always points to the current - * matched graph or the newly created graph**, allowing quick access and reuse. + * After initialization, the new graph is pushed into the LRU cache owned by the + * CANN backend context. The cache takes ownership of the graph and manages its + * lifetime (including deletion upon eviction). * - * @param cann_ctx The CANN backend context containing the graph cache. - * @param cgraph The current ggml computation graph. - * @param matched_graph A pointer to the matched CANN graph. If null, a new graph will - * be created and pushed into the cache. The cache takes ownership - * and will delete the graph when evicted. + * @param cann_ctx The CANN backend context containing the graph cache. + * @param cgraph The current ggml computation graph. */ -static void set_lru_matched_graph_node_properties( +static void add_lru_matched_graph_node_properties( ggml_backend_cann_context * cann_ctx, - ggml_cgraph * cgraph, - ggml_cann_graph * matched_graph) { - - if (!matched_graph) { - // Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache). - ggml_cann_graph * new_graph = new ggml_cann_graph(); - new_graph->ggml_graph_properties.resize(cgraph->n_nodes); + ggml_cgraph * cgraph) { + // Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache). 
+ ggml_cann_graph * new_graph = new ggml_cann_graph(); + new_graph->ggml_graph_properties.resize(cgraph->n_nodes); - for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) { - ggml_tensor * node = cgraph->nodes[node_idx]; - auto & prop = new_graph->ggml_graph_properties[node_idx]; + for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) { + ggml_tensor * node = cgraph->nodes[node_idx]; + auto & prop = new_graph->ggml_graph_properties[node_idx]; - prop.node_address = node->data; - prop.node_op = node->op; + prop.node_address = node->data; + prop.node_op = node->op; - std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne); - std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb); - - for (int src = 0; src < GGML_MAX_SRC; ++src) { - prop.src_address[src] = node->src[src] ? node->src[src]->data : nullptr; - } + std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne); + std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb); - memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS); + for (int src = 0; src < GGML_MAX_SRC; ++src) { + prop.src_address[src] = node->src[src] ? node->src[src]->data : nullptr; } - // Insert into the LRU cache (cache takes ownership and will delete it when evicted). - cann_ctx->graph_lru_cache.push(new_graph); - } else { - // Cast to the proper type and move to the front of the cache. - cann_ctx->graph_lru_cache.move_to_front(matched_graph); + memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS); } + + // Insert into the LRU cache (cache takes ownership and will delete it when evicted). + cann_ctx->graph_lru_cache.push(new_graph); } /** @@ -2246,21 +2239,22 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra } /** - * @brief Find a cached CANN graph that matches the current ggml graph. + * @brief Check whether there is a cached CANN graph that matches the current ggml graph. 
* - * This function checks the cached CANN graphs in the LRU cache to determine - * whether there is an existing graph whose node properties match the current - * ggml computation graph. If a matching graph is found, it can be reused - * without rebuilding; otherwise, a new CANN graph must be captured. + * This function iterates through the cached CANN graphs stored in the LRU cache and + * compares them against the given ggml computation graph. A match requires that the + * number of nodes is the same and that each node’s properties (operation type, + * dimensions, strides, inputs, and operation parameters) are identical. * - * The comparison includes the number of nodes and their properties, such as - * operation type, dimensions, strides, inputs, and operation parameters. + * If a matching graph is found, it is promoted to the front of the LRU cache and the + * function returns true. Otherwise, the function returns false, indicating that a new + * CANN graph needs to be captured. * * @param cann_ctx The CANN backend context containing the graph cache. * @param cgraph The current ggml computation graph. - * @return Pointer to a matching ggml_cann_graph if found; nullptr otherwise. + * @return true if a matching cached graph exists; false otherwise. */ -static ggml_cann_graph* get_matched_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { +static bool is_matched_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) { ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache; for (auto &graph_ptr : lru_cache.cache_list) { // Skip graphs with a different number of nodes. 
@@ -2278,11 +2272,13 @@ static ggml_cann_graph* get_matched_graph(ggml_backend_cann_context * cann_ctx, } if (all_match) { - return graph_ptr; + // Promote the matched graph to the front of the LRU cache before reporting a hit. + lru_cache.move_to_front(graph_ptr); + return true; } } - return nullptr; + return false; } #endif // USE_ACL_GRAPH @@ -2360,18 +2356,18 @@ static enum ggml_status ggml_backend_cann_graph_compute( #ifdef USE_ACL_GRAPH bool use_cann_graph = true; bool cann_graph_update_required = false; - ggml_cann_graph * matched_graph = nullptr; if (!cann_ctx->acl_graph_mode) { use_cann_graph = false; } if (use_cann_graph) { - matched_graph = get_matched_graph(cann_ctx, cgraph); - if (!matched_graph) { - cann_graph_update_required = true; + // If no matching graph is found, the graph needs to be recaptured. + cann_graph_update_required = !is_matched_graph(cann_ctx, cgraph); + if (cann_graph_update_required) { + // If no matching graph is found, add a new ACL graph. + add_lru_matched_graph_node_properties(cann_ctx, cgraph); } - set_lru_matched_graph_node_properties(cann_ctx, cgraph, matched_graph); } #else bool use_cann_graph = false;