Skip to content

Commit 28b5f19

Browse files
authored
CANN: implement LRU cache for ACL graphs (#15814)
* CANN: implement LRU cache for ACL graphs in CANN backend - Introduce ggml_cann_graph_lru_cache to store multiple ggml_cann_graph objects. - Graphs are loaded on demand and evicted using LRU policy when capacity is exceeded. - Updated push, move_to_front, and clear methods to manage cached graphs efficiently. - Ensures reuse of graphs, reducing graph reconstruction overhead in CANN backend. * fix typo * The LRU cache capacity can be configured via an env variable Signed-off-by: noemotiovon <[email protected]> * refactor acl graph * refactor && fix review comments Signed-off-by: noemotiovon <[email protected]> --------- Signed-off-by: noemotiovon <[email protected]>
1 parent 86587da commit 28b5f19

File tree

3 files changed

+164
-51
lines changed

3 files changed

+164
-51
lines changed

docs/backend/CANN.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,7 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable
314314
### GGML_CANN_ACL_GRAPH
315315

316316
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
317+
318+
### GGML_CANN_GRAPH_CACHE_CAPACITY
319+
320+
Maximum number of compiled CANN graphs kept in the LRU cache. The default is 12. When the number of cached graphs exceeds this capacity, the least recently used graph is evicted.

ggml/src/ggml-cann/common.h

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <unistd.h>
3939
#include <functional>
4040
#include <optional>
41+
#include <list>
4142

4243
#include "../include/ggml-cann.h"
4344
#include "../include/ggml.h"
@@ -106,6 +107,7 @@ int32_t ggml_cann_get_device();
106107

107108
std::optional<std::string> get_env(const std::string& name);
108109
bool parse_bool(const std::string& value);
110+
int parse_integer(const std::string& value);
109111

110112
/**
111113
* @brief Abstract base class for memory pools used by CANN.
@@ -350,14 +352,72 @@ struct ggml_graph_node_properties {
350352
struct ggml_cann_graph {
351353
~ggml_cann_graph() {
352354
if (graph != nullptr) {
353-
aclmdlRIDestroy(graph);
355+
ACL_CHECK(aclmdlRIDestroy(graph));
354356
}
355357
}
356358

357359
aclmdlRI graph = nullptr;
358360

359361
std::vector<ggml_graph_node_properties> ggml_graph_properties;
360362
};
363+
364+
/**
365+
* @brief LRU cache for managing ggml_cann_graph objects.
366+
*
367+
* This class maintains a list of shared_ptr to ggml_cann_graph objects
368+
* and enforces a maximum capacity. It provides methods to push new graphs,
369+
* move existing graphs to the front (most recently used), and clear the cache.
370+
*/
371+
struct ggml_cann_graph_lru_cache {
372+
size_t capacity; /**< Maximum number of graphs in the cache. */
373+
374+
std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */
375+
376+
ggml_cann_graph_lru_cache() {
377+
capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
378+
}
379+
380+
/**
381+
* @brief Push a new graph to the front of the cache.
382+
* If the cache exceeds capacity, the least recently used graph is deleted.
383+
* @param new_node Pointer to the new ggml_cann_graph to cache.
384+
* Ownership is transferred to the cache (cache will delete it).
385+
*/
386+
void push(ggml_cann_graph* new_node) {
387+
if (cache_list.size() >= capacity) {
388+
ggml_cann_graph* old = cache_list.back();
389+
cache_list.pop_back();
390+
delete old; // free the old graph
391+
}
392+
cache_list.push_front(new_node);
393+
}
394+
395+
/**
396+
* @brief Move an existing graph to the front of the cache.
397+
* @param node Pointer to the ggml_cann_graph to move.
398+
*/
399+
void move_to_front(ggml_cann_graph* node) {
400+
cache_list.remove(node);
401+
cache_list.push_front(node);
402+
}
403+
404+
/**
405+
* @brief Clear all graphs from the cache (also frees memory).
406+
*/
407+
void clear() {
408+
for (auto ptr : cache_list) {
409+
delete ptr;
410+
}
411+
cache_list.clear();
412+
}
413+
414+
/**
415+
* @brief Destructor that clears the cache and frees all cached graphs.
416+
*/
417+
~ggml_cann_graph_lru_cache() {
418+
clear();
419+
}
420+
};
361421
#endif // USE_ACL_GRAPH
362422

363423
struct ggml_cann_rope_cache {
@@ -394,7 +454,7 @@ struct ggml_backend_cann_context {
394454
aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */
395455
#ifdef USE_ACL_GRAPH
396456
/// Cached CANN ACL graph used for executing the current ggml computation graph.
397-
std::unique_ptr<ggml_cann_graph> cann_graph;
457+
ggml_cann_graph_lru_cache graph_lru_cache;
398458
bool acl_graph_mode = true;
399459
#endif
400460
cann_task_queue task_queue;

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 98 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,24 @@ bool parse_bool(const std::string& value) {
116116
return valid_values.find(value) != valid_values.end();
117117
}
118118

119+
/**
 * @brief Parse a string as an integer, returning 0 if invalid.
 *
 * This function attempts to convert the input string `value` to an `int`
 * using std::stoi. Leading whitespace and an optional sign are accepted
 * (std::stoi semantics) and trailing whitespace is tolerated. If the string
 * is empty, contains anything else after the digits (e.g. "12abc" or "1.5"),
 * or is out of the `int` range, it returns 0.
 *
 * @param value The string to parse.
 * @return The parsed integer, or 0 if conversion fails.
 */
int parse_integer(const std::string& value) {
    try {
        std::size_t consumed = 0;
        const int parsed = std::stoi(value, &consumed);
        // std::stoi stops at the first character it cannot use; treat a
        // partial parse such as "12abc" as invalid instead of returning 12.
        if (value.find_first_not_of(" \t\r\n\f\v", consumed) != std::string::npos) {
            return 0;
        }
        return parsed;
    } catch (...) {
        // std::stoi throws on non-numeric input and on out-of-range values.
        return 0;
    }
}
136+
119137
/**
120138
* @brief Initialize the CANN device information.
121139
*
@@ -2131,30 +2149,52 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
21312149

21322150
#ifdef USE_ACL_GRAPH
21332151
/**
2134-
* @brief Populate the internal CANN graph node properties from the ggml computation graph.
2152+
* @brief Add a new CANN graph to the LRU cache by populating node properties from the ggml graph.
2153+
*
2154+
* This function creates a new ggml_cann_graph object and fills its node properties
2155+
* (operation type, dimensions, strides, input sources, and operation parameters)
2156+
* based on the current ggml computation graph.
21352157
*
2136-
* This function copies all node attributes (operation type, dimensions, strides, input sources,
2137-
* and operation parameters) into the cached CANN graph structure for later reuse or comparison.
2158+
* Each node in the ggml graph is mapped to a property entry in the new CANN graph:
2159+
* - node address
2160+
* - operation type
2161+
* - shape (ne) and strides (nb)
2162+
* - source tensor addresses
2163+
* - operation parameters
21382164
*
2139-
* @param cann_ctx The CANN backend context.
2140-
* @param cgraph The ggml computational graph.
2165+
* After initialization, the new graph is pushed into the LRU cache owned by the
2166+
* CANN backend context. The cache takes ownership of the graph and manages its
2167+
* lifetime (including deletion upon eviction).
2168+
*
2169+
* @param cann_ctx The CANN backend context containing the graph cache.
2170+
* @param cgraph The current ggml computation graph.
21412171
*/
2142-
static void set_ggml_graph_node_properties(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2143-
for (int node_idx = 0; node_idx < cgraph->n_nodes; node_idx++) {
2172+
static void add_lru_matched_graph_node_properties(
2173+
ggml_backend_cann_context * cann_ctx,
2174+
ggml_cgraph * cgraph) {
2175+
// Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache).
2176+
ggml_cann_graph * new_graph = new ggml_cann_graph();
2177+
new_graph->ggml_graph_properties.resize(cgraph->n_nodes);
2178+
2179+
for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
21442180
ggml_tensor * node = cgraph->nodes[node_idx];
2145-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_address = node->data;
2146-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].node_op = node->op;
2181+
auto & prop = new_graph->ggml_graph_properties[node_idx];
21472182

2148-
for (int dim = 0; dim < GGML_MAX_DIMS; dim++) {
2149-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].ne[dim] = node->ne[dim];
2150-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].nb[dim] = node->nb[dim];
2151-
}
2152-
for (int src = 0; src < GGML_MAX_SRC; src++) {
2153-
cann_ctx->cann_graph->ggml_graph_properties[node_idx].src_address[src] =
2154-
node->src[src] ? node->src[src]->data : nullptr;
2183+
prop.node_address = node->data;
2184+
prop.node_op = node->op;
2185+
2186+
std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
2187+
std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
2188+
2189+
for (int src = 0; src < GGML_MAX_SRC; ++src) {
2190+
prop.src_address[src] = node->src[src] ? node->src[src]->data : nullptr;
21552191
}
2156-
memcpy(cann_ctx->cann_graph->ggml_graph_properties[node_idx].op_params, node->op_params, GGML_MAX_OP_PARAMS);
2192+
2193+
memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS);
21572194
}
2195+
2196+
// Insert into the LRU cache (cache takes ownership and will delete it when evicted).
2197+
cann_ctx->graph_lru_cache.push(new_graph);
21582198
}
21592199

21602200
/**
@@ -2199,30 +2239,45 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
21992239
}
22002240

22012241
/**
2202-
* @brief Determine if the CANN graph needs to be rebuilt due to graph changes.
2242+
* @brief Check whether there is a cached CANN graph that matches the current ggml graph.
2243+
*
2244+
* This function iterates through the cached CANN graphs stored in the LRU cache and
2245+
* compares them against the given ggml computation graph. A match requires that the
2246+
* number of nodes is the same and that each node’s properties (operation type,
2247+
* dimensions, strides, inputs, and operation parameters) are identical.
22032248
*
2204-
* This checks whether the number or properties of ggml graph nodes have changed
2205-
* compared to the last captured CANN graph. If so, the CANN graph must be re-captured.
2249+
* If a matching graph is found, it is promoted to the front of the LRU cache and the
2250+
* function returns true. Otherwise, the function returns false, indicating that a new
2251+
* CANN graph needs to be captured.
22062252
*
2207-
* @param cann_ctx The CANN backend context.
2253+
* @param cann_ctx The CANN backend context containing the graph cache.
22082254
* @param cgraph The current ggml computation graph.
2209-
* @return true if an update is required; false otherwise.
2210-
*/
2211-
static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2212-
// The number of nodes is different, so the graph needs to be reconstructed.
2213-
if (cann_ctx->cann_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) {
2214-
cann_ctx->cann_graph->ggml_graph_properties.resize(cgraph->n_nodes);
2215-
return true;
2216-
}
2255+
* @return true if a matching cached graph exists; false otherwise.
2256+
*/
2257+
static bool is_matched_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2258+
ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache;
2259+
for (auto &graph_ptr : lru_cache.cache_list) {
2260+
// Skip graphs with a different number of nodes.
2261+
if (graph_ptr->ggml_graph_properties.size() != static_cast<size_t>(cgraph->n_nodes)) {
2262+
continue;
2263+
}
22172264

2218-
// The number of nodes is the same; iterate over each node to check whether they match.
2219-
for (int i = 0; i < cgraph->n_nodes; i++) {
2220-
bool has_matching_properties = ggml_graph_node_has_matching_properties(
2221-
cgraph->nodes[i], &cann_ctx->cann_graph->ggml_graph_properties[i]);
2222-
if(!has_matching_properties) {
2265+
// Check if all nodes match.
2266+
bool all_match = true;
2267+
for (int i = 0; i < cgraph->n_nodes; ++i) {
2268+
if (!ggml_graph_node_has_matching_properties(cgraph->nodes[i], &graph_ptr->ggml_graph_properties[i])) {
2269+
all_match = false;
2270+
break;
2271+
}
2272+
}
2273+
2274+
if (all_match) {
2275+
// Match found: promote this graph to the front of cache_list (most recently used).
2276+
lru_cache.move_to_front(graph_ptr);
22232277
return true;
22242278
}
22252279
}
2280+
22262281
return false;
22272282
}
22282283
#endif // USE_ACL_GRAPH
@@ -2241,17 +2296,13 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx,
22412296
* @param cann_graph_update_required Whether graph capture is needed due to graph changes.
22422297
*/
22432298
static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
2244-
bool & use_cann_graph, bool & cann_graph_update_required) {
2299+
bool & use_cann_graph, bool & cann_graph_update_required) {
22452300
#ifdef USE_ACL_GRAPH
2301+
ggml_cann_graph* matched_graph = cann_ctx->graph_lru_cache.cache_list.front();
22462302
if (use_cann_graph && cann_graph_update_required) {
2247-
if (cann_ctx->cann_graph->graph != nullptr) {
2248-
ACL_CHECK(aclmdlRIDestroy(cann_ctx->cann_graph->graph));
2249-
cann_ctx->cann_graph->graph = nullptr;
2250-
}
22512303
ACL_CHECK(aclmdlRICaptureBegin(cann_ctx->stream(), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
22522304
}
22532305
#endif // USE_ACL_GRAPH
2254-
22552306
// Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
22562307
// With the use of CANN graphs, the execution will be performed by the graph launch.
22572308
if (!use_cann_graph || cann_graph_update_required) {
@@ -2272,12 +2323,12 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
22722323

22732324
#ifdef USE_ACL_GRAPH
22742325
if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
2275-
ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &cann_ctx->cann_graph->graph));
2326+
ACL_CHECK(aclmdlRICaptureEnd(cann_ctx->stream(), &matched_graph->graph));
22762327
}
22772328

22782329
if (use_cann_graph) {
22792330
// Execute graph
2280-
ACL_CHECK(aclmdlRIExecuteAsync(cann_ctx->cann_graph->graph, cann_ctx->stream()));
2331+
ACL_CHECK(aclmdlRIExecuteAsync(matched_graph->graph, cann_ctx->stream()));
22812332
}
22822333
#endif // USE_ACL_GRAPH
22832334
}
@@ -2311,19 +2362,17 @@ static enum ggml_status ggml_backend_cann_graph_compute(
23112362
}
23122363

23132364
if (use_cann_graph) {
2314-
if (cann_ctx->cann_graph == nullptr) {
2315-
cann_ctx->cann_graph.reset(new ggml_cann_graph());
2316-
cann_graph_update_required = true;
2365+
// If no matching graph is found, the graph needs to be recaptured.
2366+
cann_graph_update_required = !is_matched_graph(cann_ctx, cgraph);
2367+
if (cann_graph_update_required) {
2368+
// If no matching graph is found, add a new ACL graph.
2369+
add_lru_matched_graph_node_properties(cann_ctx, cgraph);
23172370
}
2318-
2319-
cann_graph_update_required = is_cann_graph_update_required(cann_ctx, cgraph);
2320-
set_ggml_graph_node_properties(cann_ctx, cgraph);
23212371
}
23222372
#else
23232373
bool use_cann_graph = false;
23242374
bool cann_graph_update_required = false;
23252375
#endif // USE_ACL_GRAPH
2326-
23272376
evaluate_and_capture_cann_graph(
23282377
cann_ctx,
23292378
cgraph,

0 commit comments

Comments
 (0)