@@ -116,6 +116,24 @@ bool parse_bool(const std::string& value) {
116
116
return valid_values.find (value) != valid_values.end ();
117
117
}
118
118
119
/**
 * @brief Parse a string as an integer, returning 0 if invalid.
 *
 * This function attempts to convert the whole input string `value` to an `int`.
 * Leading whitespace (skipped by `std::stoi`) and trailing whitespace are
 * tolerated, but any other trailing characters make the input invalid, so
 * a value such as "12abc" yields 0 rather than 12.
 *
 * If the string is empty, is not a valid integer, or is out of the `int`
 * range, the function returns 0. It never throws.
 *
 * @param value The string to parse.
 * @return The parsed integer, or 0 if conversion fails.
 */
int parse_integer(const std::string& value) {
    try {
        size_t    consumed = 0;
        const int parsed   = std::stoi(value, &consumed);

        // std::stoi stops at the first character it cannot use; treat anything
        // other than trailing whitespace after the number as a parse failure.
        if (value.find_first_not_of(" \t\n\r\f\v", consumed) != std::string::npos) {
            return 0;
        }
        return parsed;
    } catch (...) {
        // std::stoi throws std::invalid_argument (no digits) or
        // std::out_of_range (value does not fit in int); both map to 0.
        return 0;
    }
}
136
+
119
137
/* *
120
138
* @brief Initialize the CANN device information.
121
139
*
@@ -2131,30 +2149,52 @@ static void ggml_backend_cann_synchronize(ggml_backend_t backend) {
2131
2149
2132
2150
#ifdef USE_ACL_GRAPH
2133
2151
/* *
2134
- * @brief Populate the internal CANN graph node properties from the ggml computation graph.
2152
+ * @brief Add a new CANN graph to the LRU cache by populating node properties from the ggml graph.
2153
+ *
2154
+ * This function creates a new ggml_cann_graph object and fills its node properties
2155
+ * (operation type, dimensions, strides, input sources, and operation parameters)
2156
+ * based on the current ggml computation graph.
2135
2157
*
2136
- * This function copies all node attributes (operation type, dimensions, strides, input sources,
2137
- * and operation parameters) into the cached CANN graph structure for later reuse or comparison.
2158
+ * Each node in the ggml graph is mapped to a property entry in the new CANN graph:
2159
+ * - node address
2160
+ * - operation type
2161
+ * - shape (ne) and strides (nb)
2162
+ * - source tensor addresses
2163
+ * - operation parameters
2138
2164
*
2139
- * @param cann_ctx The CANN backend context.
2140
- * @param cgraph The ggml computational graph.
2165
+ * After initialization, the new graph is pushed into the LRU cache owned by the
2166
+ * CANN backend context. The cache takes ownership of the graph and manages its
2167
+ * lifetime (including deletion upon eviction).
2168
+ *
2169
+ * @param cann_ctx The CANN backend context containing the graph cache.
2170
+ * @param cgraph The current ggml computation graph.
2141
2171
*/
2142
- static void set_ggml_graph_node_properties (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2143
- for (int node_idx = 0 ; node_idx < cgraph->n_nodes ; node_idx++) {
2172
+ static void add_lru_matched_graph_node_properties (
2173
+ ggml_backend_cann_context * cann_ctx,
2174
+ ggml_cgraph * cgraph) {
2175
+ // Create a new ggml_cann_graph object on the heap (its lifetime is managed by the cache).
2176
+ ggml_cann_graph * new_graph = new ggml_cann_graph ();
2177
+ new_graph->ggml_graph_properties .resize (cgraph->n_nodes );
2178
+
2179
+ for (int node_idx = 0 ; node_idx < cgraph->n_nodes ; ++node_idx) {
2144
2180
ggml_tensor * node = cgraph->nodes [node_idx];
2145
- cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_address = node->data ;
2146
- cann_ctx->cann_graph ->ggml_graph_properties [node_idx].node_op = node->op ;
2181
+ auto & prop = new_graph->ggml_graph_properties [node_idx];
2147
2182
2148
- for (int dim = 0 ; dim < GGML_MAX_DIMS; dim++) {
2149
- cann_ctx->cann_graph ->ggml_graph_properties [node_idx].ne [dim] = node->ne [dim];
2150
- cann_ctx->cann_graph ->ggml_graph_properties [node_idx].nb [dim] = node->nb [dim];
2151
- }
2152
- for (int src = 0 ; src < GGML_MAX_SRC; src++) {
2153
- cann_ctx->cann_graph ->ggml_graph_properties [node_idx].src_address [src] =
2154
- node->src [src] ? node->src [src]->data : nullptr ;
2183
+ prop.node_address = node->data ;
2184
+ prop.node_op = node->op ;
2185
+
2186
+ std::copy_n (node->ne , GGML_MAX_DIMS, prop.ne );
2187
+ std::copy_n (node->nb , GGML_MAX_DIMS, prop.nb );
2188
+
2189
+ for (int src = 0 ; src < GGML_MAX_SRC; ++src) {
2190
+ prop.src_address [src] = node->src [src] ? node->src [src]->data : nullptr ;
2155
2191
}
2156
- memcpy (cann_ctx->cann_graph ->ggml_graph_properties [node_idx].op_params , node->op_params , GGML_MAX_OP_PARAMS);
2192
+
2193
+ memcpy (prop.op_params , node->op_params , GGML_MAX_OP_PARAMS);
2157
2194
}
2195
+
2196
+ // Insert into the LRU cache (cache takes ownership and will delete it when evicted).
2197
+ cann_ctx->graph_lru_cache .push (new_graph);
2158
2198
}
2159
2199
2160
2200
/* *
@@ -2199,30 +2239,45 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
2199
2239
}
2200
2240
2201
2241
/* *
2202
- * @brief Determine if the CANN graph needs to be rebuilt due to graph changes.
2242
+ * @brief Check whether there is a cached CANN graph that matches the current ggml graph.
2243
+ *
2244
+ * This function iterates through the cached CANN graphs stored in the LRU cache and
2245
+ * compares them against the given ggml computation graph. A match requires that the
2246
+ * number of nodes is the same and that each node’s properties (operation type,
2247
+ * dimensions, strides, inputs, and operation parameters) are identical.
2203
2248
*
2204
- * This checks whether the number or properties of ggml graph nodes have changed
2205
- * compared to the last captured CANN graph. If so, the CANN graph must be re-captured.
2249
+ * If a matching graph is found, it is promoted to the front of the LRU cache and the
2250
+ * function returns true. Otherwise, the function returns false, indicating that a new
2251
+ * CANN graph needs to be captured.
2206
2252
*
2207
- * @param cann_ctx The CANN backend context.
2253
+ * @param cann_ctx The CANN backend context containing the graph cache .
2208
2254
* @param cgraph The current ggml computation graph.
2209
- * @return true if an update is required; false otherwise.
2210
- */
2211
- static bool is_cann_graph_update_required (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2212
- // The number of nodes is different, so the graph needs to be reconstructed.
2213
- if (cann_ctx->cann_graph ->ggml_graph_properties .size () != (size_t )cgraph->n_nodes ) {
2214
- cann_ctx->cann_graph ->ggml_graph_properties .resize (cgraph->n_nodes );
2215
- return true ;
2216
- }
2255
+ * @return true if a matching cached graph exists; false otherwise.
2256
+ */
2257
+ static bool is_matched_graph (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph) {
2258
+ ggml_cann_graph_lru_cache &lru_cache = cann_ctx->graph_lru_cache ;
2259
+ for (auto &graph_ptr : lru_cache.cache_list ) {
2260
+ // Skip graphs with a different number of nodes.
2261
+ if (graph_ptr->ggml_graph_properties .size () != static_cast <size_t >(cgraph->n_nodes )) {
2262
+ continue ;
2263
+ }
2217
2264
2218
- // The number of nodes is the same; iterate over each node to check whether they match.
2219
- for (int i = 0 ; i < cgraph->n_nodes ; i++) {
2220
- bool has_matching_properties = ggml_graph_node_has_matching_properties (
2221
- cgraph->nodes [i], &cann_ctx->cann_graph ->ggml_graph_properties [i]);
2222
- if (!has_matching_properties) {
2265
+ // Check if all nodes match.
2266
+ bool all_match = true ;
2267
+ for (int i = 0 ; i < cgraph->n_nodes ; ++i) {
2268
+ if (!ggml_graph_node_has_matching_properties (cgraph->nodes [i], &graph_ptr->ggml_graph_properties [i])) {
2269
+ all_match = false ;
2270
+ break ;
2271
+ }
2272
+ }
2273
+
2274
+ if (all_match) {
2275
+ // update cache_list && renturn graph_ptr
2276
+ lru_cache.move_to_front (graph_ptr);
2223
2277
return true ;
2224
2278
}
2225
2279
}
2280
+
2226
2281
return false ;
2227
2282
}
2228
2283
#endif // USE_ACL_GRAPH
@@ -2241,17 +2296,13 @@ static bool is_cann_graph_update_required(ggml_backend_cann_context * cann_ctx,
2241
2296
* @param cann_graph_update_required Whether graph capture is needed due to graph changes.
2242
2297
*/
2243
2298
static void evaluate_and_capture_cann_graph (ggml_backend_cann_context * cann_ctx, ggml_cgraph * cgraph,
2244
- bool & use_cann_graph, bool & cann_graph_update_required) {
2299
+ bool & use_cann_graph, bool & cann_graph_update_required) {
2245
2300
#ifdef USE_ACL_GRAPH
2301
+ ggml_cann_graph* matched_graph = cann_ctx->graph_lru_cache .cache_list .front ();
2246
2302
if (use_cann_graph && cann_graph_update_required) {
2247
- if (cann_ctx->cann_graph ->graph != nullptr ) {
2248
- ACL_CHECK (aclmdlRIDestroy (cann_ctx->cann_graph ->graph ));
2249
- cann_ctx->cann_graph ->graph = nullptr ;
2250
- }
2251
2303
ACL_CHECK (aclmdlRICaptureBegin (cann_ctx->stream (), ACL_MODEL_RI_CAPTURE_MODE_GLOBAL));
2252
2304
}
2253
2305
#endif // USE_ACL_GRAPH
2254
-
2255
2306
// Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
2256
2307
// With the use of CANN graphs, the execution will be performed by the graph launch.
2257
2308
if (!use_cann_graph || cann_graph_update_required) {
@@ -2272,12 +2323,12 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
2272
2323
2273
2324
#ifdef USE_ACL_GRAPH
2274
2325
if (use_cann_graph && cann_graph_update_required) { // End CANN graph capture
2275
- ACL_CHECK (aclmdlRICaptureEnd (cann_ctx->stream (), &cann_ctx-> cann_graph ->graph ));
2326
+ ACL_CHECK (aclmdlRICaptureEnd (cann_ctx->stream (), &matched_graph ->graph ));
2276
2327
}
2277
2328
2278
2329
if (use_cann_graph) {
2279
2330
// Execute graph
2280
- ACL_CHECK (aclmdlRIExecuteAsync (cann_ctx-> cann_graph ->graph , cann_ctx->stream ()));
2331
+ ACL_CHECK (aclmdlRIExecuteAsync (matched_graph ->graph , cann_ctx->stream ()));
2281
2332
}
2282
2333
#endif // USE_ACL_GRAPH
2283
2334
}
@@ -2311,19 +2362,17 @@ static enum ggml_status ggml_backend_cann_graph_compute(
2311
2362
}
2312
2363
2313
2364
if (use_cann_graph) {
2314
- if (cann_ctx->cann_graph == nullptr ) {
2315
- cann_ctx->cann_graph .reset (new ggml_cann_graph ());
2316
- cann_graph_update_required = true ;
2365
+ // If no matching graph is found, the graph needs to be recaptured.
2366
+ cann_graph_update_required = !is_matched_graph (cann_ctx, cgraph);
2367
+ if (cann_graph_update_required) {
2368
+ // If no matching graph is found, add a new ACL graph.
2369
+ add_lru_matched_graph_node_properties (cann_ctx, cgraph);
2317
2370
}
2318
-
2319
- cann_graph_update_required = is_cann_graph_update_required (cann_ctx, cgraph);
2320
- set_ggml_graph_node_properties (cann_ctx, cgraph);
2321
2371
}
2322
2372
#else
2323
2373
bool use_cann_graph = false ;
2324
2374
bool cann_graph_update_required = false ;
2325
2375
#endif // USE_ACL_GRAPH
2326
-
2327
2376
evaluate_and_capture_cann_graph (
2328
2377
cann_ctx,
2329
2378
cgraph,
0 commit comments