@@ -121,7 +121,7 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
 // Copy destination pointers to GPU to be available when pointer indirection is in use
 
 void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
         CUDA_CHECK(cudaStreamSynchronize(stream));
         if (cuda_graph->dest_ptrs_d != nullptr) {
@@ -353,7 +353,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 
     char ** dest_ptrs_d = nullptr;
     int graph_cpynode_index = -1;
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         dest_ptrs_d = ctx.cuda_graph->dest_ptrs_d;
         graph_cpynode_index = ctx.cuda_graph->graph_cpynode_index;
@@ -410,7 +410,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                    ggml_type_name(src0->type), ggml_type_name(src1->type));
     }
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         ctx.cuda_graph->graph_cpynode_index = graph_cpynode_index;
     }
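
The change is mechanical: the pointer-indirection path used by graph-captured copy kernels is compiled in only when one of the graph backends is enabled, and this diff adds the MUSA macro to each of those guards. Below is a minimal, self-contained sketch of that guard pattern, assuming hypothetical names `GRAPH_INDIRECTION_ENABLED` and `copy_dest_ptrs_sketch` (only the three `GGML_*_GRAPHS` macros come from the diff); it is not the upstream implementation.

```cpp
// Sketch of the compile-time guard this diff extends.
// GRAPH_INDIRECTION_ENABLED and copy_dest_ptrs_sketch are illustrative names,
// not part of ggml; only the GGML_*_GRAPHS macros appear in the real code.
#include <cstdio>
#include <vector>

#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
#define GRAPH_INDIRECTION_ENABLED 1
#else
#define GRAPH_INDIRECTION_ENABLED 0
#endif

// In graph-enabled builds the destination pointers would be uploaded to device
// memory so captured copy kernels can dereference them indirectly; in other
// builds the function collapses to a no-op (the presumed fallback, which this
// hunk does not show).
static void copy_dest_ptrs_sketch(char ** host_dest_ptrs, int n) {
#if GRAPH_INDIRECTION_ENABLED
    std::printf("graph build: would copy %d destination pointers to the GPU\n", n);
#else
    (void) host_dest_ptrs;
    (void) n;
#endif
}

int main() {
    std::vector<char *> ptrs(4, nullptr);
    copy_dest_ptrs_sketch(ptrs.data(), (int) ptrs.size());
    return 0;
}
```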