Skip to content

Commit c12bbde

Browse files
authored
sched : fix multiple evaluations of the same graph with pipeline parallelism (ggml-org#14855)
ggml-ci
1 parent: 3f4fc97 · commit: c12bbde

File tree

1 file changed: 8 additions (+8), 5 deletions (-5)

ggml/src/ggml-backend.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -647,6 +647,7 @@ struct ggml_backend_sched {
647647
// pipeline parallelism support
648648
int n_copies;
649649
int cur_copy;
650+
int next_copy;
650651
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
651652
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
652653
int n_graph_inputs;
@@ -1433,8 +1434,6 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
14331434
}
14341435
}
14351436

1436-
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
1437-
14381437
return GGML_STATUS_SUCCESS;
14391438
}
14401439

@@ -1535,10 +1534,10 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
15351534
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
15361535
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
15371536

1538-
ggml_backend_sched_split_graph(sched, measure_graph);
1539-
15401537
ggml_backend_sched_synchronize(sched);
15411538

1539+
ggml_backend_sched_split_graph(sched, measure_graph);
1540+
15421541
if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
15431542
return false;
15441543
}
@@ -1550,6 +1549,10 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
15501549

15511550
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
15521551
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
1552+
GGML_ASSERT(!sched->is_alloc);
1553+
1554+
sched->cur_copy = sched->next_copy;
1555+
sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
15531556

15541557
ggml_backend_sched_split_graph(sched, graph);
15551558

@@ -1590,7 +1593,7 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
15901593
// if the graph is not already allocated, always use copy 0 after a synchronization
15911594
// this ensures that during generation the same copy is used every time,
15921595
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
1593-
sched->cur_copy = 0;
1596+
sched->next_copy = 0;
15941597
}
15951598
}
15961599

0 commit comments

Comments
 (0)