Spaces:
Sleeping
Sleeping
Diego Devesa
committed on
Commit
·
1c0a5c0
1
Parent(s):
6b6155b
sched : avoid changing cur_copy when a graph is already allocated (llama/13922)
Browse files
- ggml/src/ggml-backend.cpp +10 -5
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -1340,7 +1340,10 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
|
| 1340 |
// allocate graph
|
| 1341 |
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
| 1342 |
// the re-allocation may cause the split inputs to be moved to a different address
|
| 1343 |
-
ggml_backend_sched_synchronize(sched);
|
|
|
|
|
|
|
|
|
|
| 1344 |
#ifndef NDEBUG
|
| 1345 |
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
| 1346 |
#endif
|
|
@@ -1564,7 +1567,6 @@ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgra
|
|
| 1564 |
|
| 1565 |
ggml_backend_sched_split_graph(sched, graph);
|
| 1566 |
|
| 1567 |
-
|
| 1568 |
if (!ggml_backend_sched_alloc_splits(sched)) {
|
| 1569 |
return false;
|
| 1570 |
}
|
|
@@ -1598,9 +1600,12 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
|
| 1598 |
for (int i = 0; i < sched->n_backends; i++) {
|
| 1599 |
ggml_backend_synchronize(sched->backends[i]);
|
| 1600 |
}
|
| 1601 |
-
|
| 1602 |
-
|
| 1603 |
-
|
|
|
|
|
|
|
|
|
|
| 1604 |
}
|
| 1605 |
|
| 1606 |
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
|
|
|
| 1340 |
// allocate graph
|
| 1341 |
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
|
| 1342 |
// the re-allocation may cause the split inputs to be moved to a different address
|
| 1343 |
+
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
|
| 1344 |
+
for (int i = 0; i < sched->n_backends; i++) {
|
| 1345 |
+
ggml_backend_synchronize(sched->backends[i]);
|
| 1346 |
+
}
|
| 1347 |
#ifndef NDEBUG
|
| 1348 |
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
|
| 1349 |
#endif
|
|
|
|
| 1567 |
|
| 1568 |
ggml_backend_sched_split_graph(sched, graph);
|
| 1569 |
|
|
|
|
| 1570 |
if (!ggml_backend_sched_alloc_splits(sched)) {
|
| 1571 |
return false;
|
| 1572 |
}
|
|
|
|
| 1600 |
for (int i = 0; i < sched->n_backends; i++) {
|
| 1601 |
ggml_backend_synchronize(sched->backends[i]);
|
| 1602 |
}
|
| 1603 |
+
if (!sched->is_alloc) {
|
| 1604 |
+
// if the graph is not already allocated, always use copy 0 after a synchronization
|
| 1605 |
+
// this ensures that during generation the same copy is used every time,
|
| 1606 |
+
// which avoids changes in the graph that could cause CUDA or other graphs to be disabled
|
| 1607 |
+
sched->cur_copy = 0;
|
| 1608 |
+
}
|
| 1609 |
}
|
| 1610 |
|
| 1611 |
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|