Spaces:
Running
Running
ggml : dynamic ggml_sched_max_splits based on graph_size (llama/9047)
Browse files
* ggml : Dynamic ggml_sched_max_splits based on graph_size
* Fixed and re-added debug code for causes
- ggml/src/ggml-backend.c +5 -8
ggml/src/ggml-backend.c
CHANGED
|
@@ -1018,10 +1018,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
|
| 1018 |
#define GGML_SCHED_MAX_BACKENDS 16
|
| 1019 |
#endif
|
| 1020 |
|
| 1021 |
-
#ifndef GGML_SCHED_MAX_SPLITS
|
| 1022 |
-
#define GGML_SCHED_MAX_SPLITS 2048
|
| 1023 |
-
#endif
|
| 1024 |
-
|
| 1025 |
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
| 1026 |
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
|
| 1027 |
#endif
|
|
@@ -1125,7 +1121,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
|
|
| 1125 |
}
|
| 1126 |
|
| 1127 |
#if 0
|
| 1128 |
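-
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only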
|
|
|
|
| 1129 |
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
|
| 1130 |
#define GET_CAUSE(node) causes[hash_id(node)]
|
| 1131 |
#else
|
|
@@ -1549,7 +1546,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
| 1549 |
sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
|
| 1550 |
GGML_ASSERT(sched->splits != NULL);
|
| 1551 |
}
|
| 1552 |
-
GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
|
| 1553 |
split = &sched->splits[i_split];
|
| 1554 |
split->backend_id = node_backend_id;
|
| 1555 |
split->i_start = i;
|
|
@@ -1865,13 +1861,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|
| 1865 |
sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
|
| 1866 |
sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
|
| 1867 |
|
| 1868 |
-
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
|
|
|
|
| 1869 |
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
|
| 1870 |
sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
|
| 1871 |
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
| 1872 |
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
| 1873 |
|
| 1874 |
-
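sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);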
|
| 1875 |
sched->context_buffer = malloc(sched->context_buffer_size);
|
| 1876 |
|
| 1877 |
const int initial_splits_capacity = 16;
|
|
|
|
| 1018 |
#define GGML_SCHED_MAX_BACKENDS 16
|
| 1019 |
#endif
|
| 1020 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1021 |
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
| 1022 |
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
|
| 1023 |
#endif
|
|
|
|
| 1121 |
}
|
| 1122 |
|
| 1123 |
#if 0
|
| 1124 |
+
#define GGML_SCHED_MAX_SPLITS_DEBUG 4096
|
| 1125 |
+
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
|
| 1126 |
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
|
| 1127 |
#define GET_CAUSE(node) causes[hash_id(node)]
|
| 1128 |
#else
|
|
|
|
| 1546 |
sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
|
| 1547 |
GGML_ASSERT(sched->splits != NULL);
|
| 1548 |
}
|
|
|
|
| 1549 |
split = &sched->splits[i_split];
|
| 1550 |
split->backend_id = node_backend_id;
|
| 1551 |
split->i_start = i;
|
|
|
|
| 1861 |
sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
|
| 1862 |
sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
|
| 1863 |
|
| 1864 |
+
const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
|
| 1865 |
+
const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
|
| 1866 |
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
|
| 1867 |
sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
|
| 1868 |
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
| 1869 |
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
| 1870 |
|
| 1871 |
+
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
|
| 1872 |
sched->context_buffer = malloc(sched->context_buffer_size);
|
| 1873 |
|
| 1874 |
const int initial_splits_capacity = 16;
|