nicoboss committed on
Commit
e0dc1ad
·
1 Parent(s): 12634fc

ggml : dynamic ggml_sched_max_splits based on graph_size (llama/9047)

Browse files

* ggml : Dynamic ggml_sched_max_splits based on graph_size

* Fixed and re-added debug code for causes

Files changed (1) hide show
  1. ggml/src/ggml-backend.c +5 -8
ggml/src/ggml-backend.c CHANGED
@@ -1018,10 +1018,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
1018
  #define GGML_SCHED_MAX_BACKENDS 16
1019
  #endif
1020
 
1021
- #ifndef GGML_SCHED_MAX_SPLITS
1022
- #define GGML_SCHED_MAX_SPLITS 2048
1023
- #endif
1024
-
1025
  #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
1026
  #define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
1027
  #endif
@@ -1125,7 +1121,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
1125
  }
1126
 
1127
  #if 0
1128
- static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
 
1129
  #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
1130
  #define GET_CAUSE(node) causes[hash_id(node)]
1131
  #else
@@ -1549,7 +1546,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
1549
  sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
1550
  GGML_ASSERT(sched->splits != NULL);
1551
  }
1552
- GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
1553
  split = &sched->splits[i_split];
1554
  split->backend_id = node_backend_id;
1555
  split->i_start = i;
@@ -1865,13 +1861,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
1865
  sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
1866
  sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
1867
 
1868
- const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
 
1869
  sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
1870
  sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
1871
  sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
1872
  sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
1873
 
1874
- sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
1875
  sched->context_buffer = malloc(sched->context_buffer_size);
1876
 
1877
  const int initial_splits_capacity = 16;
 
1018
  #define GGML_SCHED_MAX_BACKENDS 16
1019
  #endif
1020
 
 
 
 
 
1021
  #ifndef GGML_SCHED_MAX_SPLIT_INPUTS
1022
  #define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
1023
  #endif
 
1121
  }
1122
 
1123
  #if 0
1124
+ #define GGML_SCHED_MAX_SPLITS_DEBUG 4096
1125
+ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
1126
  #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
1127
  #define GET_CAUSE(node) causes[hash_id(node)]
1128
  #else
 
1546
  sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
1547
  GGML_ASSERT(sched->splits != NULL);
1548
  }
 
1549
  split = &sched->splits[i_split];
1550
  split->backend_id = node_backend_id;
1551
  split->i_start = i;
 
1861
  sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
1862
  sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
1863
 
1864
+ const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
1865
+ const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
1866
  sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
1867
  sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
1868
  sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
1869
  sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
1870
 
1871
+ sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
1872
  sched->context_buffer = malloc(sched->context_buffer_size);
1873
 
1874
  const int initial_splits_capacity = 16;