Diego Devesa committed on
Commit
9e9c0ad
·
1 Parent(s): 15659b4

llama : only use default buffer types for the KV cache (llama/10358)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-backend.cpp +5 -4
ggml/src/ggml-backend.cpp CHANGED
@@ -880,7 +880,7 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen
880
  }
881
 
882
  static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor, const struct ggml_tensor * op) {
883
- ggml_backend_buffer_t buffer = tensor->buffer;
884
  if (buffer == NULL) {
885
  return -1;
886
  }
@@ -913,8 +913,6 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML
913
 
914
  // returns the backend that should be used for the node based on the current locations
915
  static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
916
- // TODO: use supports_op to check if the backend supports the op
917
-
918
  // assign pre-allocated nodes to their backend
919
  int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor, tensor);
920
  if (cur_backend_id != -1) {
@@ -933,7 +931,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
933
 
934
  if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
935
  // since the tensor is pre-allocated, it cannot be moved to another backend
936
- GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
937
  }
938
 
939
  // graph input
@@ -1077,6 +1075,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
1077
  for (int i = 0; i < graph->n_nodes; i++) {
1078
  struct ggml_tensor * node = graph->nodes[i];
1079
  int * node_backend_id = &tensor_backend_id(node);
 
 
 
1080
  // do not overwrite user assignments
1081
  if (*node_backend_id == -1) {
1082
  *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
 
880
  }
881
 
882
  static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor, const struct ggml_tensor * op) {
883
+ ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
884
  if (buffer == NULL) {
885
  return -1;
886
  }
 
913
 
914
  // returns the backend that should be used for the node based on the current locations
915
  static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
 
 
916
  // assign pre-allocated nodes to their backend
917
  int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor, tensor);
918
  if (cur_backend_id != -1) {
 
931
 
932
  if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
933
  // since the tensor is pre-allocated, it cannot be moved to another backend
934
+ GGML_ABORT("pre-allocated tensor (%s) in a backend that cannot run the operation", tensor->name);
935
  }
936
 
937
  // graph input
 
1075
  for (int i = 0; i < graph->n_nodes; i++) {
1076
  struct ggml_tensor * node = graph->nodes[i];
1077
  int * node_backend_id = &tensor_backend_id(node);
1078
+ if (ggml_is_view_op(node->op)) {
1079
+ continue;
1080
+ }
1081
  // do not overwrite user assignments
1082
  if (*node_backend_id == -1) {
1083
  *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);