Commit 9e9c0ad · committed by Diego Devesa
Parent(s): 15659b4
llama : only use default buffer types for the KV cache (llama/10358)
ggml/src/ggml-backend.cpp
CHANGED
@@ -880,7 +880,7 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen
 }
 
 static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor, const struct ggml_tensor * op) {
-    ggml_backend_buffer_t buffer = tensor->buffer;
+    ggml_backend_buffer_t buffer = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
     if (buffer == NULL) {
         return -1;
     }
@@ -913,8 +913,6 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML
 
 // returns the backend that should be used for the node based on the current locations
 static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
-    // TODO: use supports_op to check if the backend supports the op
-
     // assign pre-allocated nodes to their backend
     int cur_backend_id = ggml_backend_sched_backend_from_buffer(sched, tensor, tensor);
     if (cur_backend_id != -1) {
@@ -933,7 +931,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
 
     if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
         // since the tensor is pre-allocated, it cannot be moved to another backend
-        GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
+        GGML_ABORT("pre-allocated tensor (%s) in a backend that cannot run the operation", tensor->name);
     }
 
     // graph input
@@ -1077,6 +1075,9 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
        int * node_backend_id = &tensor_backend_id(node);
+        if (ggml_is_view_op(node->op)) {
+            continue;
+        }
         // do not overwrite user assignments
         if (*node_backend_id == -1) {
             *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
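For context, the scheduler change above comes down to one rule: a view tensor owns no storage of its own, so its backend has to be resolved through the buffer of its view_src. Below is a minimal, self-contained sketch of that fallback. The types and names here (Buffer, Tensor, effective_backend) are illustrative stand-ins, not the ggml API; they only mirror the view_src resolution the patch adds.

```cpp
// Minimal sketch of the buffer-resolution logic in this commit (assumed
// simplification; Buffer/Tensor/effective_backend are hypothetical names).
#include <cstdio>

struct Buffer {
    int backend_id;    // which backend allocated this buffer
};

struct Tensor {
    Buffer * buffer;   // storage owned by this tensor (NULL for views)
    Tensor * view_src; // parent tensor if this is a view, else NULL
};

// Mirrors the patched ggml_backend_sched_backend_from_buffer: a view is
// located wherever its parent's buffer lives.
static int effective_backend(const Tensor * t) {
    const Buffer * buffer = t->view_src ? t->view_src->buffer : t->buffer;
    if (buffer == NULL) {
        return -1; // not pre-allocated on any backend yet
    }
    return buffer->backend_id;
}

int main() {
    Buffer gpu_buffer = { /*backend_id=*/0 };
    Tensor kv_cache   = { &gpu_buffer, nullptr };  // pre-allocated tensor
    Tensor kv_view    = { nullptr, &kv_cache };    // view into the cache

    // Without the view_src fallback, the view would resolve to -1;
    // with it, the view inherits the parent's backend.
    std::printf("kv_cache backend: %d\n", effective_backend(&kv_cache)); // 0
    std::printf("kv_view  backend: %d\n", effective_backend(&kv_view));  // 0
    return 0;
}
```

The added ggml_is_view_op check in ggml_backend_sched_split_graph appears to apply the same idea in the assignment pass: view nodes are skipped there instead of being assigned a backend from a buffer they do not own.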