ggml : remove ggml_scratch (llama/10121)

- ggml/include/ggml.h +0 -9
- ggml/src/ggml.c +2 -65
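This commit removes the scratch-buffer mechanism that the header had already marked "TODO: deprecate and remove": the public struct ggml_scratch and ggml_set_scratch(), the internal ggml_scratch_save()/ggml_scratch_load() helpers, and the scratch allocation path in ggml_new_tensor_impl(). Tensor data is now always taken from the context's single memory pool, or left unallocated when no_alloc is set.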
ggml/include/ggml.h CHANGED

@@ -655,14 +655,6 @@ extern "C" {
         void * abort_callback_data;
     };
 
-    // scratch buffer
-    // TODO: deprecate and remove
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
     struct ggml_init_params {
         // memory pool
         size_t mem_size;   // bytes
@@ -766,7 +758,6 @@ extern "C" {
 
     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
 
-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
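For reference, a minimal sketch of how the removed API was used by callers; the field layout and signature are taken from the deleted declarations above, while the buffer name and size are illustrative:

    // before this commit: route temporary tensor data into a caller-owned region
    static char scratch_mem[16*1024*1024];          // hypothetical buffer

    struct ggml_scratch scratch = {
        /*.offs =*/ 0,
        /*.size =*/ sizeof(scratch_mem),
        /*.data =*/ scratch_mem,
    };
    ggml_set_scratch(ctx, scratch);                 // install the region
    // ... create intermediate tensors: their data lands in scratch_mem ...
    ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, });  // detach

Code built against the new header simply drops these calls; every non-view tensor is then allocated from the context's own pool.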
ggml/src/ggml.c CHANGED

@@ -2018,15 +2018,11 @@ struct ggml_context {
     void * mem_buffer;
     bool   mem_buffer_owned;
     bool   no_alloc;
-    bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
 
     int    n_objects;
 
     struct ggml_object * objects_begin;
     struct ggml_object * objects_end;
-
-    struct ggml_scratch scratch;
-    struct ggml_scratch scratch_save;
 };
 
 struct ggml_context_container {
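After the edit the context tracks only its one memory pool and the object list; reconstructed from the hunk (fields above mem_buffer elided):

    struct ggml_context {
        // ... earlier fields (e.g. mem_size) elided ...
        void * mem_buffer;
        bool   mem_buffer_owned;
        bool   no_alloc;

        int    n_objects;

        struct ggml_object * objects_begin;
        struct ggml_object * objects_end;
    };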
@@ -3879,12 +3875,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
         /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
         /*.no_alloc         =*/ params.no_alloc,
-        /*.no_alloc_save    =*/ params.no_alloc,
         /*.n_objects        =*/ 0,
         /*.objects_begin    =*/ NULL,
         /*.objects_end      =*/ NULL,
-        /*.scratch          =*/ { 0, 0, NULL, },
-        /*.scratch_save     =*/ { 0, 0, NULL, },
     };
 
     GGML_ASSERT(ctx->mem_buffer != NULL);
@@ -3904,8 +3897,6 @@ void ggml_reset(struct ggml_context * ctx) {
     ctx->n_objects     = 0;
     ctx->objects_begin = NULL;
     ctx->objects_end   = NULL;
-    ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, };
-    ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, };
 }
 
 void ggml_free(struct ggml_context * ctx) {
@@ -3924,14 +3915,6 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
     return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
 }
 
-size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
-    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
-
-    ctx->scratch = scratch;
-
-    return result;
-}
-
 bool ggml_get_no_alloc(struct ggml_context * ctx) {
     return ctx->no_alloc;
 }
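Note that the deleted setter returned the offset of the previously installed region (ctx->scratch.offs, or 0 when none was set), which is what let a caller, and ggml itself via the ggml_scratch_save()/ggml_scratch_load() helpers removed below, park a region and resume it later.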
@@ -3959,27 +3942,6 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     return max_size;
 }
 
-// IMPORTANT:
-// when creating "opt" tensors, always save and load the scratch buffer
-// this is an error prone process, but it is necessary to support inplace
-// operators when using scratch buffers
-// TODO: implement a better way
-static void ggml_scratch_save(struct ggml_context * ctx) {
-    // this is needed to allow opt tensors to store their data
-    // TODO: again, need to find a better way
-    ctx->no_alloc_save = ctx->no_alloc;
-    ctx->no_alloc      = false;
-
-    ctx->scratch_save = ctx->scratch;
-    ctx->scratch.data = NULL;
-}
-
-static void ggml_scratch_load(struct ggml_context * ctx) {
-    ctx->no_alloc = ctx->no_alloc_save;
-
-    ctx->scratch = ctx->scratch_save;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 
 static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
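These helpers let small "opt" tensors bypass an installed scratch region by parking it and forcing no_alloc off; the only call sites removed in this commit are in ggml_new_i32()/ggml_new_f32() (see the hunk below), which wrapped tensor creation like this:

    // pattern deleted by this commit (from ggml_new_i32 before the edit)
    ggml_scratch_save(ctx);   // park the scratch region, force no_alloc = false
    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
    ggml_scratch_load(ctx);   // restore the region and the saved no_alloc state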
@@ -4060,29 +4022,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     size_t obj_alloc_size = 0;
 
     if (view_src == NULL && !ctx->no_alloc) {
-        if (ctx->scratch.data != NULL) {
-            // allocate tensor data in the scratch buffer
-            if (ctx->scratch.offs + data_size > ctx->scratch.size) {
-                GGML_LOG_WARN("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                        __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
-                assert(false);
-                return NULL;
-            }
-
-            data = (char * const) ctx->scratch.data + ctx->scratch.offs;
-
-            ctx->scratch.offs += data_size;
-        } else {
-            // allocate tensor data in the context's memory pool
-            obj_alloc_size = data_size;
-        }
+        // allocate tensor data in the context's memory pool
+        obj_alloc_size = data_size;
     }
 
     struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
     GGML_ASSERT(obj_new);
 
-    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
-
     struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
 #ifdef __clang__
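With the scratch branch gone there is a single rule: a non-view tensor consumes one ggml_object of GGML_TENSOR_SIZE plus its data size from the context pool, and nothing when no_alloc is set. A sketch of sizing a pool under that rule; n_tensors and max_nbytes are illustrative names, and alignment padding means the estimate should be generous:

    // rough budget for a context holding n_tensors tensors of at most
    // max_nbytes each, now that all tensor data comes from the one pool
    size_t mem_size = n_tensors*(ggml_tensor_overhead() + max_nbytes);

    struct ggml_init_params params = {
        /*.mem_size   =*/ mem_size,
        /*.mem_buffer =*/ NULL,    // let ggml allocate and own the buffer
        /*.no_alloc   =*/ false,   // tensor data is placed in the pool
    };
    struct ggml_context * ctx = ggml_init(params);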
@@ -4178,24 +4124,16 @@ struct ggml_tensor * ggml_new_tensor_4d(
 }
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
-    ggml_scratch_save(ctx);
-
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
 
-    ggml_scratch_load(ctx);
-
     ggml_set_i32(result, value);
 
     return result;
 }
 
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
-    ggml_scratch_save(ctx);
-
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
 
-    ggml_scratch_load(ctx);
-
     ggml_set_f32(result, value);
 
     return result;
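The resulting constructor, reconstructed from the hunk:

    struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
        struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);

        ggml_set_i32(result, value);

        return result;
    }

ggml_new_f32() is identical apart from GGML_TYPE_F32 and ggml_set_f32().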
@@ -20263,7 +20201,6 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
     uint64_t size_eval = 0;
 
     // compute size of intermediate results
-    // TODO: does not take into account scratch buffers !!!!
     for (int i = 0; i < cgraph->n_nodes; ++i) {
         size_eval += ggml_nbytes_pad(cgraph->nodes[i]);
     }
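Callers that used scratch buffers to keep intermediate results out of the context pool can instead create tensors in a no_alloc context and let the graph allocator place the data. A minimal sketch, assuming the ggml-alloc/ggml-backend headers and the CPU backend are available; sizes and shapes are illustrative:

    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    // metadata-only context: tensors are created without data
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, ggml_add(ctx, a, b));

    // the allocator measures the graph and places every tensor in backend
    // buffers, reusing memory between intermediates as scratch buffers once did
    ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    ggml_gallocr_alloc_graph(galloc, gf);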