ggerganov committed
Commit 3f0b7ba · Parent(s): e295a3f

ggml : remove ggml_scratch (llama/10121)
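Scratch buffers were an early mechanism for steering tensor-data allocations into a caller-provided pool. They have long been superseded by the graph allocator in ggml-alloc, so this commit deletes the struct, the public setter, and the internal save/load helpers. As a rough orientation (not part of this commit), the replacement pattern looks like the sketch below; it assumes the public ggml.h, ggml-alloc.h, and ggml-backend.h headers from the same tree, and the tensor shapes are placeholders:

    #include "ggml.h"
    #include "ggml-alloc.h"
    #include "ggml-backend.h"

    // sketch: metadata-only context + graph allocator instead of ggml_set_scratch()
    static void build_and_alloc(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ ggml_tensor_overhead()*8 + ggml_graph_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ true, // tensors get no data here; the allocator places it
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, ggml_add(ctx, a, b));

        // the graph allocator computes and assigns all intermediate storage,
        // which is what scratch buffers used to approximate by hand
        ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
        ggml_gallocr_alloc_graph(galloc, gf);

        // ... run the graph with a backend, then:
        ggml_gallocr_free(galloc);
        ggml_free(ctx);
    }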

Files changed (2):
  1. ggml/include/ggml.h  +0 -9
  2. ggml/src/ggml.c      +2 -65
ggml/include/ggml.h CHANGED
@@ -655,14 +655,6 @@ extern "C" {
         void * abort_callback_data;
     };
 
-    // scratch buffer
-    // TODO: deprecate and remove
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
     struct ggml_init_params {
         // memory pool
         size_t mem_size;   // bytes
@@ -766,7 +758,6 @@ extern "C" {
 
     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
 
-    GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
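For context, the deleted API was driven from client code roughly like this (a reconstruction from the removed declarations above, not a verbatim caller; `ctx`, `buf`, and its size are placeholders):

    // old pattern, now removed: carve subsequent tensor data out of a
    // caller-owned buffer instead of the context's memory pool
    static uint8_t buf[16u*1024*1024];

    ggml_set_scratch(ctx, (struct ggml_scratch) {
        /*.offs =*/ 0,
        /*.size =*/ sizeof(buf),
        /*.data =*/ buf,
    });
    // ... create intermediate tensors here; their data lands in `buf` ...
    ggml_set_scratch(ctx, (struct ggml_scratch) { 0, 0, NULL, }); // detach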
ggml/src/ggml.c CHANGED
@@ -2018,15 +2018,11 @@ struct ggml_context {
     void * mem_buffer;
     bool   mem_buffer_owned;
     bool   no_alloc;
-    bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
 
     int    n_objects;
 
     struct ggml_object * objects_begin;
     struct ggml_object * objects_end;
-
-    struct ggml_scratch scratch;
-    struct ggml_scratch scratch_save;
 };
 
 struct ggml_context_container {
@@ -3879,12 +3875,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
         /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
         /*.no_alloc         =*/ params.no_alloc,
-        /*.no_alloc_save    =*/ params.no_alloc,
         /*.n_objects        =*/ 0,
         /*.objects_begin    =*/ NULL,
         /*.objects_end      =*/ NULL,
-        /*.scratch          =*/ { 0, 0, NULL, },
-        /*.scratch_save     =*/ { 0, 0, NULL, },
     };
 
     GGML_ASSERT(ctx->mem_buffer != NULL);
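The initializer above also shows the ownership rule that survives this change: a caller-supplied `mem_buffer` is used as-is and never freed by ggml. A minimal sketch, assuming only the public header (`pool` and its size are placeholders):

    // the context borrows `pool` (mem_buffer_owned == false), so ggml_free()
    // releases bookkeeping only, never the buffer itself
    static uint8_t pool[32u*1024*1024];

    struct ggml_init_params params = {
        /*.mem_size   =*/ sizeof(pool),
        /*.mem_buffer =*/ pool,
        /*.no_alloc   =*/ false, // tensor data is placed inside `pool`
    };
    struct ggml_context * ctx = ggml_init(params);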
@@ -3904,8 +3897,6 @@ void ggml_reset(struct ggml_context * ctx) {
     ctx->n_objects     = 0;
     ctx->objects_begin = NULL;
     ctx->objects_end   = NULL;
-    ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, };
-    ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, };
 }
 
 void ggml_free(struct ggml_context * ctx) {
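With the scratch fields gone, `ggml_reset` is down to rewinding the object list; the pool itself is retained, which allows a context to be reused across iterations. A sketch, assuming a live `ctx`:

    // reuse one context: reset drops all tensors/objects but keeps mem_buffer
    for (int iter = 0; iter < 10; ++iter) {
        ggml_reset(ctx);
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        // ... build and evaluate, then loop ...
        (void) t;
    }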
@@ -3924,14 +3915,6 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
     return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
 }
 
-size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
-    const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
-
-    ctx->scratch = scratch;
-
-    return result;
-}
-
 bool ggml_get_no_alloc(struct ggml_context * ctx) {
     return ctx->no_alloc;
 }
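`ggml_used_mem`, left intact just above the deleted setter, reports the pool's high-water mark; with scratch buffers removed it now accounts for all tensor data as well. Trivially, assuming a live `ctx` and <stdio.h>:

    // high-water mark of the context pool: offset + size of the last object
    printf("ggml context: %zu bytes used\n", ggml_used_mem(ctx));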
@@ -3959,27 +3942,6 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     return max_size;
 }
 
-// IMPORTANT:
-// when creating "opt" tensors, always save and load the scratch buffer
-// this is an error prone process, but it is necessary to support inplace
-// operators when using scratch buffers
-// TODO: implement a better way
-static void ggml_scratch_save(struct ggml_context * ctx) {
-    // this is needed to allow opt tensors to store their data
-    // TODO: again, need to find a better way
-    ctx->no_alloc_save = ctx->no_alloc;
-    ctx->no_alloc      = false;
-
-    ctx->scratch_save = ctx->scratch;
-    ctx->scratch.data = NULL;
-}
-
-static void ggml_scratch_load(struct ggml_context * ctx) {
-    ctx->no_alloc = ctx->no_alloc_save;
-
-    ctx->scratch = ctx->scratch_save;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 
 static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
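The deleted save/load pair did two jobs: park the scratch pointer and force `no_alloc` off so small "opt" tensors could carry real data. The second job can still be expressed with the surviving getters/setters; a sketch, assuming a live `ctx`:

    // equivalent of the no_alloc half of ggml_scratch_save()/ggml_scratch_load()
    const bool no_alloc = ggml_get_no_alloc(ctx);
    ggml_set_no_alloc(ctx, false);             // this tensor must own data

    struct ggml_tensor * n = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);

    ggml_set_no_alloc(ctx, no_alloc);          // restore the caller's mode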
@@ -4060,29 +4022,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     size_t obj_alloc_size = 0;
 
     if (view_src == NULL && !ctx->no_alloc) {
-        if (ctx->scratch.data != NULL) {
-            // allocate tensor data in the scratch buffer
-            if (ctx->scratch.offs + data_size > ctx->scratch.size) {
-                GGML_LOG_WARN("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                        __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
-                assert(false);
-                return NULL;
-            }
-
-            data = (char * const) ctx->scratch.data + ctx->scratch.offs;
-
-            ctx->scratch.offs += data_size;
-        } else {
-            // allocate tensor data in the context's memory pool
-            obj_alloc_size = data_size;
-        }
+        // allocate tensor data in the context's memory pool
+        obj_alloc_size = data_size;
     }
 
     struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
     GGML_ASSERT(obj_new);
 
-    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
-
     struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
 #ifdef __clang__
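With the scratch branch gone, every non-view tensor is a single object in the context pool: `GGML_TENSOR_SIZE` of metadata plus its data. That makes sizing a context straightforward; a back-of-the-envelope sketch using the public overhead helper (the tensor count and per-tensor size are placeholders):

    // budget: per-tensor metadata overhead + the data each tensor needs
    const size_t n_tensors = 64;
    const size_t data_each = 1024*1024; // bytes per tensor, example value

    struct ggml_init_params params = {
        /*.mem_size   =*/ n_tensors*(ggml_tensor_overhead() + data_each),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };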
@@ -4178,24 +4124,16 @@ struct ggml_tensor * ggml_new_tensor_4d(
 }
 
 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
-    ggml_scratch_save(ctx);
-
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
 
-    ggml_scratch_load(ctx);
-
     ggml_set_i32(result, value);
 
     return result;
 }
 
 struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
-    ggml_scratch_save(ctx);
-
     struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
 
-    ggml_scratch_load(ctx);
-
     ggml_set_f32(result, value);
 
     return result;
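A side effect worth noting: these helpers used to force an allocation via `ggml_scratch_save`, so they worked even in `no_alloc` contexts. After this change they allocate like any other tensor, so they should only be called on a context with `no_alloc == false` (or with the manual toggle shown earlier). Typical use, assuming a live `ctx`:

    // 1-element constants; require a data-allocating context after this commit
    struct ggml_tensor * n_past = ggml_new_i32(ctx, 42);
    struct ggml_tensor * eps    = ggml_new_f32(ctx, 1e-5f);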
@@ -20263,7 +20201,6 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
     uint64_t size_eval = 0;
 
     // compute size of intermediate results
-    // TODO: does not take into account scratch buffers !!!!
     for (int i = 0; i < cgraph->n_nodes; ++i) {
         size_eval += ggml_nbytes_pad(cgraph->nodes[i]);
     }