ggerganov committed
Commit c71bca8 · unverified · 1 parent: 648fd74

whisper : fix extra memory usage after recent processor changes


Had increased the memory buffer to the size of the model and forgot to
bring it down.
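
For context, the new MEM_REQ_MEMORY figures line up with the size of the decoder's key/value cache: K and V for self-attention over n_text_ctx = 448 tokens plus cross-attention over n_audio_ctx = 1500 encoder frames, per layer. Below is a rough back-of-the-envelope check (not code from this commit), assuming f16 K/V storage and the published Whisper model dimensions:

    #include <cstdio>

    int main() {
        struct dims { const char * name; int n_state; int n_layer; };
        const dims models[] = {
            { "tiny",    384,  4 },
            { "base",    512,  6 },
            { "small",   768, 12 },
            { "medium", 1024, 24 },
            { "large",  1280, 32 },
        };
        const int n_text_ctx  = 448;  // max decoder tokens (self-attention KV)
        const int n_audio_ctx = 1500; // encoder output frames (cross-attention KV)

        for (const auto & m : models) {
            // 2 tensors (K and V) * layers * context length * state dim * 2 bytes (f16)
            const double self  = 2.0*m.n_layer*n_text_ctx *m.n_state*2;
            const double cross = 2.0*m.n_layer*n_audio_ctx*m.n_state*2;
            printf("%-6s ~%.0f MB\n", m.name, (self + cross)/(1024.0*1024.0));
        }
        return 0;
    }

Each estimate lands within a couple of MB of the table in the diff below (e.g. ~304 MB for large vs the 306 MB budget); the slack presumably covers ggml's per-tensor bookkeeping overhead.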

Files changed (1):
  whisper.cpp  +28 -20
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -133,11 +133,19 @@ static const std::map<std::string, std::pair<int, std::string>> g_lang = {
 static const size_t MB = 1024*1024;
 
 static const std::map<e_model, size_t> MEM_REQ_MODEL = {
-    { MODEL_TINY,     86ull*MB },
-    { MODEL_BASE,    165ull*MB },
-    { MODEL_SMALL,   540ull*MB },
-    { MODEL_MEDIUM, 1650ull*MB },
-    { MODEL_LARGE,  3260ull*MB },
+    { MODEL_TINY,     74ull*MB },
+    { MODEL_BASE,    142ull*MB },
+    { MODEL_SMALL,   466ull*MB },
+    { MODEL_MEDIUM, 1464ull*MB },
+    { MODEL_LARGE,  2952ull*MB },
+};
+
+static const std::map<e_model, size_t> MEM_REQ_MEMORY = {
+    { MODEL_TINY,     12ull*MB },
+    { MODEL_BASE,     24ull*MB },
+    { MODEL_SMALL,    70ull*MB },
+    { MODEL_MEDIUM,  184ull*MB },
+    { MODEL_LARGE,   306ull*MB },
 };
 
 static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
@@ -498,7 +506,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
 
     wctx.buf_model = new std::vector<uint8_t>();
     wctx.buf_model->resize(MEM_REQ_MODEL.at(model.type));
-    wctx.buf_memory.resize(std::max(MEM_REQ_MODEL.at(model.type), MEM_REQ_MODEL.at(model.type))); // TODO: TMP !!!
+    wctx.buf_memory.resize(MEM_REQ_MEMORY.at(model.type));
    wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
     wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));
 
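The fixed line was a leftover from the processor changes: std::max(x, x) is just x, so buf_memory had been sized at the full model-weight budget on top of buf_model. A small illustration of what that costs for MODEL_LARGE, using only the constants from the hunks above (illustrative arithmetic, not code from the commit):

    #include <algorithm>
    #include <cstdio>

    int main() {
        const unsigned long long MB = 1024*1024;
        // before: the TMP line resolved to the model-weight budget, since max(x, x) == x
        const unsigned long long before = std::max(3260ull*MB, 3260ull*MB);
        // after: the dedicated KV-cache budget from MEM_REQ_MEMORY
        const unsigned long long after = 306ull*MB;
        printf("buf_memory: %llu MB -> %llu MB (~%.1f GB saved)\n",
               before/MB, after/MB, double(before - after)/(1024.0*MB));
        return 0;
    }

buf_model shrinks as well, from 3260 MB to 2952 MB: judging by the numbers, the old MEM_REQ_MODEL values had the KV budget folded in (86 ≈ 74 + 12, ..., 3260 ≈ 2952 + 306).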
@@ -722,20 +730,6 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
         }
     }
 
-    // create the ggml memory context
-    {
-        struct ggml_init_params params = {
-            .mem_size   = wctx.buf_memory.size(),
-            .mem_buffer = wctx.buf_memory.data(),
-        };
-
-        model.ctx_mem = ggml_init(params);
-        if (!model.ctx_mem) {
-            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
-            return false;
-        }
-    }
-
     // prepare memory for the weights
     {
         auto & ctx = model.ctx;
@@ -932,6 +926,20 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
         }
     }
 
+    // create the ggml memory context
+    {
+        struct ggml_init_params params = {
+            .mem_size   = wctx.buf_memory.size(),
+            .mem_buffer = wctx.buf_memory.data(),
+        };
+
+        model.ctx_mem = ggml_init(params);
+        if (!model.ctx_mem) {
+            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
+            return false;
+        }
+    }
+
     // key + value memory
     {
         auto & ctx = model.ctx_mem;
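
The "create the ggml memory context" block is unchanged; it only moves from before the weight loading to just before the key/value tensors it backs. The pattern it relies on is sketched below, assuming the ggml API as used in this file (ggml_init_params with just mem_size and mem_buffer): when mem_buffer is non-NULL, ggml lays the context's tensors out inside the caller-owned buffer instead of allocating its own, which is why buf_memory must cover the K/V tensors plus ggml's overhead, i.e. the MEM_REQ_MEMORY table.

    #include "ggml.h"

    #include <cstdint>
    #include <vector>

    // Hypothetical helper, not part of whisper.cpp: build a ggml context
    // whose tensors live inside a caller-owned buffer.
    static struct ggml_context * make_ctx_from_buffer(std::vector<uint8_t> & buf) {
        struct ggml_init_params params = {
            .mem_size   = buf.size(),
            .mem_buffer = buf.data(),
        };
        return ggml_init(params); // NULL on failure, as checked in the hunk above
    }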