Spaces:
Running
Running
whisper : fix extra memory usage after recent processor changes
Browse files
Had increased the memory buffer to the size of the model and forgot to
bring it down.
- whisper.cpp +28 -20
whisper.cpp
CHANGED
|
@@ -133,11 +133,19 @@ static const std::map<std::string, std::pair<int, std::string>> g_lang = {
|
|
| 133 |
static const size_t MB = 1024*1024;
|
| 134 |
|
| 135 |
static const std::map<e_model, size_t> MEM_REQ_MODEL = {
|
| 136 |
-
{ MODEL_TINY,
|
| 137 |
-
{ MODEL_BASE,
|
| 138 |
-
{ MODEL_SMALL,
|
| 139 |
-
{ MODEL_MEDIUM,
|
| 140 |
-
{ MODEL_LARGE,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
};
|
| 142 |
|
| 143 |
static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
|
|
@@ -498,7 +506,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|
| 498 |
|
| 499 |
wctx.buf_model = new std::vector<uint8_t>();
|
| 500 |
wctx.buf_model->resize(MEM_REQ_MODEL.at(model.type));
|
| 501 |
-
wctx.buf_memory.resize(
|
| 502 |
wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
|
| 503 |
wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));
|
| 504 |
|
|
@@ -722,20 +730,6 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|
| 722 |
}
|
| 723 |
}
|
| 724 |
|
| 725 |
-
// create the ggml memory context
|
| 726 |
-
{
|
| 727 |
-
struct ggml_init_params params = {
|
| 728 |
-
.mem_size = wctx.buf_memory.size(),
|
| 729 |
-
.mem_buffer = wctx.buf_memory.data(),
|
| 730 |
-
};
|
| 731 |
-
|
| 732 |
-
model.ctx_mem = ggml_init(params);
|
| 733 |
-
if (!model.ctx_mem) {
|
| 734 |
-
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
|
| 735 |
-
return false;
|
| 736 |
-
}
|
| 737 |
-
}
|
| 738 |
-
|
| 739 |
// prepare memory for the weights
|
| 740 |
{
|
| 741 |
auto & ctx = model.ctx;
|
|
@@ -932,6 +926,20 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|
| 932 |
}
|
| 933 |
}
|
| 934 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 935 |
// key + value memory
|
| 936 |
{
|
| 937 |
auto & ctx = model.ctx_mem;
|
|
|
|
| 133 |
static const size_t MB = 1024*1024;
|
| 134 |
|
| 135 |
static const std::map<e_model, size_t> MEM_REQ_MODEL = {
|
| 136 |
+
{ MODEL_TINY, 74ull*MB },
|
| 137 |
+
{ MODEL_BASE, 142ull*MB },
|
| 138 |
+
{ MODEL_SMALL, 466ull*MB },
|
| 139 |
+
{ MODEL_MEDIUM, 1464ull*MB },
|
| 140 |
+
{ MODEL_LARGE, 2952ull*MB },
|
| 141 |
+
};
|
| 142 |
+
|
| 143 |
+
static const std::map<e_model, size_t> MEM_REQ_MEMORY = {
|
| 144 |
+
{ MODEL_TINY, 12ull*MB },
|
| 145 |
+
{ MODEL_BASE, 24ull*MB },
|
| 146 |
+
{ MODEL_SMALL, 70ull*MB },
|
| 147 |
+
{ MODEL_MEDIUM, 184ull*MB },
|
| 148 |
+
{ MODEL_LARGE, 306ull*MB },
|
| 149 |
};
|
| 150 |
|
| 151 |
static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
|
|
|
|
| 506 |
|
| 507 |
wctx.buf_model = new std::vector<uint8_t>();
|
| 508 |
wctx.buf_model->resize(MEM_REQ_MODEL.at(model.type));
|
| 509 |
+
wctx.buf_memory.resize(MEM_REQ_MEMORY.at(model.type));
|
| 510 |
wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
|
| 511 |
wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));
|
| 512 |
|
|
|
|
| 730 |
}
|
| 731 |
}
|
| 732 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 733 |
// prepare memory for the weights
|
| 734 |
{
|
| 735 |
auto & ctx = model.ctx;
|
|
|
|
| 926 |
}
|
| 927 |
}
|
| 928 |
|
| 929 |
+
// create the ggml memory context
|
| 930 |
+
{
|
| 931 |
+
struct ggml_init_params params = {
|
| 932 |
+
.mem_size = wctx.buf_memory.size(),
|
| 933 |
+
.mem_buffer = wctx.buf_memory.data(),
|
| 934 |
+
};
|
| 935 |
+
|
| 936 |
+
model.ctx_mem = ggml_init(params);
|
| 937 |
+
if (!model.ctx_mem) {
|
| 938 |
+
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
|
| 939 |
+
return false;
|
| 940 |
+
}
|
| 941 |
+
}
|
| 942 |
+
|
| 943 |
// key + value memory
|
| 944 |
{
|
| 945 |
auto & ctx = model.ctx_mem;
|