Spaces:
Running
Running
Revert "whisper : remove extra backend instance (huh?)" (#2182)
Browse files
- whisper.cpp +15 -4
whisper.cpp
CHANGED
|
@@ -818,6 +818,8 @@ struct whisper_state {
|
|
| 818 |
|
| 819 |
whisper_decoder decoders[WHISPER_MAX_DECODERS];
|
| 820 |
|
|
|
|
|
|
|
| 821 |
// ggml-alloc:
|
| 822 |
// - stores meta info about the intermediate tensors into the `meta` buffers
|
| 823 |
// - stores the actual tensor data into the `data` buffers
|
|
@@ -2261,7 +2263,7 @@ static bool whisper_encode_internal(
|
|
| 2261 |
}
|
| 2262 |
|
| 2263 |
if (!whisper_encode_external(wstate)) {
|
| 2264 |
-
if (!ggml_graph_compute_helper(
|
| 2265 |
return false;
|
| 2266 |
}
|
| 2267 |
} else {
|
|
@@ -2284,7 +2286,7 @@ static bool whisper_encode_internal(
|
|
| 2284 |
return false;
|
| 2285 |
}
|
| 2286 |
|
| 2287 |
-
if (!ggml_graph_compute_helper(
|
| 2288 |
return false;
|
| 2289 |
}
|
| 2290 |
}
|
|
@@ -2300,7 +2302,7 @@ static bool whisper_encode_internal(
|
|
| 2300 |
return false;
|
| 2301 |
}
|
| 2302 |
|
| 2303 |
-
if (!ggml_graph_compute_helper(
|
| 2304 |
return false;
|
| 2305 |
}
|
| 2306 |
}
|
|
@@ -2801,7 +2803,7 @@ static bool whisper_decode_internal(
|
|
| 2801 |
|
| 2802 |
logits = gf->nodes[gf->n_nodes - 1];
|
| 2803 |
|
| 2804 |
-
if (!ggml_graph_compute_helper(
|
| 2805 |
return false;
|
| 2806 |
}
|
| 2807 |
}
|
|
@@ -3248,6 +3250,13 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
|
|
| 3248 |
|
| 3249 |
whisper_state * state = new whisper_state;
|
| 3250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3251 |
// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
|
| 3252 |
// in theory, there can be a case where this is not enough, but in practice it should always be enough
|
| 3253 |
const int factor = 3;
|
|
@@ -3684,6 +3693,8 @@ void whisper_free_state(struct whisper_state * state) {
|
|
| 3684 |
ggml_gallocr_free(state->alloc_cross.alloc);
|
| 3685 |
ggml_gallocr_free(state->alloc_decode.alloc);
|
| 3686 |
|
|
|
|
|
|
|
| 3687 |
// [EXPERIMENTAL] Token-level timestamps with DTW
|
| 3688 |
aheads_masks_free(state->aheads_masks);
|
| 3689 |
|
|
|
|
| 818 |
|
| 819 |
whisper_decoder decoders[WHISPER_MAX_DECODERS];
|
| 820 |
|
| 821 |
+
ggml_backend_t backend = nullptr;
|
| 822 |
+
|
| 823 |
// ggml-alloc:
|
| 824 |
// - stores meta info about the intermediate tensors into the `meta` buffers
|
| 825 |
// - stores the actual tensor data into the `data` buffers
|
|
|
|
| 2263 |
}
|
| 2264 |
|
| 2265 |
if (!whisper_encode_external(wstate)) {
|
| 2266 |
+
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
| 2267 |
return false;
|
| 2268 |
}
|
| 2269 |
} else {
|
|
|
|
| 2286 |
return false;
|
| 2287 |
}
|
| 2288 |
|
| 2289 |
+
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
| 2290 |
return false;
|
| 2291 |
}
|
| 2292 |
}
|
|
|
|
| 2302 |
return false;
|
| 2303 |
}
|
| 2304 |
|
| 2305 |
+
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
| 2306 |
return false;
|
| 2307 |
}
|
| 2308 |
}
|
|
|
|
| 2803 |
|
| 2804 |
logits = gf->nodes[gf->n_nodes - 1];
|
| 2805 |
|
| 2806 |
+
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
|
| 2807 |
return false;
|
| 2808 |
}
|
| 2809 |
}
|
|
|
|
| 3250 |
|
| 3251 |
whisper_state * state = new whisper_state;
|
| 3252 |
|
| 3253 |
+
state->backend = whisper_backend_init(ctx->params);
|
| 3254 |
+
if (!state->backend) {
|
| 3255 |
+
WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);
|
| 3256 |
+
whisper_free_state(state);
|
| 3257 |
+
return nullptr;
|
| 3258 |
+
}
|
| 3259 |
+
|
| 3260 |
// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
|
| 3261 |
// in theory, there can be a case where this is not enough, but in practice it should always be enough
|
| 3262 |
const int factor = 3;
|
|
|
|
| 3693 |
ggml_gallocr_free(state->alloc_cross.alloc);
|
| 3694 |
ggml_gallocr_free(state->alloc_decode.alloc);
|
| 3695 |
|
| 3696 |
+
ggml_backend_free(state->backend);
|
| 3697 |
+
|
| 3698 |
// [EXPERIMENTAL] Token-level timestamps with DTW
|
| 3699 |
aheads_masks_free(state->aheads_masks);
|
| 3700 |
|