ggerganov committed on
Commit
b708d81
·
unverified ·
1 Parent(s): 18c60fc

Revert "whisper : remove extra backend instance (huh?)" (#2182)

Browse files
Files changed (1) hide show
  1. whisper.cpp +15 -4
whisper.cpp CHANGED
@@ -818,6 +818,8 @@ struct whisper_state {
818
 
819
  whisper_decoder decoders[WHISPER_MAX_DECODERS];
820
 
 
 
821
  // ggml-alloc:
822
  // - stores meta info about the intermediate tensors into the `meta` buffers
823
  // - stores the actual tensor data into the `data` buffers
@@ -2261,7 +2263,7 @@ static bool whisper_encode_internal(
2261
  }
2262
 
2263
  if (!whisper_encode_external(wstate)) {
2264
- if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
2265
  return false;
2266
  }
2267
  } else {
@@ -2284,7 +2286,7 @@ static bool whisper_encode_internal(
2284
  return false;
2285
  }
2286
 
2287
- if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
2288
  return false;
2289
  }
2290
  }
@@ -2300,7 +2302,7 @@ static bool whisper_encode_internal(
2300
  return false;
2301
  }
2302
 
2303
- if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
2304
  return false;
2305
  }
2306
  }
@@ -2801,7 +2803,7 @@ static bool whisper_decode_internal(
2801
 
2802
  logits = gf->nodes[gf->n_nodes - 1];
2803
 
2804
- if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
2805
  return false;
2806
  }
2807
  }
@@ -3248,6 +3250,13 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
3248
 
3249
  whisper_state * state = new whisper_state;
3250
 
 
 
 
 
 
 
 
3251
  // at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
3252
  // in theory, there can be a case where this is not enough, but in practice it should always be enough
3253
  const int factor = 3;
@@ -3684,6 +3693,8 @@ void whisper_free_state(struct whisper_state * state) {
3684
  ggml_gallocr_free(state->alloc_cross.alloc);
3685
  ggml_gallocr_free(state->alloc_decode.alloc);
3686
 
 
 
3687
  // [EXPERIMENTAL] Token-level timestamps with DTW
3688
  aheads_masks_free(state->aheads_masks);
3689
 
 
818
 
819
  whisper_decoder decoders[WHISPER_MAX_DECODERS];
820
 
821
+ ggml_backend_t backend = nullptr;
822
+
823
  // ggml-alloc:
824
  // - stores meta info about the intermediate tensors into the `meta` buffers
825
  // - stores the actual tensor data into the `data` buffers
 
2263
  }
2264
 
2265
  if (!whisper_encode_external(wstate)) {
2266
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
2267
  return false;
2268
  }
2269
  } else {
 
2286
  return false;
2287
  }
2288
 
2289
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
2290
  return false;
2291
  }
2292
  }
 
2302
  return false;
2303
  }
2304
 
2305
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
2306
  return false;
2307
  }
2308
  }
 
2803
 
2804
  logits = gf->nodes[gf->n_nodes - 1];
2805
 
2806
+ if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
2807
  return false;
2808
  }
2809
  }
 
3250
 
3251
  whisper_state * state = new whisper_state;
3252
 
3253
+ state->backend = whisper_backend_init(ctx->params);
3254
+ if (!state->backend) {
3255
+ WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);
3256
+ whisper_free_state(state);
3257
+ return nullptr;
3258
+ }
3259
+
3260
  // at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
3261
  // in theory, there can be a case where this is not enough, but in practice it should always be enough
3262
  const int factor = 3;
 
3693
  ggml_gallocr_free(state->alloc_cross.alloc);
3694
  ggml_gallocr_free(state->alloc_decode.alloc);
3695
 
3696
+ ggml_backend_free(state->backend);
3697
+
3698
  // [EXPERIMENTAL] Token-level timestamps with DTW
3699
  aheads_masks_free(state->aheads_masks);
3700