ggerganov committed
Commit fc78e6a · unverified · 1 parent: 4528b8c

whisper : minor OpenVINO refactoring (#1037)


Hopefully I didn't break something - haven't tested

Files changed (5)
  1. examples/common.cpp +5 -0
  2. examples/common.h +8 -6
  3. examples/main/main.cpp +1 -1
  4. whisper.cpp +33 -36
  5. whisper.h +1 -2
examples/common.cpp CHANGED
@@ -47,6 +47,11 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.n_batch = std::stoi(argv[++i]);
         } else if (arg == "-m" || arg == "--model") {
            params.model = argv[++i];
+        } else if (arg == "-i" || arg == "--interactive") {
+            params.interactive = true;
+        } else if (arg == "-ip" || arg == "--interactive-port") {
+            params.interactive = true;
+            params.interactive_port = std::stoi(argv[++i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, params);
             exit(0);
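These flags populate the new `interactive` / `interactive_port` fields added to `gpt_params` in common.h below. As a self-contained sketch of the parsing pattern (a simplified stand-in for `gpt_params_parse`, not part of this commit):

// minimal stand-in for the flag handling above (hypothetical, for illustration)
#include <cstdint>
#include <cstdio>
#include <string>

int main(int argc, char ** argv) {
    bool    interactive      = false;
    int32_t interactive_port = -1;

    for (int i = 1; i < argc; i++) {
        const std::string arg = argv[i];
        if (arg == "-i" || arg == "--interactive") {
            interactive = true;
        } else if (arg == "-ip" || arg == "--interactive-port") {
            interactive      = true;
            interactive_port = std::stoi(argv[++i]); // assumes a port value follows, as in the original
        }
    }

    printf("interactive = %s, port = %d\n", interactive ? "true" : "false", interactive_port);
    return 0;
}

Note that, like the original, `std::stoi(argv[++i])` assumes a value follows `-ip`; passing the flag as the last argument would read past the end of argv.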
examples/common.h CHANGED
@@ -15,22 +15,24 @@
 //
 
 struct gpt_params {
-    int32_t seed = -1; // RNG seed
+    int32_t seed      = -1;  // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 200; // new tokens to predict
+    int32_t n_batch   = 8;   // batch size for prompt processing
 
     // sampling parameters
-    int32_t top_k = 40;
-    float top_p = 0.9f;
-    float temp = 0.9f;
+    int32_t top_k          = 40;
+    float   top_p          = 0.9f;
+    float   temp           = 0.9f;
     int32_t repeat_last_n  = 64;
     float   repeat_penalty = 1.00f;
 
-    int32_t n_batch = 8; // batch size for prompt processing
-
     std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
     std::string prompt     = "";
     std::string token_test = "";
+
+    bool    interactive      = false;
+    int32_t interactive_port = -1;
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
examples/main/main.cpp CHANGED
@@ -813,7 +813,7 @@ int main(int argc, char ** argv) {
         return 3;
     }
 
-    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
     whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
 
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
whisper.cpp CHANGED
@@ -2654,7 +2654,7 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
-static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -2665,7 +2665,7 @@ static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
     return path_bin;
 }
 
-static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+static std::string whisper_openvino_get_path_cache(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -2743,55 +2743,52 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     return state;
 }
 
-int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
-    const char* openvino_model_path,
-    const char* openvino_device,
-    const char* openvino_cache_dir)
-{
+int whisper_ctx_init_openvino_encoder(
+        struct whisper_context * ctx,
+        const char * model_path,
+        const char * device,
+        const char * cache_dir) {
 #ifndef WHISPER_USE_OPENVINO
     (void)(ctx);
-    (void)(openvino_model_path);
-    (void)(openvino_device);
-    (void)(openvino_cache_dir);
-    return 0;
+    (void)(model_path);
+    (void)(device);
+    (void)(cache_dir);
+
+    return 1;
 #else
-    if (!openvino_model_path && ctx->path_model.empty())
-    {
-        fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__);
-        return 0;
+    if (!model_path && ctx->path_model.empty()) {
+        fprintf(stderr, "%s: model_path is nullptr, and ctx has no model_path set.\n", __func__);
+        return 1;
     }
 
-    std::string path_openvino;
-    if (!openvino_model_path) {
-        //if openvino_model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
-        path_openvino = whisper_get_openvino_path_encoder(ctx->path_model);
-    }
-    else {
-        path_openvino = openvino_model_path;
+    std::string path_encoder;
+    if (!model_path) {
+        //if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
+        path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
+    } else {
+        path_encoder = model_path;
     }
 
-    std::string path_openvino_cache_dir;
-    if (!openvino_cache_dir) {
-        //if openvino_cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
-        path_openvino_cache_dir = whisper_get_openvino_path_cache(ctx->path_model);
-    }
-    else {
-        path_openvino_cache_dir = openvino_cache_dir;
+    std::string path_cache;
+    if (!cache_dir) {
+        //if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
+        path_cache = whisper_openvino_get_path_cache(ctx->path_model);
+    } else {
+        path_cache = cache_dir;
     }
 
-    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str());
+    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
     fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
 
-    ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str());
+    ctx->state->ctx_openvino = whisper_openvino_init(path_encoder.c_str(), device, path_cache.c_str());
     if (!ctx->state->ctx_openvino) {
-        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str());
-        return 0;
-    }
-    else {
+        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_encoder.c_str());
+        return 1;
+    } else {
         fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
     }
 
-    return 1;
+    return 0;
 #endif
 }
 
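The two renamed helpers derive OpenVINO-specific paths next to the ggml model by stripping the `.bin` extension and appending a suffix. A standalone sketch of the naming scheme (the `-encoder-openvino.xml` suffix comes from the comment in the diff; the model path here is hypothetical):

// standalone illustration of the path derivation used by the helpers above
#include <cassert>
#include <string>

static std::string strip_ext(std::string path_bin) {
    const auto pos = path_bin.rfind('.');
    if (pos != std::string::npos) {
        path_bin = path_bin.substr(0, pos); // drop ".bin"
    }
    return path_bin;
}

int main() {
    const std::string model = "models/ggml-base.en.bin"; // hypothetical model path

    // whisper_openvino_get_path_encoder(model) would yield:
    assert(strip_ext(model) + "-encoder-openvino.xml" ==
           "models/ggml-base.en-encoder-openvino.xml");
    return 0;
}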
 
whisper.h CHANGED
@@ -120,8 +120,7 @@ extern "C" {
     // cache_dir: Optional cache directory that can speed up init time, especially for
     //            GPU, by caching compiled 'blobs' there.
     //            Set to nullptr if not used.
-    // Returns 1 on success. If OpenVINO is not enabled in build, this
-    // simply returns 0.
+    // Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
     WHISPER_API int whisper_ctx_init_openvino_encoder(
         struct whisper_context * ctx,
         const char * model_path,
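For reference, a minimal caller-side sketch of the updated contract (0 now means success; the model path and "CPU" device string are placeholder assumptions, and `whisper_init_from_file` is the context constructor available in this version of the API):

#include "whisper.h"

#include <cstdio>

int main() {
    struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // nullptr model_path and cache_dir -> both derived from the ggml model path;
    // after this commit, a non-zero result signals failure (previously 0 did)
    if (whisper_ctx_init_openvino_encoder(ctx, nullptr, "CPU", nullptr) != 0) {
        fprintf(stderr, "OpenVINO encoder unavailable, using the default ggml encoder\n");
    }

    // ... run whisper_full(...) as usual ...

    whisper_free(ctx);
    return 0;
}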