ggerganov committed
Commit fc78e6a · unverified · 1 parent: 4528b8c

whisper : minor OpenVINO refactoring (#1037)


Hopefully I didn't break something - haven't tested

Files changed (5)
  1. examples/common.cpp +5 -0
  2. examples/common.h +8 -6
  3. examples/main/main.cpp +1 -1
  4. whisper.cpp +33 -36
  5. whisper.h +1 -2
examples/common.cpp CHANGED
@@ -47,6 +47,11 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.n_batch = std::stoi(argv[++i]);
         } else if (arg == "-m" || arg == "--model") {
            params.model = argv[++i];
+        } else if (arg == "-i" || arg == "--interactive") {
+            params.interactive = true;
+        } else if (arg == "-ip" || arg == "--interactive-port") {
+            params.interactive = true;
+            params.interactive_port = std::stoi(argv[++i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, params);
             exit(0);
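These flags populate the new `interactive` / `interactive_port` fields added to `gpt_params` in common.h below. As a self-contained sketch of the parsing pattern (a simplified stand-in for `gpt_params_parse`, not part of this commit):

// minimal stand-in for the flag handling above (hypothetical, for illustration)
#include <cstdint>
#include <cstdio>
#include <string>

int main(int argc, char ** argv) {
    bool    interactive      = false;
    int32_t interactive_port = -1;

    for (int i = 1; i < argc; i++) {
        const std::string arg = argv[i];
        if (arg == "-i" || arg == "--interactive") {
            interactive = true;
        } else if (arg == "-ip" || arg == "--interactive-port") {
            interactive      = true;
            interactive_port = std::stoi(argv[++i]); // assumes a port value follows, as in the original
        }
    }

    printf("interactive = %s, port = %d\n", interactive ? "true" : "false", interactive_port);
    return 0;
}

Note that, like the original, `std::stoi(argv[++i])` assumes a value follows `-ip`; passing the flag as the last argument would read past the end of argv.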
examples/common.h CHANGED
@@ -15,22 +15,24 @@
 //
 
 struct gpt_params {
-    int32_t seed = -1; // RNG seed
+    int32_t seed      = -1;  // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 200; // new tokens to predict
+    int32_t n_batch   = 8;   // batch size for prompt processing
 
     // sampling parameters
-    int32_t top_k = 40;
-    float top_p = 0.9f;
-    float temp = 0.9f;
+    int32_t top_k          = 40;
+    float   top_p          = 0.9f;
+    float   temp           = 0.9f;
     int32_t repeat_last_n  = 64;
     float   repeat_penalty = 1.00f;
 
-    int32_t n_batch = 8; // batch size for prompt processing
-
     std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
     std::string prompt     = "";
     std::string token_test = "";
+
+    bool    interactive      = false;
+    int32_t interactive_port = -1;
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
examples/main/main.cpp CHANGED
@@ -813,7 +813,7 @@ int main(int argc, char ** argv) {
         return 3;
     }
 
-    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
     whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
 
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
whisper.cpp CHANGED
@@ -2654,7 +2654,7 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 
 #ifdef WHISPER_USE_OPENVINO
 // replace .bin with-encoder-openvino.xml
-static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -2665,7 +2665,7 @@ static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
     return path_bin;
 }
 
-static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+static std::string whisper_openvino_get_path_cache(std::string path_bin) {
     auto pos = path_bin.rfind('.');
     if (pos != std::string::npos) {
         path_bin = path_bin.substr(0, pos);
@@ -2743,55 +2743,52 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
     return state;
 }
 
-int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
-    const char* openvino_model_path,
-    const char* openvino_device,
-    const char* openvino_cache_dir)
-{
+int whisper_ctx_init_openvino_encoder(
+        struct whisper_context * ctx,
+        const char * model_path,
+        const char * device,
+        const char * cache_dir) {
 #ifndef WHISPER_USE_OPENVINO
     (void)(ctx);
-    (void)(openvino_model_path);
-    (void)(openvino_device);
-    (void)(openvino_cache_dir);
-    return 0;
+    (void)(model_path);
+    (void)(device);
+    (void)(cache_dir);
+
+    return 1;
 #else
-    if (!openvino_model_path && ctx->path_model.empty())
-    {
-        fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__);
-        return 0;
+    if (!model_path && ctx->path_model.empty()) {
+        fprintf(stderr, "%s: model_path is nullptr, and ctx has no model_path set.\n", __func__);
+        return 1;
     }
 
-    std::string path_openvino;
-    if (!openvino_model_path) {
-        //if openvino_model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
-        path_openvino = whisper_get_openvino_path_encoder(ctx->path_model);
-    }
-    else {
-        path_openvino = openvino_model_path;
+    std::string path_encoder;
+    if (!model_path) {
+        //if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
+        path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
+    } else {
+        path_encoder = model_path;
     }
 
-    std::string path_openvino_cache_dir;
-    if (!openvino_cache_dir) {
-        //if openvino_cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
-        path_openvino_cache_dir = whisper_get_openvino_path_cache(ctx->path_model);
-    }
-    else {
-        path_openvino_cache_dir = openvino_cache_dir;
+    std::string path_cache;
+    if (!cache_dir) {
+        //if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
+        path_cache = whisper_openvino_get_path_cache(ctx->path_model);
+    } else {
+        path_cache = cache_dir;
     }
 
-    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str());
+    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
     fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
 
-    ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str());
+    ctx->state->ctx_openvino = whisper_openvino_init(path_encoder.c_str(), device, path_cache.c_str());
     if (!ctx->state->ctx_openvino) {
-        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str());
-        return 0;
-    }
-    else {
+        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_encoder.c_str());
+        return 1;
+    } else {
         fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
     }
 
-    return 1;
+    return 0;
 #endif
 }
 
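The two renamed helpers derive OpenVINO-specific paths next to the ggml model by stripping the `.bin` extension and appending a suffix. A standalone sketch of the naming scheme (the `-encoder-openvino.xml` suffix comes from the comment in the diff; the model path here is hypothetical):

// standalone illustration of the path derivation used by the helpers above
#include <cassert>
#include <string>

static std::string strip_ext(std::string path_bin) {
    const auto pos = path_bin.rfind('.');
    if (pos != std::string::npos) {
        path_bin = path_bin.substr(0, pos); // drop ".bin"
    }
    return path_bin;
}

int main() {
    const std::string model = "models/ggml-base.en.bin"; // hypothetical model path

    // whisper_openvino_get_path_encoder(model) would yield:
    assert(strip_ext(model) + "-encoder-openvino.xml" ==
           "models/ggml-base.en-encoder-openvino.xml");
    return 0;
}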
 
whisper.h CHANGED
@@ -120,8 +120,7 @@ extern "C" {
     // cache_dir: Optional cache directory that can speed up init time, especially for
     //            GPU, by caching compiled 'blobs' there.
     //            Set to nullptr if not used.
-    // Returns 1 on success. If OpenVINO is not enabled in build, this
-    // simply returns 0.
+    // Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
     WHISPER_API int whisper_ctx_init_openvino_encoder(
         struct whisper_context * ctx,
         const char * model_path,
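For reference, a minimal caller-side sketch of the updated contract (0 now means success; the model path and "CPU" device string are placeholder assumptions, and `whisper_init_from_file` is the context constructor available in this version of the API):

#include "whisper.h"

#include <cstdio>

int main() {
    struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");
    if (ctx == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // nullptr model_path and cache_dir -> both derived from the ggml model path;
    // after this commit, a non-zero result signals failure (previously 0 did)
    if (whisper_ctx_init_openvino_encoder(ctx, nullptr, "CPU", nullptr) != 0) {
        fprintf(stderr, "OpenVINO encoder unavailable, using the default ggml encoder\n");
    }

    // ... run whisper_full(...) as usual ...

    whisper_free(ctx);
    return 0;
}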