Spaces:
Running
Running
whisper : minor OpenVINO refactoring (#1037)
Browse files
Hopefully I didn't break something - haven't tested
- examples/common.cpp +5 -0
- examples/common.h +8 -6
- examples/main/main.cpp +1 -1
- whisper.cpp +33 -36
- whisper.h +1 -2
examples/common.cpp
CHANGED
|
@@ -47,6 +47,11 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|
| 47 |
params.n_batch = std::stoi(argv[++i]);
|
| 48 |
} else if (arg == "-m" || arg == "--model") {
|
| 49 |
params.model = argv[++i];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
} else if (arg == "-h" || arg == "--help") {
|
| 51 |
gpt_print_usage(argc, argv, params);
|
| 52 |
exit(0);
|
|
|
|
| 47 |
params.n_batch = std::stoi(argv[++i]);
|
| 48 |
} else if (arg == "-m" || arg == "--model") {
|
| 49 |
params.model = argv[++i];
|
| 50 |
+
} else if (arg == "-i" || arg == "--interactive") {
|
| 51 |
+
params.interactive = true;
|
| 52 |
+
} else if (arg == "-ip" || arg == "--interactive-port") {
|
| 53 |
+
params.interactive = true;
|
| 54 |
+
params.interactive_port = std::stoi(argv[++i]);
|
| 55 |
} else if (arg == "-h" || arg == "--help") {
|
| 56 |
gpt_print_usage(argc, argv, params);
|
| 57 |
exit(0);
|
examples/common.h
CHANGED
|
@@ -15,22 +15,24 @@
|
|
| 15 |
//
|
| 16 |
|
| 17 |
struct gpt_params {
|
| 18 |
-
int32_t seed = -1;
|
| 19 |
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
| 20 |
int32_t n_predict = 200; // new tokens to predict
|
|
|
|
| 21 |
|
| 22 |
// sampling parameters
|
| 23 |
-
int32_t top_k
|
| 24 |
-
float top_p
|
| 25 |
-
float temp
|
| 26 |
int32_t repeat_last_n = 64;
|
| 27 |
float repeat_penalty = 1.00f;
|
| 28 |
|
| 29 |
-
int32_t n_batch = 8; // batch size for prompt processing
|
| 30 |
-
|
| 31 |
std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
|
| 32 |
std::string prompt = "";
|
| 33 |
std::string token_test = "";
|
|
|
|
|
|
|
|
|
|
| 34 |
};
|
| 35 |
|
| 36 |
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
|
|
|
| 15 |
//
|
| 16 |
|
| 17 |
struct gpt_params {
|
| 18 |
+
int32_t seed = -1; // RNG seed
|
| 19 |
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
| 20 |
int32_t n_predict = 200; // new tokens to predict
|
| 21 |
+
int32_t n_batch = 8; // batch size for prompt processing
|
| 22 |
|
| 23 |
// sampling parameters
|
| 24 |
+
int32_t top_k = 40;
|
| 25 |
+
float top_p = 0.9f;
|
| 26 |
+
float temp = 0.9f;
|
| 27 |
int32_t repeat_last_n = 64;
|
| 28 |
float repeat_penalty = 1.00f;
|
| 29 |
|
|
|
|
|
|
|
| 30 |
std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
|
| 31 |
std::string prompt = "";
|
| 32 |
std::string token_test = "";
|
| 33 |
+
|
| 34 |
+
bool interactive = false;
|
| 35 |
+
int32_t interactive_port = -1;
|
| 36 |
};
|
| 37 |
|
| 38 |
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
examples/main/main.cpp
CHANGED
|
@@ -813,7 +813,7 @@ int main(int argc, char ** argv) {
|
|
| 813 |
return 3;
|
| 814 |
}
|
| 815 |
|
| 816 |
-
// initialize openvino encoder.
|
| 817 |
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
|
| 818 |
|
| 819 |
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
|
|
|
| 813 |
return 3;
|
| 814 |
}
|
| 815 |
|
| 816 |
+
// initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured
|
| 817 |
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
|
| 818 |
|
| 819 |
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
whisper.cpp
CHANGED
|
@@ -2654,7 +2654,7 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
|
|
| 2654 |
|
| 2655 |
#ifdef WHISPER_USE_OPENVINO
|
| 2656 |
// replace .bin with-encoder-openvino.xml
|
| 2657 |
-
static std::string
|
| 2658 |
auto pos = path_bin.rfind('.');
|
| 2659 |
if (pos != std::string::npos) {
|
| 2660 |
path_bin = path_bin.substr(0, pos);
|
|
@@ -2665,7 +2665,7 @@ static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
|
|
| 2665 |
return path_bin;
|
| 2666 |
}
|
| 2667 |
|
| 2668 |
-
static std::string
|
| 2669 |
auto pos = path_bin.rfind('.');
|
| 2670 |
if (pos != std::string::npos) {
|
| 2671 |
path_bin = path_bin.substr(0, pos);
|
|
@@ -2743,55 +2743,52 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
|
|
| 2743 |
return state;
|
| 2744 |
}
|
| 2745 |
|
| 2746 |
-
int whisper_ctx_init_openvino_encoder(
|
| 2747 |
-
|
| 2748 |
-
|
| 2749 |
-
|
| 2750 |
-
{
|
| 2751 |
#ifndef WHISPER_USE_OPENVINO
|
| 2752 |
(void)(ctx);
|
| 2753 |
-
(void)(
|
| 2754 |
-
(void)(
|
| 2755 |
-
(void)(
|
| 2756 |
-
|
|
|
|
| 2757 |
#else
|
| 2758 |
-
if (!
|
| 2759 |
-
|
| 2760 |
-
|
| 2761 |
-
return 0;
|
| 2762 |
}
|
| 2763 |
|
| 2764 |
-
std::string
|
| 2765 |
-
if (!
|
| 2766 |
-
//if
|
| 2767 |
-
|
| 2768 |
-
}
|
| 2769 |
-
|
| 2770 |
-
path_openvino = openvino_model_path;
|
| 2771 |
}
|
| 2772 |
|
| 2773 |
-
std::string
|
| 2774 |
-
if (!
|
| 2775 |
-
//if
|
| 2776 |
-
|
| 2777 |
-
}
|
| 2778 |
-
|
| 2779 |
-
path_openvino_cache_dir = openvino_cache_dir;
|
| 2780 |
}
|
| 2781 |
|
| 2782 |
-
fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__,
|
| 2783 |
fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
|
| 2784 |
|
| 2785 |
-
ctx->state->ctx_openvino = whisper_openvino_init(
|
| 2786 |
if (!ctx->state->ctx_openvino) {
|
| 2787 |
-
fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__,
|
| 2788 |
-
return
|
| 2789 |
-
}
|
| 2790 |
-
else {
|
| 2791 |
fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
|
| 2792 |
}
|
| 2793 |
|
| 2794 |
-
return
|
| 2795 |
#endif
|
| 2796 |
}
|
| 2797 |
|
|
|
|
| 2654 |
|
| 2655 |
#ifdef WHISPER_USE_OPENVINO
|
| 2656 |
// replace .bin with-encoder-openvino.xml
|
| 2657 |
+
static std::string whisper_openvino_get_path_encoder(std::string path_bin) {
|
| 2658 |
auto pos = path_bin.rfind('.');
|
| 2659 |
if (pos != std::string::npos) {
|
| 2660 |
path_bin = path_bin.substr(0, pos);
|
|
|
|
| 2665 |
return path_bin;
|
| 2666 |
}
|
| 2667 |
|
| 2668 |
+
static std::string whisper_openvino_get_path_cache(std::string path_bin) {
|
| 2669 |
auto pos = path_bin.rfind('.');
|
| 2670 |
if (pos != std::string::npos) {
|
| 2671 |
path_bin = path_bin.substr(0, pos);
|
|
|
|
| 2743 |
return state;
|
| 2744 |
}
|
| 2745 |
|
| 2746 |
+
int whisper_ctx_init_openvino_encoder(
|
| 2747 |
+
struct whisper_context * ctx,
|
| 2748 |
+
const char * model_path,
|
| 2749 |
+
const char * device,
|
| 2750 |
+
const char * cache_dir) {
|
| 2751 |
#ifndef WHISPER_USE_OPENVINO
|
| 2752 |
(void)(ctx);
|
| 2753 |
+
(void)(model_path);
|
| 2754 |
+
(void)(device);
|
| 2755 |
+
(void)(cache_dir);
|
| 2756 |
+
|
| 2757 |
+
return 1;
|
| 2758 |
#else
|
| 2759 |
+
if (!model_path && ctx->path_model.empty()) {
|
| 2760 |
+
fprintf(stderr, "%s: model_path is nullptr, and ctx has no model_path set.\n", __func__);
|
| 2761 |
+
return 1;
|
|
|
|
| 2762 |
}
|
| 2763 |
|
| 2764 |
+
std::string path_encoder;
|
| 2765 |
+
if (!model_path) {
|
| 2766 |
+
//if model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
|
| 2767 |
+
path_encoder = whisper_openvino_get_path_encoder(ctx->path_model);
|
| 2768 |
+
} else {
|
| 2769 |
+
path_encoder = model_path;
|
|
|
|
| 2770 |
}
|
| 2771 |
|
| 2772 |
+
std::string path_cache;
|
| 2773 |
+
if (!cache_dir) {
|
| 2774 |
+
//if cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
|
| 2775 |
+
path_cache = whisper_openvino_get_path_cache(ctx->path_model);
|
| 2776 |
+
} else {
|
| 2777 |
+
path_cache = cache_dir;
|
|
|
|
| 2778 |
}
|
| 2779 |
|
| 2780 |
+
fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_encoder.c_str());
|
| 2781 |
fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
|
| 2782 |
|
| 2783 |
+
ctx->state->ctx_openvino = whisper_openvino_init(path_encoder.c_str(), device, path_cache.c_str());
|
| 2784 |
if (!ctx->state->ctx_openvino) {
|
| 2785 |
+
fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_encoder.c_str());
|
| 2786 |
+
return 1;
|
| 2787 |
+
} else {
|
|
|
|
| 2788 |
fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
|
| 2789 |
}
|
| 2790 |
|
| 2791 |
+
return 0;
|
| 2792 |
#endif
|
| 2793 |
}
|
| 2794 |
|
whisper.h
CHANGED
|
@@ -120,8 +120,7 @@ extern "C" {
|
|
| 120 |
// cache_dir: Optional cache directory that can speed up init time, especially for
|
| 121 |
// GPU, by caching compiled 'blobs' there.
|
| 122 |
// Set to nullptr if not used.
|
| 123 |
-
// Returns
|
| 124 |
-
// simply returns 0.
|
| 125 |
WHISPER_API int whisper_ctx_init_openvino_encoder(
|
| 126 |
struct whisper_context * ctx,
|
| 127 |
const char * model_path,
|
|
|
|
| 120 |
// cache_dir: Optional cache directory that can speed up init time, especially for
|
| 121 |
// GPU, by caching compiled 'blobs' there.
|
| 122 |
// Set to nullptr if not used.
|
| 123 |
+
// Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
|
|
|
|
| 124 |
WHISPER_API int whisper_ctx_init_openvino_encoder(
|
| 125 |
struct whisper_context * ctx,
|
| 126 |
const char * model_path,
|