Spaces:
Running
Running
main : log probs to text file (#1205)
Browse files
* token/probability file generated with -ls
* code comment cleaning
* main : indentations
---------
Co-authored-by: Georgi Gerganov <[email protected]>
- examples/main/main.cpp +28 -0
examples/main/main.cpp
CHANGED
|
@@ -87,6 +87,7 @@ struct whisper_params {
|
|
| 87 |
bool print_colors = false;
|
| 88 |
bool print_progress = false;
|
| 89 |
bool no_timestamps = false;
|
|
|
|
| 90 |
|
| 91 |
std::string language = "en";
|
| 92 |
std::string prompt;
|
|
@@ -159,6 +160,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 159 |
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
| 160 |
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
|
| 161 |
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
|
|
|
|
| 162 |
else {
|
| 163 |
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
| 164 |
whisper_print_usage(argc, argv, params);
|
|
@@ -212,6 +214,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 212 |
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
| 213 |
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
|
| 214 |
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
|
|
|
|
| 215 |
fprintf(stderr, "\n");
|
| 216 |
}
|
| 217 |
|
|
@@ -486,6 +489,25 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
|
|
| 486 |
return true;
|
| 487 |
}
|
| 488 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
| 490 |
std::ofstream fout(fname);
|
| 491 |
int indent = 0;
|
|
@@ -982,6 +1004,12 @@ int main(int argc, char ** argv) {
|
|
| 982 |
const auto fname_lrc = fname_out + ".lrc";
|
| 983 |
output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
|
| 984 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
}
|
| 986 |
}
|
| 987 |
|
|
|
|
| 87 |
bool print_colors = false;
|
| 88 |
bool print_progress = false;
|
| 89 |
bool no_timestamps = false;
|
| 90 |
+
bool log_score = false;
|
| 91 |
|
| 92 |
std::string language = "en";
|
| 93 |
std::string prompt;
|
|
|
|
| 160 |
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
| 161 |
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
|
| 162 |
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
|
| 163 |
+
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
|
| 164 |
else {
|
| 165 |
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
| 166 |
whisper_print_usage(argc, argv, params);
|
|
|
|
| 214 |
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
| 215 |
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
|
| 216 |
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
|
| 217 |
+
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
|
| 218 |
fprintf(stderr, "\n");
|
| 219 |
}
|
| 220 |
|
|
|
|
| 489 |
return true;
|
| 490 |
}
|
| 491 |
|
| 492 |
+
bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
| 493 |
+
std::ofstream fout(fname);
|
| 494 |
+
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
| 495 |
+
|
| 496 |
+
const int n_segments = whisper_full_n_segments(ctx);
|
| 497 |
+
// fprintf(stderr,"segments: %d\n",n_segments);
|
| 498 |
+
for (int i = 0; i < n_segments; ++i) {
|
| 499 |
+
const int n_tokens = whisper_full_n_tokens(ctx, i);
|
| 500 |
+
// fprintf(stderr,"tokens: %d\n",n_tokens);
|
| 501 |
+
for (int j = 0; j < n_tokens; j++) {
|
| 502 |
+
auto token = whisper_full_get_token_text(ctx, i, j);
|
| 503 |
+
auto probability = whisper_full_get_token_p(ctx, i, j);
|
| 504 |
+
fout << token << '\t' << probability << std::endl;
|
| 505 |
+
// fprintf(stderr,"token: %s %f\n",token,probability);
|
| 506 |
+
}
|
| 507 |
+
}
|
| 508 |
+
return true;
|
| 509 |
+
}
|
| 510 |
+
|
| 511 |
bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
| 512 |
std::ofstream fout(fname);
|
| 513 |
int indent = 0;
|
|
|
|
| 1004 |
const auto fname_lrc = fname_out + ".lrc";
|
| 1005 |
output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
|
| 1006 |
}
|
| 1007 |
+
|
| 1008 |
+
// output to score file
|
| 1009 |
+
if (params.log_score) {
|
| 1010 |
+
const auto fname_score = fname_out + ".score.txt";
|
| 1011 |
+
output_score(ctx, fname_score.c_str(), params, pcmf32s);
|
| 1012 |
+
}
|
| 1013 |
}
|
| 1014 |
}
|
| 1015 |
|