jbyu ggerganov committed on
Commit
26a54a0
·
unverified ·
1 Parent(s): fc4185e

main : log probs to text file (#1205)

Browse files

* token/probability file generated with -ls

* code comment cleaning

* main : indentations

---------

Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (1) hide show
  1. examples/main/main.cpp +28 -0
examples/main/main.cpp CHANGED
@@ -87,6 +87,7 @@ struct whisper_params {
87
  bool print_colors = false;
88
  bool print_progress = false;
89
  bool no_timestamps = false;
 
90
 
91
  std::string language = "en";
92
  std::string prompt;
@@ -159,6 +160,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
159
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
160
  else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
161
  else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
 
162
  else {
163
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
164
  whisper_print_usage(argc, argv, params);
@@ -212,6 +214,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
212
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
213
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
214
  fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
 
215
  fprintf(stderr, "\n");
216
  }
217
 
@@ -486,6 +489,25 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
486
  return true;
487
  }
488
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
490
  std::ofstream fout(fname);
491
  int indent = 0;
@@ -982,6 +1004,12 @@ int main(int argc, char ** argv) {
982
  const auto fname_lrc = fname_out + ".lrc";
983
  output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
984
  }
 
 
 
 
 
 
985
  }
986
  }
987
 
 
87
  bool print_colors = false;
88
  bool print_progress = false;
89
  bool no_timestamps = false;
90
+ bool log_score = false;
91
 
92
  std::string language = "en";
93
  std::string prompt;
 
160
  else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
161
  else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
162
  else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
163
+ else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
164
  else {
165
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
166
  whisper_print_usage(argc, argv, params);
 
214
  fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
215
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
216
  fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
217
+ fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
218
  fprintf(stderr, "\n");
219
  }
220
 
 
489
  return true;
490
  }
491
 
492
+ bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
493
+ std::ofstream fout(fname);
494
+ fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
495
+
496
+ const int n_segments = whisper_full_n_segments(ctx);
497
+ // fprintf(stderr,"segments: %d\n",n_segments);
498
+ for (int i = 0; i < n_segments; ++i) {
499
+ const int n_tokens = whisper_full_n_tokens(ctx, i);
500
+ // fprintf(stderr,"tokens: %d\n",n_tokens);
501
+ for (int j = 0; j < n_tokens; j++) {
502
+ auto token = whisper_full_get_token_text(ctx, i, j);
503
+ auto probability = whisper_full_get_token_p(ctx, i, j);
504
+ fout << token << '\t' << probability << std::endl;
505
+ // fprintf(stderr,"token: %s %f\n",token,probability);
506
+ }
507
+ }
508
+ return true;
509
+ }
510
+
511
  bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
512
  std::ofstream fout(fname);
513
  int indent = 0;
 
1004
  const auto fname_lrc = fname_out + ".lrc";
1005
  output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
1006
  }
1007
+
1008
+ // output to score file
1009
+ if (params.log_score) {
1010
+ const auto fname_score = fname_out + ".score.txt";
1011
+ output_score(ctx, fname_score.c_str(), params, pcmf32s);
1012
+ }
1013
  }
1014
  }
1015