Aaron Ang committed on
Commit
a482bd7
·
unverified ·
1 Parent(s): 32a61ec

command: output commands to text file (#3273)

Browse files

This commit implements the handling for the command-line argument `-f --file FNAME`, which was previously missing.

Files changed (1) hide show
  1. examples/command/command.cpp +26 -8
examples/command/command.cpp CHANGED
@@ -251,7 +251,7 @@ static std::vector<std::string> get_words(const std::string &txt) {
251
 
252
  // command-list mode
253
  // guide the transcription to match the most likely command from a provided list
254
- static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
255
  fprintf(stderr, "\n");
256
  fprintf(stderr, "%s: guided mode\n", __func__);
257
 
@@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
444
 
445
  const float prob = probs_id[0].first;
446
  const int index = probs_id[0].second;
 
447
 
448
  fprintf(stdout, "\n");
449
  fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
450
- "\033[1m", allowed_commands[index].c_str(), "\033[0m", prob,
451
  (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
452
  fprintf(stdout, "\n");
 
 
 
453
  }
454
  }
455
 
@@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
462
 
463
  // always-prompt mode
464
  // transcribe the voice into text after valid prompt
465
- static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
466
  bool is_running = true;
467
  bool ask_prompt = true;
468
 
@@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
528
 
529
  if ((sim > 0.7f) && (command.size() > 0)) {
530
  fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
 
 
 
531
  }
532
 
533
  fprintf(stdout, "\n");
@@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
542
 
543
  // general-purpose mode
544
  // freely transcribe the voice into text
545
- static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
546
  bool is_running = true;
547
  bool have_prompt = false;
548
  bool ask_prompt = true;
@@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
662
  } else {
663
  // cut the prompt from the decoded text
664
  const std::string command = ::trim(txt.substr(best_len));
665
-
666
  fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
 
 
 
667
  }
668
 
669
  fprintf(stdout, "\n");
@@ -759,13 +768,22 @@ int main(int argc, char ** argv) {
759
  }
760
  }
761
 
 
 
 
 
 
 
 
 
 
762
  if (ret_val == 0) {
763
  if (!params.commands.empty()) {
764
- ret_val = process_command_list(ctx, audio, params);
765
  } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
766
- ret_val = always_prompt_transcription(ctx, audio, params);
767
  } else {
768
- ret_val = process_general_transcription(ctx, audio, params);
769
  }
770
  }
771
 
 
251
 
252
  // command-list mode
253
  // guide the transcription to match the most likely command from a provided list
254
+ static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params, std::ofstream &fout) {
255
  fprintf(stderr, "\n");
256
  fprintf(stderr, "%s: guided mode\n", __func__);
257
 
 
444
 
445
  const float prob = probs_id[0].first;
446
  const int index = probs_id[0].second;
447
+ const char * best_command = allowed_commands[index].c_str();
448
 
449
  fprintf(stdout, "\n");
450
  fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
451
+ "\033[1m", best_command, "\033[0m", prob,
452
  (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
453
  fprintf(stdout, "\n");
454
+ if (fout.is_open()) {
455
+ fout << best_command << std::endl;
456
+ }
457
  }
458
  }
459
 
 
466
 
467
  // always-prompt mode
468
  // transcribe the voice into text after valid prompt
469
+ static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
470
  bool is_running = true;
471
  bool ask_prompt = true;
472
 
 
532
 
533
  if ((sim > 0.7f) && (command.size() > 0)) {
534
  fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
535
+ if (fout.is_open()) {
536
+ fout << command << std::endl;
537
+ }
538
  }
539
 
540
  fprintf(stdout, "\n");
 
549
 
550
  // general-purpose mode
551
  // freely transcribe the voice into text
552
+ static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
553
  bool is_running = true;
554
  bool have_prompt = false;
555
  bool ask_prompt = true;
 
669
  } else {
670
  // cut the prompt from the decoded text
671
  const std::string command = ::trim(txt.substr(best_len));
 
672
  fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
673
+ if (fout.is_open()) {
674
+ fout << command << std::endl;
675
+ }
676
  }
677
 
678
  fprintf(stdout, "\n");
 
768
  }
769
  }
770
 
771
+ std::ofstream fout;
772
+ if (params.fname_out.length() > 0) {
773
+ fout.open(params.fname_out);
774
+ if (!fout.is_open()) {
775
+ fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
776
+ return 1;
777
+ }
778
+ }
779
+
780
  if (ret_val == 0) {
781
  if (!params.commands.empty()) {
782
+ ret_val = process_command_list(ctx, audio, params, fout);
783
  } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
784
+ ret_val = always_prompt_transcription(ctx, audio, params, fout);
785
  } else {
786
+ ret_val = process_general_transcription(ctx, audio, params, fout);
787
  }
788
  }
789