Spaces:
Running
Running
Aaron Ang
committed on
command: output commands to text file (#3273)
Browse files
This commit implements the code for the command-line argument `-f --file FNAME`, which is currently missing.
- examples/command/command.cpp +26 -8
examples/command/command.cpp
CHANGED
|
@@ -251,7 +251,7 @@ static std::vector<std::string> get_words(const std::string &txt) {
|
|
| 251 |
|
| 252 |
// command-list mode
|
| 253 |
// guide the transcription to match the most likely command from a provided list
|
| 254 |
-
static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
|
| 255 |
fprintf(stderr, "\n");
|
| 256 |
fprintf(stderr, "%s: guided mode\n", __func__);
|
| 257 |
|
|
@@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
|
|
| 444 |
|
| 445 |
const float prob = probs_id[0].first;
|
| 446 |
const int index = probs_id[0].second;
|
|
|
|
| 447 |
|
| 448 |
fprintf(stdout, "\n");
|
| 449 |
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
|
| 450 |
-
"\033[1m",
|
| 451 |
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
|
| 452 |
fprintf(stdout, "\n");
|
|
|
|
|
|
|
|
|
|
| 453 |
}
|
| 454 |
}
|
| 455 |
|
|
@@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
|
|
| 462 |
|
| 463 |
// always-prompt mode
|
| 464 |
// transcribe the voice into text after valid prompt
|
| 465 |
-
static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
| 466 |
bool is_running = true;
|
| 467 |
bool ask_prompt = true;
|
| 468 |
|
|
@@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
|
|
| 528 |
|
| 529 |
if ((sim > 0.7f) && (command.size() > 0)) {
|
| 530 |
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
|
|
|
|
|
|
|
|
|
|
| 531 |
}
|
| 532 |
|
| 533 |
fprintf(stdout, "\n");
|
|
@@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
|
|
| 542 |
|
| 543 |
// general-purpose mode
|
| 544 |
// freely transcribe the voice into text
|
| 545 |
-
static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
|
| 546 |
bool is_running = true;
|
| 547 |
bool have_prompt = false;
|
| 548 |
bool ask_prompt = true;
|
|
@@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
|
|
| 662 |
} else {
|
| 663 |
// cut the prompt from the decoded text
|
| 664 |
const std::string command = ::trim(txt.substr(best_len));
|
| 665 |
-
|
| 666 |
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
|
|
|
|
|
|
|
|
|
|
| 667 |
}
|
| 668 |
|
| 669 |
fprintf(stdout, "\n");
|
|
@@ -759,13 +768,22 @@ int main(int argc, char ** argv) {
|
|
| 759 |
}
|
| 760 |
}
|
| 761 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 762 |
if (ret_val == 0) {
|
| 763 |
if (!params.commands.empty()) {
|
| 764 |
-
ret_val = process_command_list(ctx, audio, params);
|
| 765 |
} else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
|
| 766 |
-
ret_val = always_prompt_transcription(ctx, audio, params);
|
| 767 |
} else {
|
| 768 |
-
ret_val = process_general_transcription(ctx, audio, params);
|
| 769 |
}
|
| 770 |
}
|
| 771 |
|
|
|
|
| 251 |
|
| 252 |
// command-list mode
|
| 253 |
// guide the transcription to match the most likely command from a provided list
|
| 254 |
+
static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params, std::ofstream &fout) {
|
| 255 |
fprintf(stderr, "\n");
|
| 256 |
fprintf(stderr, "%s: guided mode\n", __func__);
|
| 257 |
|
|
|
|
| 444 |
|
| 445 |
const float prob = probs_id[0].first;
|
| 446 |
const int index = probs_id[0].second;
|
| 447 |
+
const char * best_command = allowed_commands[index].c_str();
|
| 448 |
|
| 449 |
fprintf(stdout, "\n");
|
| 450 |
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
|
| 451 |
+
"\033[1m", best_command, "\033[0m", prob,
|
| 452 |
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
|
| 453 |
fprintf(stdout, "\n");
|
| 454 |
+
if (fout.is_open()) {
|
| 455 |
+
fout << best_command << std::endl;
|
| 456 |
+
}
|
| 457 |
}
|
| 458 |
}
|
| 459 |
|
|
|
|
| 466 |
|
| 467 |
// always-prompt mode
|
| 468 |
// transcribe the voice into text after valid prompt
|
| 469 |
+
static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
|
| 470 |
bool is_running = true;
|
| 471 |
bool ask_prompt = true;
|
| 472 |
|
|
|
|
| 532 |
|
| 533 |
if ((sim > 0.7f) && (command.size() > 0)) {
|
| 534 |
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
|
| 535 |
+
if (fout.is_open()) {
|
| 536 |
+
fout << command << std::endl;
|
| 537 |
+
}
|
| 538 |
}
|
| 539 |
|
| 540 |
fprintf(stdout, "\n");
|
|
|
|
| 549 |
|
| 550 |
// general-purpose mode
|
| 551 |
// freely transcribe the voice into text
|
| 552 |
+
static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
|
| 553 |
bool is_running = true;
|
| 554 |
bool have_prompt = false;
|
| 555 |
bool ask_prompt = true;
|
|
|
|
| 669 |
} else {
|
| 670 |
// cut the prompt from the decoded text
|
| 671 |
const std::string command = ::trim(txt.substr(best_len));
|
|
|
|
| 672 |
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
|
| 673 |
+
if (fout.is_open()) {
|
| 674 |
+
fout << command << std::endl;
|
| 675 |
+
}
|
| 676 |
}
|
| 677 |
|
| 678 |
fprintf(stdout, "\n");
|
|
|
|
| 768 |
}
|
| 769 |
}
|
| 770 |
|
| 771 |
+
std::ofstream fout;
|
| 772 |
+
if (params.fname_out.length() > 0) {
|
| 773 |
+
fout.open(params.fname_out);
|
| 774 |
+
if (!fout.is_open()) {
|
| 775 |
+
fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
|
| 776 |
+
return 1;
|
| 777 |
+
}
|
| 778 |
+
}
|
| 779 |
+
|
| 780 |
if (ret_val == 0) {
|
| 781 |
if (!params.commands.empty()) {
|
| 782 |
+
ret_val = process_command_list(ctx, audio, params, fout);
|
| 783 |
} else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
|
| 784 |
+
ret_val = always_prompt_transcription(ctx, audio, params, fout);
|
| 785 |
} else {
|
| 786 |
+
ret_val = process_general_transcription(ctx, audio, params, fout);
|
| 787 |
}
|
| 788 |
}
|
| 789 |
|