whisper.cpp

Running

App Files Files Community

Aaron Ang commited on Jun 24

Commit

a482bd7

unverified ·

1 Parent(s): 32a61ec

command: output commands to text file (#3273)

Browse files

This commit implements code for the command line argument `-f --file FNAME` which is currently missing.

Files changed (1) hide show

examples/command/command.cpp +26 -8

examples/command/command.cpp CHANGED Viewed

@@ -251,7 +251,7 @@ static std::vector<std::string> get_words(const std::string &txt) {
 // command-list mode
 // guide the transcription to match the most likely command from a provided list
-static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: guided mode\n", __func__);
@@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
                     const float prob = probs_id[0].first;
                     const int index = probs_id[0].second;
                     fprintf(stdout, "\n");
                     fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
-                            "\033[1m", allowed_commands[index].c_str(), "\033[0m", prob,
                             (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
                     fprintf(stdout, "\n");
                 }
             }
@@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
 // always-prompt mode
 // transcribe the voice into text after valid prompt
-static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
     bool is_running = true;
     bool ask_prompt = true;
@@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
                 if ((sim > 0.7f) && (command.size() > 0)) {
                     fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                 }
                 fprintf(stdout, "\n");
@@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
 // general-purpose mode
 // freely transcribe the voice into text
-static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
     bool is_running  = true;
     bool have_prompt = false;
     bool ask_prompt  = true;
@@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
                     } else {
                         // cut the prompt from the decoded text
                         const std::string command = ::trim(txt.substr(best_len));
                         fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                     }
                     fprintf(stdout, "\n");
@@ -759,13 +768,22 @@ int main(int argc, char ** argv) {
         }
     }
     if (ret_val == 0) {
         if (!params.commands.empty()) {
-            ret_val = process_command_list(ctx, audio, params);
         } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
-            ret_val = always_prompt_transcription(ctx, audio, params);
         } else {
-            ret_val = process_general_transcription(ctx, audio, params);
         }
     }

 // command-list mode
 // guide the transcription to match the most likely command from a provided list
+static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params, std::ofstream &fout) {
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: guided mode\n", __func__);
                     const float prob = probs_id[0].first;
                     const int index = probs_id[0].second;
+                    const char * best_command = allowed_commands[index].c_str();
                     fprintf(stdout, "\n");
                     fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
+                            "\033[1m", best_command, "\033[0m", prob,
                             (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
                     fprintf(stdout, "\n");
+                    if (fout.is_open()) {
+                        fout << best_command << std::endl;
+                    }
                 }
             }
 // always-prompt mode
 // transcribe the voice into text after valid prompt
+static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
     bool is_running = true;
     bool ask_prompt = true;
                 if ((sim > 0.7f) && (command.size() > 0)) {
                     fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+                    if (fout.is_open()) {
+                        fout << command << std::endl;
+                    }
                 }
                 fprintf(stdout, "\n");
 // general-purpose mode
 // freely transcribe the voice into text
+static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
     bool is_running  = true;
     bool have_prompt = false;
     bool ask_prompt  = true;
                     } else {
                         // cut the prompt from the decoded text
                         const std::string command = ::trim(txt.substr(best_len));
                         fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+                        if (fout.is_open()) {
+                            fout << command << std::endl;
+                        }
                     }
                     fprintf(stdout, "\n");
         }
     }
+    std::ofstream fout;
+    if (params.fname_out.length() > 0) {
+        fout.open(params.fname_out);
+        if (!fout.is_open()) {
+            fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
+            return 1;
+        }
+    }
     if (ret_val == 0) {
         if (!params.commands.empty()) {
+            ret_val = process_command_list(ctx, audio, params, fout);
         } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
+            ret_val = always_prompt_transcription(ctx, audio, params, fout);
         } else {
+            ret_val = process_general_transcription(ctx, audio, params, fout);
         }
     }