whisper.cpp

Running

ggerganov committed on Jan 16, 2023

Commit

7d451fb

unverified ·

1 Parent(s): 996990d

stream : fix --keep_context argument to be used correctly (#354)

Files changed (2) hide show

examples/stream/stream.cpp CHANGED Viewed

@@ -434,9 +434,9 @@ int main(int argc, char ** argv) {
     const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line
-    params.no_timestamps = !use_vad;
-    params.no_context    = use_vad;
-    params.max_tokens    = 0;
     // init audio
@@ -486,7 +486,7 @@ int main(int argc, char ** argv) {
                 params.no_timestamps ? 0 : 1);
         if (!use_vad) {
-            fprintf(stderr, "%s: n_new_line = %d\n", __func__, n_new_line);
         } else {
             fprintf(stderr, "%s: using VAD, will transcribe on speech activity\n", __func__);
         }

     const int n_new_line = !use_vad ? params.length_ms / params.step_ms - 1 : 1; // number of steps to print new line
+    params.no_timestamps  = !use_vad;
+    params.no_context    |= use_vad;
+    params.max_tokens     = 0;
     // init audio
                 params.no_timestamps ? 0 : 1);
         if (!use_vad) {
+            fprintf(stderr, "%s: n_new_line = %d, no_context = %d\n", __func__, n_new_line, params.no_context);
         } else {
             fprintf(stderr, "%s: using VAD, will transcribe on speech activity\n", __func__);
         }

whisper.h CHANGED Viewed

@@ -245,7 +245,7 @@ extern "C" {
         int duration_ms;        // audio duration to process in ms
         bool translate;
-        bool no_context;        // do not use initial prompt for the decoder (if any)
         bool single_segment;    // force single segment output (useful for streaming)
         bool print_special;     // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
         bool print_progress;    // print progress information

         int duration_ms;        // audio duration to process in ms
         bool translate;
+        bool no_context;        // do not use past transcription (if any) as initial prompt for the decoder
         bool single_segment;    // force single segment output (useful for streaming)
         bool print_special;     // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
         bool print_progress;    // print progress information