Spaces:
Running
Running
LittleLoli
committed on
main : add lrc output support (#718)
Browse files
* add lrc output support.
* fix wrong comment
- examples/main/main.cpp +42 -0
examples/main/main.cpp
CHANGED
|
@@ -75,6 +75,7 @@ struct whisper_params {
|
|
| 75 |
bool output_wts = false;
|
| 76 |
bool output_csv = false;
|
| 77 |
bool output_jsn = false;
|
|
|
|
| 78 |
bool print_special = false;
|
| 79 |
bool print_colors = false;
|
| 80 |
bool print_progress = false;
|
|
@@ -130,6 +131,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 130 |
else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
|
| 131 |
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
|
| 132 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
|
|
|
| 133 |
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
|
| 134 |
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
| 135 |
else if (arg == "-oj" || arg == "--output-json") { params.output_jsn = true; }
|
|
@@ -178,6 +180,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 178 |
fprintf(stderr, " -otxt, --output-txt [%-7s] output result in a text file\n", params.output_txt ? "true" : "false");
|
| 179 |
fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
|
| 180 |
fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
|
|
|
|
| 181 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
| 182 |
fprintf(stderr, " -fp, --font-path [%-7s] path to a monospace font for karaoke video\n", params.font_path.c_str());
|
| 183 |
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
|
@@ -647,6 +650,39 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|
| 647 |
return true;
|
| 648 |
}
|
| 649 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
int main(int argc, char ** argv) {
|
| 651 |
whisper_params params;
|
| 652 |
|
|
@@ -813,6 +849,12 @@ int main(int argc, char ** argv) {
|
|
| 813 |
const auto fname_jsn = fname_out + ".json";
|
| 814 |
output_json(ctx, fname_jsn.c_str(), params);
|
| 815 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
}
|
| 817 |
}
|
| 818 |
|
|
|
|
| 75 |
bool output_wts = false;
|
| 76 |
bool output_csv = false;
|
| 77 |
bool output_jsn = false;
|
| 78 |
+
bool output_lrc = false;
|
| 79 |
bool print_special = false;
|
| 80 |
bool print_colors = false;
|
| 81 |
bool print_progress = false;
|
|
|
|
| 131 |
else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
|
| 132 |
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
|
| 133 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
| 134 |
+
else if (arg == "-olrc" || arg == "--output-lrc") { params.output_lrc = true; }
|
| 135 |
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
|
| 136 |
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
| 137 |
else if (arg == "-oj" || arg == "--output-json") { params.output_jsn = true; }
|
|
|
|
| 180 |
fprintf(stderr, " -otxt, --output-txt [%-7s] output result in a text file\n", params.output_txt ? "true" : "false");
|
| 181 |
fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
|
| 182 |
fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
|
| 183 |
+
fprintf(stderr, " -olrc, --output-lrc [%-7s] output result in a lrc file\n", params.output_lrc ? "true" : "false");
|
| 184 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
| 185 |
fprintf(stderr, " -fp, --font-path [%-7s] path to a monospace font for karaoke video\n", params.font_path.c_str());
|
| 186 |
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
|
|
|
| 650 |
return true;
|
| 651 |
}
|
| 652 |
|
| 653 |
+
bool output_lrc(struct whisper_context * ctx, const char * fname) {
|
| 654 |
+
|
| 655 |
+
std::ofstream fout(fname);
|
| 656 |
+
if (!fout.is_open()) {
|
| 657 |
+
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
| 658 |
+
return false;
|
| 659 |
+
}
|
| 660 |
+
|
| 661 |
+
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
| 662 |
+
|
| 663 |
+
fout << "[by:whisper.cpp]\n";
|
| 664 |
+
|
| 665 |
+
const int n_segments = whisper_full_n_segments(ctx);
|
| 666 |
+
for (int i = 0; i < n_segments; ++i) {
|
| 667 |
+
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 668 |
+
const int64_t t = whisper_full_get_segment_t0(ctx, i);
|
| 669 |
+
|
| 670 |
+
int64_t msec = t * 10;
|
| 671 |
+
int64_t min = msec / (1000 * 60);
|
| 672 |
+
msec = msec - min * (1000 * 60);
|
| 673 |
+
int64_t sec = msec / 1000;
|
| 674 |
+
msec = msec - sec * 1000;
|
| 675 |
+
|
| 676 |
+
char buf[16];
|
| 677 |
+
snprintf(buf, sizeof(buf), "%02d:%02d.%02d", (int) min, (int) sec, (int) ( msec / 10));
|
| 678 |
+
std::string timestamp_lrc = std::string(buf);
|
| 679 |
+
|
| 680 |
+
fout << '[' << timestamp_lrc << ']' << text << "\n";
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
return true;
|
| 684 |
+
}
|
| 685 |
+
|
| 686 |
int main(int argc, char ** argv) {
|
| 687 |
whisper_params params;
|
| 688 |
|
|
|
|
| 849 |
const auto fname_jsn = fname_out + ".json";
|
| 850 |
output_json(ctx, fname_jsn.c_str(), params);
|
| 851 |
}
|
| 852 |
+
|
| 853 |
+
// output to LRC file
|
| 854 |
+
if (params.output_lrc) {
|
| 855 |
+
const auto fname_lrc = fname_out + ".lrc";
|
| 856 |
+
output_lrc(ctx, fname_lrc.c_str());
|
| 857 |
+
}
|
| 858 |
}
|
| 859 |
}
|
| 860 |
|