Oleg Sidorov committed on
Commit
77aa181
·
unverified ·
1 Parent(s): 9001318

server : backport .srt output format (#1565)

Browse files

This commit adds support for the .srt output format to the Whisper server.
The code is effectively backported from examples/main. The output MIME type
is set to application/x-subrip as per https://en.wikipedia.org/wiki/SubRip.

Example usage:

curl 127.0.0.1:8080/inference \
-H "Content-Type: multipart/form-data" \
-F file="@<file-path>" \
-F temperature="0.2" \
-F response-format="srt"

Files changed (1) hide show
  1. examples/server/server.cpp +22 -0
examples/server/server.cpp CHANGED
@@ -11,6 +11,7 @@
11
  #include <thread>
12
  #include <vector>
13
  #include <cstring>
 
14
 
15
  #if defined(_MSC_VER)
16
  #pragma warning(disable: 4244 4267) // possible loss of data
@@ -657,6 +658,27 @@ int main(int argc, char ** argv) {
657
  std::string results = output_str(ctx, params, pcmf32s);
658
  res.set_content(results.c_str(), "text/html");
659
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
  // TODO add more output formats
661
  else
662
  {
 
11
  #include <thread>
12
  #include <vector>
13
  #include <cstring>
14
+ #include <sstream>
15
 
16
  #if defined(_MSC_VER)
17
  #pragma warning(disable: 4244 4267) // possible loss of data
 
658
  std::string results = output_str(ctx, params, pcmf32s);
659
  res.set_content(results.c_str(), "text/html");
660
  }
661
+ else if (params.response_format == srt_format)
662
+ {
663
+ std::stringstream ss;
664
+ const int n_segments = whisper_full_n_segments(ctx);
665
+ for (int i = 0; i < n_segments; ++i) {
666
+ const char * text = whisper_full_get_segment_text(ctx, i);
667
+ const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
668
+ const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
669
+ std::string speaker = "";
670
+
671
+ if (params.diarize && pcmf32s.size() == 2)
672
+ {
673
+ speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
674
+ }
675
+
676
+ ss << i + 1 + params.offset_n << "\n";
677
+ ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
678
+ ss << speaker << text << "\n\n";
679
+ }
680
+ res.set_content(ss.str(), "application/x-subrip");
681
+ }
682
  // TODO add more output formats
683
  else
684
  {