Spaces:
Running
Running
Oleg Sidorov
committed on
server : backport .srt output format (#1565)
Browse files
This commit adds support for the .srt output format to the Whisper server. The code is
effectively backported from examples/main. The output MIME type is set to
application/x-subrip as per https://en.wikipedia.org/wiki/SubRip.
Example usage:
curl 127.0.0.1:8080/inference \
-H "Content-Type: multipart/form-data" \
-F file="@<file-path>" \
-F temperature="0.2" \
-F response-format="srt"
- examples/server/server.cpp +22 -0
examples/server/server.cpp
CHANGED
|
@@ -11,6 +11,7 @@
|
|
| 11 |
#include <thread>
|
| 12 |
#include <vector>
|
| 13 |
#include <cstring>
|
|
|
|
| 14 |
|
| 15 |
#if defined(_MSC_VER)
|
| 16 |
#pragma warning(disable: 4244 4267) // possible loss of data
|
|
@@ -657,6 +658,27 @@ int main(int argc, char ** argv) {
|
|
| 657 |
std::string results = output_str(ctx, params, pcmf32s);
|
| 658 |
res.set_content(results.c_str(), "text/html");
|
| 659 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
// TODO add more output formats
|
| 661 |
else
|
| 662 |
{
|
|
|
|
| 11 |
#include <thread>
|
| 12 |
#include <vector>
|
| 13 |
#include <cstring>
|
| 14 |
+
#include <sstream>
|
| 15 |
|
| 16 |
#if defined(_MSC_VER)
|
| 17 |
#pragma warning(disable: 4244 4267) // possible loss of data
|
|
|
|
| 658 |
std::string results = output_str(ctx, params, pcmf32s);
|
| 659 |
res.set_content(results.c_str(), "text/html");
|
| 660 |
}
|
| 661 |
+
else if (params.response_format == srt_format)
|
| 662 |
+
{
|
| 663 |
+
std::stringstream ss;
|
| 664 |
+
const int n_segments = whisper_full_n_segments(ctx);
|
| 665 |
+
for (int i = 0; i < n_segments; ++i) {
|
| 666 |
+
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 667 |
+
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 668 |
+
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 669 |
+
std::string speaker = "";
|
| 670 |
+
|
| 671 |
+
if (params.diarize && pcmf32s.size() == 2)
|
| 672 |
+
{
|
| 673 |
+
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
|
| 674 |
+
}
|
| 675 |
+
|
| 676 |
+
ss << i + 1 + params.offset_n << "\n";
|
| 677 |
+
ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
|
| 678 |
+
ss << speaker << text << "\n\n";
|
| 679 |
+
}
|
| 680 |
+
res.set_content(ss.str(), "application/x-subrip");
|
| 681 |
+
}
|
| 682 |
// TODO add more output formats
|
| 683 |
else
|
| 684 |
{
|