Spaces:
Running
Running
mashizora
committed on
main : fix double quote escaping in csv output (#2090)
Browse files- examples/main/main.cpp +33 -1
examples/main/main.cpp
CHANGED
|
@@ -471,6 +471,38 @@ char *escape_double_quotes_and_backslashes(const char *str) {
|
|
| 471 |
return escaped;
|
| 472 |
}
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
| 475 |
std::ofstream fout(fname);
|
| 476 |
if (!fout.is_open()) {
|
|
@@ -492,7 +524,7 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
|
|
| 492 |
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 493 |
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 494 |
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 495 |
-
char * text_escaped =
|
| 496 |
|
| 497 |
//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
|
| 498 |
fout << 10 * t0 << "," << 10 * t1 << ",";
|
|
|
|
| 471 |
return escaped;
|
| 472 |
}
|
| 473 |
|
| 474 |
+
// Escapes a string for use inside a double-quoted CSV field: every double
// quote is escaped by preceding it with another double quote, so `"` becomes
// `""` (RFC 4180). All other bytes are copied through unchanged.
//
// Returns a newly allocated, NUL-terminated buffer obtained from calloc(), or
// nullptr when `str` is null or the allocation fails. The caller owns the
// returned buffer and must release it with free().
char *escape_double_quotes_in_csv(const char *str) {
    if (str == nullptr) {
        return nullptr;
    }

    // First pass: output size = input length + one extra byte per double
    // quote + one byte for the terminating NUL.
    size_t escaped_length = strlen(str) + 1;
    for (const char *p = str; *p != '\0'; ++p) {
        if (*p == '"') {
            ++escaped_length;
        }
    }

    // calloc() zero-fills the buffer, so the terminating NUL is already in
    // place once the copy below has written escaped_length - 1 bytes.
    char *escaped = static_cast<char *>(calloc(escaped_length, 1));
    if (escaped == nullptr) {
        return nullptr;
    }

    // Second pass: copy the input, emitting an extra '"' before each '"'.
    size_t pos = 0;
    for (const char *p = str; *p != '\0'; ++p) {
        if (*p == '"') {
            escaped[pos++] = '"';
        }
        escaped[pos++] = *p;
    }

    return escaped;
}
|
| 505 |
+
|
| 506 |
bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
|
| 507 |
std::ofstream fout(fname);
|
| 508 |
if (!fout.is_open()) {
|
|
|
|
| 524 |
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 525 |
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 526 |
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 527 |
+
char * text_escaped = escape_double_quotes_in_csv(text);
|
| 528 |
|
| 529 |
//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
|
| 530 |
fout << 10 * t0 << "," << 10 * t1 << ",";
|