mashizora committed on
Commit
9952a85
·
unverified ·
1 Parent(s): 99d668a

main : fix double quote escaping in csv output (#2090)

Browse files
Files changed (1) hide show
  1. examples/main/main.cpp +33 -1
examples/main/main.cpp CHANGED
@@ -471,6 +471,38 @@ char *escape_double_quotes_and_backslashes(const char *str) {
471
  return escaped;
472
  }
473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
475
  std::ofstream fout(fname);
476
  if (!fout.is_open()) {
@@ -492,7 +524,7 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
492
  const char * text = whisper_full_get_segment_text(ctx, i);
493
  const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
494
  const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
495
- char * text_escaped = escape_double_quotes_and_backslashes(text);
496
 
497
  //need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
498
  fout << 10 * t0 << "," << 10 * t1 << ",";
 
471
  return escaped;
472
  }
473
 
474
// Escapes a string for embedding inside a double-quoted CSV field.
//
// Per RFC 4180, a double quote inside a quoted field is escaped by preceding
// it with another double quote (`"` becomes `""`). Every other byte,
// including backslashes, is copied through unchanged.
//
// str: NUL-terminated input string; may be NULL.
//
// Returns a newly allocated, NUL-terminated copy of `str` with every double
// quote doubled, or NULL when `str` is NULL or the allocation fails. The
// buffer is obtained from calloc(), so the caller owns it and must release it
// with free().
char *escape_double_quotes_in_csv(const char *str) {
    if (str == nullptr) {
        return nullptr;
    }

    // First pass: size the output. Start from the input length plus the
    // terminator, then reserve one extra byte for each quote to be doubled.
    size_t escaped_length = strlen(str) + 1;
    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] == '"') {
            escaped_length++;
        }
    }

    char *escaped = static_cast<char *>(calloc(escaped_length, 1));
    if (escaped == nullptr) {
        return nullptr;
    }

    // Second pass: copy the input, emitting an additional quote in front of
    // every quote that is encountered.
    size_t pos = 0;
    for (size_t i = 0; str[i] != '\0'; i++) {
        if (str[i] == '"') {
            escaped[pos++] = '"';
        }
        escaped[pos++] = str[i];
    }

    // calloc() already zero-filled the buffer; writing the terminator
    // explicitly keeps the invariant obvious if the allocator is ever swapped.
    escaped[pos] = '\0';

    return escaped;
}
505
+
506
  bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
507
  std::ofstream fout(fname);
508
  if (!fout.is_open()) {
 
524
  const char * text = whisper_full_get_segment_text(ctx, i);
525
  const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
526
  const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
527
+ char * text_escaped = escape_double_quotes_in_csv(text);
528
 
529
  //need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
530
  fout << 10 * t0 << "," << 10 * t1 << ",";