ggerganov committed on
Commit
2b07302
·
unverified ·
1 Parent(s): 3a69cdf

whisper : `split_on_word` no longer trims (#1046)

Browse files
Files changed (1) hide show
  1. whisper.cpp +0 -28
whisper.cpp CHANGED
@@ -3401,26 +3401,6 @@ static void whisper_exp_compute_token_level_timestamps(
3401
  float thold_pt,
3402
  float thold_ptsum);
3403
 
3404
- // trim from start (in place)
3405
- static inline void ltrim(std::string &s) {
3406
- s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), [](unsigned char ch) {
3407
- return std::isspace(ch);
3408
- }));
3409
- }
3410
-
3411
- // trim from end (in place)
3412
- static inline void rtrim(std::string &s) {
3413
- s.erase(std::find_if_not(s.rbegin(), s.rend(), [](unsigned char ch) {
3414
- return std::isspace(ch);
3415
- }).base(), s.end());
3416
- }
3417
-
3418
- // trim from both ends (in place)
3419
- static inline void trim(std::string &s) {
3420
- rtrim(s);
3421
- ltrim(s);
3422
- }
3423
-
3424
  static inline bool should_split_on_word(const char * txt, bool split_on_word) {
3425
  if (!split_on_word) return true;
3426
 
@@ -3447,11 +3427,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
3447
  const int cur = strlen(txt);
3448
 
3449
  if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
3450
- // split here
3451
- if (split_on_word) {
3452
- trim(text);
3453
- }
3454
-
3455
  state.result_all.back().text = std::move(text);
3456
  state.result_all.back().t1 = token.t0;
3457
  state.result_all.back().tokens.resize(i);
@@ -3479,9 +3454,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
3479
  }
3480
  }
3481
 
3482
- if (split_on_word) {
3483
- trim(text);
3484
- }
3485
  state.result_all.back().text = std::move(text);
3486
 
3487
  return res;
 
3401
  float thold_pt,
3402
  float thold_ptsum);
3403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3404
  static inline bool should_split_on_word(const char * txt, bool split_on_word) {
3405
  if (!split_on_word) return true;
3406
 
 
3427
  const int cur = strlen(txt);
3428
 
3429
  if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
 
 
 
 
 
3430
  state.result_all.back().text = std::move(text);
3431
  state.result_all.back().t1 = token.t0;
3432
  state.result_all.back().tokens.resize(i);
 
3454
  }
3455
  }
3456
 
 
 
 
3457
  state.result_all.back().text = std::move(text);
3458
 
3459
  return res;