Spaces:
Sleeping
Sleeping
whisper : `split_on_word` no longer trims (#1046)
Browse files- whisper.cpp +0 -28
whisper.cpp
CHANGED
|
@@ -3401,26 +3401,6 @@ static void whisper_exp_compute_token_level_timestamps(
|
|
| 3401 |
float thold_pt,
|
| 3402 |
float thold_ptsum);
|
| 3403 |
|
| 3404 |
-
// trim from start (in place)
|
| 3405 |
-
static inline void ltrim(std::string &s) {
|
| 3406 |
-
s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), [](unsigned char ch) {
|
| 3407 |
-
return std::isspace(ch);
|
| 3408 |
-
}));
|
| 3409 |
-
}
|
| 3410 |
-
|
| 3411 |
-
// trim from end (in place)
|
| 3412 |
-
static inline void rtrim(std::string &s) {
|
| 3413 |
-
s.erase(std::find_if_not(s.rbegin(), s.rend(), [](unsigned char ch) {
|
| 3414 |
-
return std::isspace(ch);
|
| 3415 |
-
}).base(), s.end());
|
| 3416 |
-
}
|
| 3417 |
-
|
| 3418 |
-
// trim from both ends (in place)
|
| 3419 |
-
static inline void trim(std::string &s) {
|
| 3420 |
-
rtrim(s);
|
| 3421 |
-
ltrim(s);
|
| 3422 |
-
}
|
| 3423 |
-
|
| 3424 |
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
| 3425 |
if (!split_on_word) return true;
|
| 3426 |
|
|
@@ -3447,11 +3427,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
|
| 3447 |
const int cur = strlen(txt);
|
| 3448 |
|
| 3449 |
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
| 3450 |
-
// split here
|
| 3451 |
-
if (split_on_word) {
|
| 3452 |
-
trim(text);
|
| 3453 |
-
}
|
| 3454 |
-
|
| 3455 |
state.result_all.back().text = std::move(text);
|
| 3456 |
state.result_all.back().t1 = token.t0;
|
| 3457 |
state.result_all.back().tokens.resize(i);
|
|
@@ -3479,9 +3454,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
|
| 3479 |
}
|
| 3480 |
}
|
| 3481 |
|
| 3482 |
-
if (split_on_word) {
|
| 3483 |
-
trim(text);
|
| 3484 |
-
}
|
| 3485 |
state.result_all.back().text = std::move(text);
|
| 3486 |
|
| 3487 |
return res;
|
|
|
|
| 3401 |
float thold_pt,
|
| 3402 |
float thold_ptsum);
|
| 3403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3404 |
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
| 3405 |
if (!split_on_word) return true;
|
| 3406 |
|
|
|
|
| 3427 |
const int cur = strlen(txt);
|
| 3428 |
|
| 3429 |
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3430 |
state.result_all.back().text = std::move(text);
|
| 3431 |
state.result_all.back().t1 = token.t0;
|
| 3432 |
state.result_all.back().tokens.resize(i);
|
|
|
|
| 3454 |
}
|
| 3455 |
}
|
| 3456 |
|
|
|
|
|
|
|
|
|
|
| 3457 |
state.result_all.back().text = std::move(text);
|
| 3458 |
|
| 3459 |
return res;
|