ggerganov committed on
Commit
97f9410
·
unverified ·
1 Parent(s): a5f8f3c

examples : fix + refactor Levenshtein distance

Browse files
examples/command.wasm/emscripten.cpp CHANGED
@@ -28,31 +28,6 @@ std::string g_transcribed = "";
28
 
29
  std::vector<float> g_pcmf32;
30
 
31
- // compute similarity between two strings using Levenshtein distance
32
- static float similarity(const std::string & s0, const std::string & s1) {
33
- const size_t len0 = s0.size() + 1;
34
- const size_t len1 = s1.size() + 1;
35
-
36
- std::vector<int> col(len1, 0);
37
- std::vector<int> prevCol(len1, 0);
38
-
39
- for (size_t i = 0; i < len1; i++) {
40
- prevCol[i] = i;
41
- }
42
-
43
- for (size_t i = 0; i < len0; i++) {
44
- col[0] = i;
45
- for (size_t j = 1; j < len1; j++) {
46
- col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
47
- }
48
- col.swap(prevCol);
49
- }
50
-
51
- const float dist = prevCol[len1 - 1];
52
-
53
- return 1.0f - (dist / std::max(s0.size(), s1.size()));
54
- }
55
-
56
  void command_set_status(const std::string & status) {
57
  std::lock_guard<std::mutex> lock(g_mutex);
58
  g_status = status;
 
28
 
29
  std::vector<float> g_pcmf32;
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  void command_set_status(const std::string & status) {
32
  std::lock_guard<std::mutex> lock(g_mutex);
33
  g_status = status;
examples/command/command.cpp CHANGED
@@ -163,31 +163,6 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
163
  return result;
164
  }
165
 
166
- // compute similarity between two strings using Levenshtein distance
167
- float similarity(const std::string & s0, const std::string & s1) {
168
- const size_t len0 = s0.size() + 1;
169
- const size_t len1 = s1.size() + 1;
170
-
171
- std::vector<int> col(len1, 0);
172
- std::vector<int> prevCol(len1, 0);
173
-
174
- for (size_t i = 0; i < len1; i++) {
175
- prevCol[i] = i;
176
- }
177
-
178
- for (size_t i = 0; i < len0; i++) {
179
- col[0] = i;
180
- for (size_t j = 1; j < len1; j++) {
181
- col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
182
- }
183
- col.swap(prevCol);
184
- }
185
-
186
- const float dist = prevCol[len1 - 1];
187
-
188
- return 1.0f - (dist / std::max(s0.size(), s1.size()));
189
- }
190
-
191
  std::vector<std::string> read_allowed_commands(const std::string & fname) {
192
  std::vector<std::string> allowed_commands;
193
 
 
163
  return result;
164
  }
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  std::vector<std::string> read_allowed_commands(const std::string & fname) {
167
  std::vector<std::string> allowed_commands;
168
 
examples/common.cpp CHANGED
@@ -479,3 +479,27 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
479
 
480
  return true;
481
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
  return true;
481
  }
482
+
483
+ float similarity(const std::string & s0, const std::string & s1) {
484
+ const size_t len0 = s0.size() + 1;
485
+ const size_t len1 = s1.size() + 1;
486
+
487
+ std::vector<int> col(len1, 0);
488
+ std::vector<int> prevCol(len1, 0);
489
+
490
+ for (size_t i = 0; i < len1; i++) {
491
+ prevCol[i] = i;
492
+ }
493
+
494
+ for (size_t i = 0; i < len0; i++) {
495
+ col[0] = i;
496
+ for (size_t j = 1; j < len1; j++) {
497
+ col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (i > 0 && s0[i - 1] == s1[j - 1] ? 0 : 1));
498
+ }
499
+ col.swap(prevCol);
500
+ }
501
+
502
+ const float dist = prevCol[len1 - 1];
503
+
504
+ return 1.0f - (dist / std::max(s0.size(), s1.size()));
505
+ }
examples/common.h CHANGED
@@ -118,3 +118,5 @@ bool vad_simple(
118
  float freq_thold,
119
  bool verbose);
120
 
 
 
 
118
  float freq_thold,
119
  bool verbose);
120
 
121
+ // compute similarity between two strings using Levenshtein distance
122
+ float similarity(const std::string & s0, const std::string & s1);