Spaces:
Running
Running
examples : fix + refactor Levenshtein distance
Browse files
- examples/command.wasm/emscripten.cpp +0 -25
- examples/command/command.cpp +0 -25
- examples/common.cpp +24 -0
- examples/common.h +2 -0
examples/command.wasm/emscripten.cpp
CHANGED
|
@@ -28,31 +28,6 @@ std::string g_transcribed = "";
|
|
| 28 |
|
| 29 |
std::vector<float> g_pcmf32;
|
| 30 |
|
| 31 |
-
// compute similarity between two strings using Levenshtein distance:
// returns 1.0f - (distance / length of the longer string)
// NOTE(review): the outer loop starts at i == 0, so whenever s1 is non-empty the
// inner loop evaluates s0[i - 1] with i - 1 wrapped around as size_t (out-of-range index)
// NOTE(review): when both strings are empty the final division is 0 / 0
|
| 32 |
-
static float similarity(const std::string & s0, const std::string & s1) {
|
| 33 |
-
const size_t len0 = s0.size() + 1;
|
| 34 |
-
const size_t len1 = s1.size() + 1;
|
| 35 |
-
|
| 36 |
-
std::vector<int> col(len1, 0);
|
| 37 |
-
std::vector<int> prevCol(len1, 0);
|
| 38 |
-
|
| 39 |
-
for (size_t i = 0; i < len1; i++) {
|
| 40 |
-
prevCol[i] = i;
|
| 41 |
-
}
|
| 42 |
-
|
| 43 |
-
for (size_t i = 0; i < len0; i++) {
|
| 44 |
-
col[0] = i;
|
| 45 |
-
for (size_t j = 1; j < len1; j++) {
|
| 46 |
-
col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
|
| 47 |
-
}
|
| 48 |
-
col.swap(prevCol);
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
const float dist = prevCol[len1 - 1];
|
| 52 |
-
|
| 53 |
-
return 1.0f - (dist / std::max(s0.size(), s1.size()));
|
| 54 |
-
}
|
| 55 |
-
|
| 56 |
void command_set_status(const std::string & status) {
|
| 57 |
std::lock_guard<std::mutex> lock(g_mutex);
|
| 58 |
g_status = status;
|
|
|
|
| 28 |
|
| 29 |
std::vector<float> g_pcmf32;
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
void command_set_status(const std::string & status) {
|
| 32 |
std::lock_guard<std::mutex> lock(g_mutex);
|
| 33 |
g_status = status;
|
examples/command/command.cpp
CHANGED
|
@@ -163,31 +163,6 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
|
|
| 163 |
return result;
|
| 164 |
}
|
| 165 |
|
| 166 |
-
// compute similarity between two strings using Levenshtein distance:
// returns 1.0f - (distance / length of the longer string)
// NOTE(review): the outer loop starts at i == 0, so whenever s1 is non-empty the
// inner loop evaluates s0[i - 1] with i - 1 wrapped around as size_t (out-of-range index)
// NOTE(review): when both strings are empty the final division is 0 / 0
|
| 167 |
-
float similarity(const std::string & s0, const std::string & s1) {
|
| 168 |
-
const size_t len0 = s0.size() + 1;
|
| 169 |
-
const size_t len1 = s1.size() + 1;
|
| 170 |
-
|
| 171 |
-
std::vector<int> col(len1, 0);
|
| 172 |
-
std::vector<int> prevCol(len1, 0);
|
| 173 |
-
|
| 174 |
-
for (size_t i = 0; i < len1; i++) {
|
| 175 |
-
prevCol[i] = i;
|
| 176 |
-
}
|
| 177 |
-
|
| 178 |
-
for (size_t i = 0; i < len0; i++) {
|
| 179 |
-
col[0] = i;
|
| 180 |
-
for (size_t j = 1; j < len1; j++) {
|
| 181 |
-
col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
|
| 182 |
-
}
|
| 183 |
-
col.swap(prevCol);
|
| 184 |
-
}
|
| 185 |
-
|
| 186 |
-
const float dist = prevCol[len1 - 1];
|
| 187 |
-
|
| 188 |
-
return 1.0f - (dist / std::max(s0.size(), s1.size()));
|
| 189 |
-
}
|
| 190 |
-
|
| 191 |
std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
| 192 |
std::vector<std::string> allowed_commands;
|
| 193 |
|
|
|
|
| 163 |
return result;
|
| 164 |
}
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
| 167 |
std::vector<std::string> allowed_commands;
|
| 168 |
|
examples/common.cpp
CHANGED
|
@@ -479,3 +479,27 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
|
|
| 479 |
|
| 480 |
return true;
|
| 481 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
return true;
|
| 481 |
}
|
| 482 |
+
|
| 483 |
+
// Compute the similarity between two strings from their Levenshtein distance.
//
// Returns 1.0f - dist / max(s0.size(), s1.size()), where dist is the minimum
// number of single-character insertions, deletions and substitutions needed to
// turn s0 into s1. Identical strings give 1.0f; a distance equal to the length
// of the longer string gives 0.0f. Two empty strings are treated as identical
// and give 1.0f.
float similarity(const std::string & s0, const std::string & s1) {
    const size_t n0 = s0.size();
    const size_t n1 = s1.size();

    const size_t n_max = std::max(n0, n1);
    if (n_max == 0) {
        // both inputs are empty: avoid the 0 / 0 division (NaN) below
        return 1.0f;
    }

    // two-row dynamic programming table:
    // prev_row[j] is the distance between the first i - 1 chars of s0 and the first j chars of s1
    std::vector<size_t> prev_row(n1 + 1);
    std::vector<size_t> cur_row(n1 + 1);

    // distance from the empty prefix of s0 to the first j chars of s1 is j insertions
    for (size_t j = 0; j <= n1; j++) {
        prev_row[j] = j;
    }

    // start at 1 so that s0[i - 1] is always a valid index
    for (size_t i = 1; i <= n0; i++) {
        cur_row[0] = i; // i deletions to reach the empty prefix of s1
        for (size_t j = 1; j <= n1; j++) {
            const size_t subst = prev_row[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1);
            cur_row[j] = std::min(std::min(cur_row[j - 1] + 1, prev_row[j] + 1), subst);
        }
        cur_row.swap(prev_row);
    }

    // after the final swap the last computed row lives in prev_row
    const float dist = static_cast<float>(prev_row[n1]);

    return 1.0f - (dist / static_cast<float>(n_max));
}
|
examples/common.h
CHANGED
|
@@ -118,3 +118,5 @@ bool vad_simple(
|
|
| 118 |
float freq_thold,
|
| 119 |
bool verbose);
|
| 120 |
|
|
|
|
|
|
|
|
|
| 118 |
float freq_thold,
|
| 119 |
bool verbose);
|
| 120 |
|
| 121 |
+
// compute similarity between two strings using Levenshtein distance
// returns 1.0f minus the distance divided by the length of the longer string,
// so identical non-empty strings yield 1.0f
|
| 122 |
+
float similarity(const std::string & s0, const std::string & s1);
|