Spaces:
Running
Running
Brad Murray
committed on
whisper : fix DTW memory access (#2012)
Browse files
* Fix DTW memory access
* Memory fix - Apply changes from denersc
- whisper.cpp +15 -10
whisper.cpp
CHANGED
|
@@ -1148,26 +1148,31 @@ static bool aheads_masks_init(
|
|
| 1148 |
}
|
| 1149 |
|
| 1150 |
// Set data on mask tensors
|
| 1151 |
-
// Since this must be backend agnostic, we
|
| 1152 |
-
//
|
| 1153 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1154 |
std::vector<float> mask_data;
|
| 1155 |
for (int64_t il = 0; il < n_text_layer; ++il) {
|
| 1156 |
if (aheads_masks.m[il] != nullptr) {
|
| 1157 |
auto aheads = get_alignment_heads_by_layer(cparams, il, n_text_layer, n_head);
|
| 1158 |
|
| 1159 |
-
size_t data_size = aheads_masks.m[il]->ne[0] * aheads_masks.m[il]->ne[1]
|
|
|
|
| 1160 |
mask_data.resize(data_size);
|
| 1161 |
-
ggml_backend_tensor_get(aheads_masks.m[il], mask_data.data(), 0, data_size);
|
| 1162 |
-
memset(mask_data.data(), 0, data_size);
|
| 1163 |
|
|
|
|
| 1164 |
for (size_t ih = 0; ih < aheads.size(); ++ih) {
|
| 1165 |
-
size_t pos = (aheads[ih] + (ih * aheads_masks.m[il]->ne[0]
|
| 1166 |
-
|
| 1167 |
-
memcpy(mask_data.data() + pos, &v, sizeof(float));
|
| 1168 |
}
|
| 1169 |
|
| 1170 |
-
ggml_backend_tensor_set(aheads_masks.m[il], mask_data.data(), 0,
|
| 1171 |
}
|
| 1172 |
}
|
| 1173 |
|
|
|
|
| 1148 |
}
|
| 1149 |
|
| 1150 |
// Set data on mask tensors
|
| 1151 |
+
// Since this must be backend agnostic, we write our desired values on mask_data,
|
| 1152 |
+
// and send it to backend with ggml_backend_tensor_set.
|
| 1153 |
+
// Each mask is N_HEADS*N_ALIGNMENT_HEADS, one per text layer containing alignment
|
| 1154 |
+
// heads. Each row of the mask "marks" one alignment head. E.g. if some text layer
|
| 1155 |
+
// has a total of 10 heads and of those, heads 0,5,6 are alignment heads, the mask
|
| 1156 |
+
// should read:
|
| 1157 |
+
// 1 0 0 0 0 0 0 0 0 0
|
| 1158 |
+
// 0 0 0 0 0 1 0 0 0 0
|
| 1159 |
+
// 0 0 0 0 0 0 1 0 0 0
|
| 1160 |
std::vector<float> mask_data;
|
| 1161 |
for (int64_t il = 0; il < n_text_layer; ++il) {
|
| 1162 |
if (aheads_masks.m[il] != nullptr) {
|
| 1163 |
auto aheads = get_alignment_heads_by_layer(cparams, il, n_text_layer, n_head);
|
| 1164 |
|
| 1165 |
+
size_t data_size = aheads_masks.m[il]->ne[0] * aheads_masks.m[il]->ne[1];
|
| 1166 |
+
size_t data_size_bytes = data_size * sizeof(float);
|
| 1167 |
mask_data.resize(data_size);
|
|
|
|
|
|
|
| 1168 |
|
| 1169 |
+
std::fill(mask_data.begin(), mask_data.end(), 0);
|
| 1170 |
for (size_t ih = 0; ih < aheads.size(); ++ih) {
|
| 1171 |
+
size_t pos = (aheads[ih] + (ih * aheads_masks.m[il]->ne[0]));
|
| 1172 |
+
mask_data[pos] = 1.0f;
|
|
|
|
| 1173 |
}
|
| 1174 |
|
| 1175 |
+
ggml_backend_tensor_set(aheads_masks.m[il], mask_data.data(), 0, data_size_bytes);
|
| 1176 |
}
|
| 1177 |
}
|
| 1178 |
|