Spaces:
Running
Running
cann : add Ascend NPU support (#2336)
Browse files* enable Ascend NPU in src/whisper.cpp
* sync test-backend-ops with llama.cpp
- ggml/src/ggml-cann/Doxyfile +2 -2
- src/whisper.cpp +20 -2
- tests/test-backend-ops.cpp +106 -17
ggml/src/ggml-cann/Doxyfile
CHANGED
|
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
|
|
| 32 |
# title of most generated pages and in a few other places.
|
| 33 |
# The default value is: My Project.
|
| 34 |
|
| 35 |
-
PROJECT_NAME = "
|
| 36 |
|
| 37 |
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
| 38 |
# could be handy for archiving the generated documentation or if some version
|
|
@@ -44,7 +44,7 @@ PROJECT_NUMBER =
|
|
| 44 |
# for a project that appears at the top of each page and should give viewer a
|
| 45 |
# quick idea about the purpose of the project. Keep the description short.
|
| 46 |
|
| 47 |
-
PROJECT_BRIEF = "
|
| 48 |
|
| 49 |
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
|
| 50 |
# in the documentation. The maximum height of the logo should not exceed 55
|
|
|
|
| 32 |
# title of most generated pages and in a few other places.
|
| 33 |
# The default value is: My Project.
|
| 34 |
|
| 35 |
+
PROJECT_NAME = "whisper.cpp"
|
| 36 |
|
| 37 |
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
|
| 38 |
# could be handy for archiving the generated documentation or if some version
|
|
|
|
| 44 |
# for a project that appears at the top of each page and should give viewer a
|
| 45 |
# quick idea about the purpose of the project. Keep the description short.
|
| 46 |
|
| 47 |
+
PROJECT_BRIEF = "Port of OpenAI's Whisper model in C/C++"
|
| 48 |
|
| 49 |
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
|
| 50 |
# in the documentation. The maximum height of the logo should not exceed 55
|
src/whisper.cpp
CHANGED
|
@@ -29,6 +29,10 @@
|
|
| 29 |
#include "openvino/whisper-openvino-encoder.h"
|
| 30 |
#endif
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
#include "ggml.h"
|
| 33 |
#include "ggml-alloc.h"
|
| 34 |
#include "ggml-backend.h"
|
|
@@ -1283,6 +1287,16 @@ static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & pa
|
|
| 1283 |
}
|
| 1284 |
#endif
|
| 1285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1286 |
return result;
|
| 1287 |
}
|
| 1288 |
|
|
@@ -1335,6 +1349,10 @@ static ggml_backend_buffer_type_t whisper_default_buffer_type(const whisper_cont
|
|
| 1335 |
result || (result = ggml_backend_vk_buffer_type(params.gpu_device));
|
| 1336 |
#endif
|
| 1337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1338 |
result || (result = ggml_backend_cpu_buffer_type());
|
| 1339 |
|
| 1340 |
return result;
|
|
@@ -4337,8 +4355,8 @@ const char * whisper_print_system_info(void) {
|
|
| 4337 |
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
|
| 4338 |
s += "CUDA = " + std::to_string(ggml_cpu_has_cuda()) + " | ";
|
| 4339 |
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
|
| 4340 |
-
s += "OPENVINO = " + std::to_string(whisper_has_openvino())
|
| 4341 |
-
|
| 4342 |
return s.c_str();
|
| 4343 |
}
|
| 4344 |
|
|
|
|
| 29 |
#include "openvino/whisper-openvino-encoder.h"
|
| 30 |
#endif
|
| 31 |
|
| 32 |
+
#ifdef GGML_USE_CANN
|
| 33 |
+
#include "ggml-cann.h"
|
| 34 |
+
#endif
|
| 35 |
+
|
| 36 |
#include "ggml.h"
|
| 37 |
#include "ggml-alloc.h"
|
| 38 |
#include "ggml-backend.h"
|
|
|
|
| 1287 |
}
|
| 1288 |
#endif
|
| 1289 |
|
| 1290 |
+
#ifdef GGML_USE_CANN
|
| 1291 |
+
if (params.use_gpu) {
|
| 1292 |
+
WHISPER_LOG_INFO("%s: using CANN backend\n", __func__);
|
| 1293 |
+
result = ggml_backend_cann_init(params.gpu_device);
|
| 1294 |
+
if (!result) {
|
| 1295 |
+
WHISPER_LOG_ERROR("%s: ggml_backend_cann_init() failed\n", __func__);
|
| 1296 |
+
}
|
| 1297 |
+
}
|
| 1298 |
+
#endif
|
| 1299 |
+
|
| 1300 |
return result;
|
| 1301 |
}
|
| 1302 |
|
|
|
|
| 1349 |
result || (result = ggml_backend_vk_buffer_type(params.gpu_device));
|
| 1350 |
#endif
|
| 1351 |
|
| 1352 |
+
#ifdef GGML_USE_CANN
|
| 1353 |
+
result || (result = ggml_backend_cann_buffer_type(params.gpu_device));
|
| 1354 |
+
#endif
|
| 1355 |
+
|
| 1356 |
result || (result = ggml_backend_cpu_buffer_type());
|
| 1357 |
|
| 1358 |
return result;
|
|
|
|
| 4355 |
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
|
| 4356 |
s += "CUDA = " + std::to_string(ggml_cpu_has_cuda()) + " | ";
|
| 4357 |
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
|
| 4358 |
+
s += "OPENVINO = " + std::to_string(whisper_has_openvino()) + " | ";
|
| 4359 |
+
s += "CANN = " + std::to_string(ggml_cpu_has_cann());
|
| 4360 |
return s.c_str();
|
| 4361 |
}
|
| 4362 |
|
tests/test-backend-ops.cpp
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
#include <ggml.h>
|
| 2 |
#include <ggml-alloc.h>
|
| 3 |
#include <ggml-backend.h>
|
| 4 |
-
#include <ggml-backend-impl.h>
|
| 5 |
|
| 6 |
#include <algorithm>
|
| 7 |
#include <array>
|
|
@@ -80,14 +79,22 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
| 80 |
im = nullptr;
|
| 81 |
}
|
| 82 |
}
|
|
|
|
| 83 |
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
|
| 84 |
GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
| 86 |
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
| 87 |
// This is going to create some weird integers though.
|
| 88 |
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
| 89 |
} else {
|
| 90 |
-
|
| 91 |
}
|
| 92 |
}
|
| 93 |
|
|
@@ -125,7 +132,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
| 125 |
tt.to_float(&buf[i], vq.data(), bs);
|
| 126 |
tv.insert(tv.end(), vq.begin(), vq.end());
|
| 127 |
} else {
|
| 128 |
-
|
| 129 |
}
|
| 130 |
}
|
| 131 |
}
|
|
@@ -760,7 +767,7 @@ struct test_dup : public test_case {
|
|
| 760 |
}
|
| 761 |
|
| 762 |
test_dup(ggml_type type = GGML_TYPE_F32,
|
| 763 |
-
std::array<int64_t, 4> ne = {10, 10,
|
| 764 |
std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
| 765 |
: type(type), ne(ne), permute(permute),
|
| 766 |
_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
@@ -780,9 +787,11 @@ struct test_cpy : public test_case {
|
|
| 780 |
const ggml_type type_src;
|
| 781 |
const ggml_type type_dst;
|
| 782 |
const std::array<int64_t, 4> ne;
|
|
|
|
|
|
|
| 783 |
|
| 784 |
std::string vars() override {
|
| 785 |
-
return
|
| 786 |
}
|
| 787 |
|
| 788 |
double max_nmse_err() override {
|
|
@@ -794,12 +803,17 @@ struct test_cpy : public test_case {
|
|
| 794 |
}
|
| 795 |
|
| 796 |
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
| 797 |
-
std::array<int64_t, 4> ne = {10, 10, 10, 1}
|
| 798 |
-
|
|
|
|
|
|
|
| 799 |
|
| 800 |
ggml_tensor * build_graph(ggml_context * ctx) override {
|
| 801 |
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
| 802 |
-
|
|
|
|
|
|
|
|
|
|
| 803 |
ggml_tensor * out = ggml_cpy(ctx, src, dst);
|
| 804 |
return out;
|
| 805 |
}
|
|
@@ -1175,6 +1189,7 @@ struct test_soft_max : public test_case {
|
|
| 1175 |
}
|
| 1176 |
};
|
| 1177 |
|
|
|
|
| 1178 |
// GGML_OP_ROPE
|
| 1179 |
struct test_rope : public test_case {
|
| 1180 |
const ggml_type type;
|
|
@@ -1267,6 +1282,32 @@ struct test_pool2d : public test_case {
|
|
| 1267 |
}
|
| 1268 |
};
|
| 1269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
// GGML_OP_IM2COL
|
| 1271 |
struct test_im2col : public test_case {
|
| 1272 |
const ggml_type type_input;
|
|
@@ -1280,7 +1321,7 @@ struct test_im2col : public test_case {
|
|
| 1280 |
// padding
|
| 1281 |
const int p0;
|
| 1282 |
const int p1;
|
| 1283 |
-
//
|
| 1284 |
const int d0;
|
| 1285 |
const int d1;
|
| 1286 |
// mode
|
|
@@ -1393,7 +1434,7 @@ struct test_argsort : public test_case {
|
|
| 1393 |
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
|
| 1394 |
}
|
| 1395 |
} else {
|
| 1396 |
-
|
| 1397 |
}
|
| 1398 |
}
|
| 1399 |
}
|
|
@@ -1470,6 +1511,7 @@ struct test_group_norm : public test_case {
|
|
| 1470 |
const ggml_type type;
|
| 1471 |
const std::array<int64_t, 4> ne;
|
| 1472 |
const int32_t num_groups;
|
|
|
|
| 1473 |
|
| 1474 |
std::string vars() override {
|
| 1475 |
return VARS_TO_STR3(type, ne, num_groups);
|
|
@@ -1477,12 +1519,13 @@ struct test_group_norm : public test_case {
|
|
| 1477 |
|
| 1478 |
test_group_norm(ggml_type type = GGML_TYPE_F32,
|
| 1479 |
std::array<int64_t, 4> ne = {64, 64, 320, 1},
|
| 1480 |
-
int32_t num_groups = 32
|
| 1481 |
-
|
|
|
|
| 1482 |
|
| 1483 |
ggml_tensor * build_graph(ggml_context * ctx) override {
|
| 1484 |
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
| 1485 |
-
ggml_tensor * out = ggml_group_norm(ctx, a, num_groups);
|
| 1486 |
return out;
|
| 1487 |
}
|
| 1488 |
};
|
|
@@ -2053,6 +2096,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2053 |
GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
|
| 2054 |
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
|
| 2055 |
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
|
|
|
|
| 2056 |
};
|
| 2057 |
|
| 2058 |
// unary ops
|
|
@@ -2097,6 +2141,19 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2097 |
|
| 2098 |
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
|
| 2099 |
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2100 |
|
| 2101 |
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
|
| 2102 |
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
|
|
@@ -2110,12 +2167,22 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2110 |
test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
|
| 2111 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
|
| 2112 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2113 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
| 2114 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
| 2115 |
|
| 2116 |
for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
| 2117 |
for (ggml_type type_dst : all_types) {
|
| 2118 |
test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2119 |
}
|
| 2120 |
}
|
| 2121 |
|
|
@@ -2165,6 +2232,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2165 |
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
|
| 2166 |
}
|
| 2167 |
|
|
|
|
| 2168 |
for (ggml_type type_a : base_types) {
|
| 2169 |
for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
| 2170 |
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
|
|
@@ -2184,10 +2252,31 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2184 |
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
|
| 2185 |
}
|
| 2186 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2187 |
|
| 2188 |
for (ggml_type type_a : other_types) {
|
| 2189 |
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
| 2190 |
-
|
|
|
|
|
|
|
|
|
|
| 2191 |
}
|
| 2192 |
}
|
| 2193 |
|
|
@@ -2247,7 +2336,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2247 |
for (int n = 0; n < 10; ++n) {
|
| 2248 |
int64_t ne0 = dist_ne0(rng);
|
| 2249 |
int64_t ne1 = dist_ne1(rng);
|
| 2250 |
-
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
|
| 2251 |
}
|
| 2252 |
|
| 2253 |
exponent <<= 1;
|
|
@@ -2266,7 +2355,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2266 |
}
|
| 2267 |
}
|
| 2268 |
}
|
| 2269 |
-
|
| 2270 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
|
| 2271 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
|
| 2272 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
|
|
@@ -2380,7 +2469,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|
| 2380 |
return true;
|
| 2381 |
}
|
| 2382 |
|
| 2383 |
-
|
| 2384 |
return false;
|
| 2385 |
}
|
| 2386 |
|
|
|
|
| 1 |
#include <ggml.h>
|
| 2 |
#include <ggml-alloc.h>
|
| 3 |
#include <ggml-backend.h>
|
|
|
|
| 4 |
|
| 5 |
#include <algorithm>
|
| 6 |
#include <array>
|
|
|
|
| 79 |
im = nullptr;
|
| 80 |
}
|
| 81 |
}
|
| 82 |
+
|
| 83 |
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
|
| 84 |
GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
|
| 85 |
+
// TODO: other cases
|
| 86 |
+
//#pragma omp parallel for
|
| 87 |
+
//for (int i = 0; i < tensor->ne[1]; i++) {
|
| 88 |
+
// ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
|
| 89 |
+
// i * tensor->ne[0], 1, tensor->ne[0], im);
|
| 90 |
+
//}
|
| 91 |
+
|
| 92 |
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
| 93 |
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
| 94 |
// This is going to create some weird integers though.
|
| 95 |
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
| 96 |
} else {
|
| 97 |
+
GGML_ABORT("fatal error");
|
| 98 |
}
|
| 99 |
}
|
| 100 |
|
|
|
|
| 132 |
tt.to_float(&buf[i], vq.data(), bs);
|
| 133 |
tv.insert(tv.end(), vq.begin(), vq.end());
|
| 134 |
} else {
|
| 135 |
+
GGML_ABORT("fatal error");
|
| 136 |
}
|
| 137 |
}
|
| 138 |
}
|
|
|
|
| 767 |
}
|
| 768 |
|
| 769 |
test_dup(ggml_type type = GGML_TYPE_F32,
|
| 770 |
+
std::array<int64_t, 4> ne = {10, 10, 20, 1},
|
| 771 |
std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
| 772 |
: type(type), ne(ne), permute(permute),
|
| 773 |
_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
|
|
| 787 |
const ggml_type type_src;
|
| 788 |
const ggml_type type_dst;
|
| 789 |
const std::array<int64_t, 4> ne;
|
| 790 |
+
const std::array<int64_t, 4> permute;
|
| 791 |
+
bool _src_use_permute;
|
| 792 |
|
| 793 |
std::string vars() override {
|
| 794 |
+
return VARS_TO_STR4(type_src, type_dst, ne, permute);
|
| 795 |
}
|
| 796 |
|
| 797 |
double max_nmse_err() override {
|
|
|
|
| 803 |
}
|
| 804 |
|
| 805 |
test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
| 806 |
+
std::array<int64_t, 4> ne = {10, 10, 10, 1},
|
| 807 |
+
std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
| 808 |
+
: type_src(type_src), type_dst(type_dst), ne(ne), permute(permute),
|
| 809 |
+
_src_use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
| 810 |
|
| 811 |
ggml_tensor * build_graph(ggml_context * ctx) override {
|
| 812 |
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
| 813 |
+
if (_src_use_permute) {
|
| 814 |
+
src = ggml_permute(ctx, src, permute[0], permute[1], permute[2], permute[3]);
|
| 815 |
+
}
|
| 816 |
+
ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, src->ne);
|
| 817 |
ggml_tensor * out = ggml_cpy(ctx, src, dst);
|
| 818 |
return out;
|
| 819 |
}
|
|
|
|
| 1189 |
}
|
| 1190 |
};
|
| 1191 |
|
| 1192 |
+
|
| 1193 |
// GGML_OP_ROPE
|
| 1194 |
struct test_rope : public test_case {
|
| 1195 |
const ggml_type type;
|
|
|
|
| 1282 |
}
|
| 1283 |
};
|
| 1284 |
|
| 1285 |
+
// GGML_OP_CONV_TRANSPOSE_1D
|
| 1286 |
+
struct test_conv_transpose_1d : public test_case {
|
| 1287 |
+
const std::array<int64_t, 4> ne_input;
|
| 1288 |
+
const std::array<int64_t, 4> ne_kernel;
|
| 1289 |
+
|
| 1290 |
+
const int s0; // stride
|
| 1291 |
+
const int p0; // padding
|
| 1292 |
+
const int d0; // dilation
|
| 1293 |
+
|
| 1294 |
+
std::string vars() override {
|
| 1295 |
+
return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
|
| 1296 |
+
}
|
| 1297 |
+
|
| 1298 |
+
test_conv_transpose_1d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1}, // [input_width, input_height, input_channels, 1]
|
| 1299 |
+
std::array<int64_t, 4> ne_kernel = {16, 32, 32, 1}, // [kernel_width, kernel_height, input_channels, 1]
|
| 1300 |
+
int s0 = 1, int p0 = 0, int d0 = 1)
|
| 1301 |
+
: ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0) {}
|
| 1302 |
+
|
| 1303 |
+
ggml_tensor * build_graph(ggml_context * ctx) override {
|
| 1304 |
+
ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
|
| 1305 |
+
ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_kernel.data());
|
| 1306 |
+
ggml_tensor * out = ggml_conv_transpose_1d(ctx, kernel, input, s0, p0, d0);
|
| 1307 |
+
return out;
|
| 1308 |
+
}
|
| 1309 |
+
};
|
| 1310 |
+
|
| 1311 |
// GGML_OP_IM2COL
|
| 1312 |
struct test_im2col : public test_case {
|
| 1313 |
const ggml_type type_input;
|
|
|
|
| 1321 |
// padding
|
| 1322 |
const int p0;
|
| 1323 |
const int p1;
|
| 1324 |
+
// dilation
|
| 1325 |
const int d0;
|
| 1326 |
const int d1;
|
| 1327 |
// mode
|
|
|
|
| 1434 |
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
|
| 1435 |
}
|
| 1436 |
} else {
|
| 1437 |
+
GGML_ABORT("fatal error");
|
| 1438 |
}
|
| 1439 |
}
|
| 1440 |
}
|
|
|
|
| 1511 |
const ggml_type type;
|
| 1512 |
const std::array<int64_t, 4> ne;
|
| 1513 |
const int32_t num_groups;
|
| 1514 |
+
const float eps;
|
| 1515 |
|
| 1516 |
std::string vars() override {
|
| 1517 |
return VARS_TO_STR3(type, ne, num_groups);
|
|
|
|
| 1519 |
|
| 1520 |
test_group_norm(ggml_type type = GGML_TYPE_F32,
|
| 1521 |
std::array<int64_t, 4> ne = {64, 64, 320, 1},
|
| 1522 |
+
int32_t num_groups = 32,
|
| 1523 |
+
float eps = 1e-6f)
|
| 1524 |
+
: type(type), ne(ne), num_groups(num_groups), eps(eps) {}
|
| 1525 |
|
| 1526 |
ggml_tensor * build_graph(ggml_context * ctx) override {
|
| 1527 |
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
| 1528 |
+
ggml_tensor * out = ggml_group_norm(ctx, a, num_groups, eps);
|
| 1529 |
return out;
|
| 1530 |
}
|
| 1531 |
};
|
|
|
|
| 2096 |
GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
|
| 2097 |
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
|
| 2098 |
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
|
| 2099 |
+
GGML_TYPE_BF16,
|
| 2100 |
};
|
| 2101 |
|
| 2102 |
// unary ops
|
|
|
|
| 2141 |
|
| 2142 |
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
|
| 2143 |
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
|
| 2144 |
+
// test cases for 1D im2col
|
| 2145 |
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
| 2146 |
+
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
|
| 2147 |
+
|
| 2148 |
+
test_cases.emplace_back(new test_conv_transpose_1d());
|
| 2149 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
|
| 2150 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
|
| 2151 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 1, 0, 1));
|
| 2152 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 2, 0, 1));
|
| 2153 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
|
| 2154 |
+
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
|
| 2155 |
+
test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
|
| 2156 |
+
|
| 2157 |
|
| 2158 |
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1}));
|
| 2159 |
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1}));
|
|
|
|
| 2167 |
test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
|
| 2168 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
|
| 2169 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
|
| 2170 |
+
test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {0, 2, 1, 3}));
|
| 2171 |
+
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
|
| 2172 |
+
test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
|
| 2173 |
+
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
|
| 2174 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
| 2175 |
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
| 2176 |
|
| 2177 |
for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
| 2178 |
for (ggml_type type_dst : all_types) {
|
| 2179 |
test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
|
| 2180 |
+
test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
|
| 2181 |
+
}
|
| 2182 |
+
}
|
| 2183 |
+
for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
| 2184 |
+
for (ggml_type type_dst : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
| 2185 |
+
test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
|
| 2186 |
}
|
| 2187 |
}
|
| 2188 |
|
|
|
|
| 2232 |
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
|
| 2233 |
}
|
| 2234 |
|
| 2235 |
+
#if 1
|
| 2236 |
for (ggml_type type_a : base_types) {
|
| 2237 |
for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
| 2238 |
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
|
|
|
|
| 2252 |
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {10, 10}, {2, 2}));
|
| 2253 |
}
|
| 2254 |
}
|
| 2255 |
+
#else
|
| 2256 |
+
// m = a rows
|
| 2257 |
+
// n = b rows
|
| 2258 |
+
// k = cols
|
| 2259 |
+
std::uniform_int_distribution<> dist_m(1, 128);
|
| 2260 |
+
std::uniform_int_distribution<> dist_n(16, 128);
|
| 2261 |
+
std::uniform_int_distribution<> dist_k(1, 16);
|
| 2262 |
+
for (int i = 0; i < 1000; i++) {
|
| 2263 |
+
for (ggml_type type_a : all_types) {
|
| 2264 |
+
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
| 2265 |
+
int m = dist_m(rng);
|
| 2266 |
+
int n = dist_n(rng);
|
| 2267 |
+
int k = dist_k(rng) * ggml_blck_size(type_a);
|
| 2268 |
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, { 1, 1}, {1, 1}));
|
| 2269 |
+
}
|
| 2270 |
+
}
|
| 2271 |
+
}
|
| 2272 |
+
#endif
|
| 2273 |
|
| 2274 |
for (ggml_type type_a : other_types) {
|
| 2275 |
for (ggml_type type_b : {GGML_TYPE_F32}) {
|
| 2276 |
+
if (ggml_blck_size(type_a) != 256) {
|
| 2277 |
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
|
| 2278 |
+
}
|
| 2279 |
+
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
|
| 2280 |
}
|
| 2281 |
}
|
| 2282 |
|
|
|
|
| 2336 |
for (int n = 0; n < 10; ++n) {
|
| 2337 |
int64_t ne0 = dist_ne0(rng);
|
| 2338 |
int64_t ne1 = dist_ne1(rng);
|
| 2339 |
+
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, GGML_TYPE_F32, {ne0, ne1, 1, 1}, n/2 == 0, 0.1f, ne0 < 1000 ? 4.0f : 0.0f));
|
| 2340 |
}
|
| 2341 |
|
| 2342 |
exponent <<= 1;
|
|
|
|
| 2355 |
}
|
| 2356 |
}
|
| 2357 |
}
|
| 2358 |
+
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
|
| 2359 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
|
| 2360 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
|
| 2361 |
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
|
|
|
|
| 2469 |
return true;
|
| 2470 |
}
|
| 2471 |
|
| 2472 |
+
GGML_ABORT("fatal error");
|
| 2473 |
return false;
|
| 2474 |
}
|
| 2475 |
|