Spaces:
Running
Running
Commit · 85e2387
1 Parent(s): b2602d7
vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (ggml/961)
Browse files
- ggml/src/ggml-vulkan.cpp +13 -6
ggml/src/ggml-vulkan.cpp
CHANGED
|
@@ -5008,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|
| 5008 |
}
|
| 5009 |
}
|
| 5010 |
|
|
|
|
|
|
|
| 5011 |
vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 5012 |
vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 5013 |
vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
|
@@ -5124,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
|
|
| 5124 |
|
| 5125 |
avg_err /= m * n;
|
| 5126 |
|
| 5127 |
-
|
|
|
|
|
|
|
| 5128 |
|
| 5129 |
if (avg_err > 0.1) {
|
| 5130 |
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
|
|
@@ -5246,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
|
|
| 5246 |
|
| 5247 |
ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
|
| 5248 |
|
|
|
|
|
|
|
| 5249 |
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
|
| 5250 |
|
| 5251 |
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
|
| 5252 |
ggml_vk_ctx_begin(ctx->device, subctx);
|
| 5253 |
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
|
| 5254 |
-
ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
|
| 5255 |
ggml_vk_ctx_end(subctx);
|
| 5256 |
|
| 5257 |
auto begin = std::chrono::high_resolution_clock::now();
|
|
@@ -5378,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
|
|
| 5378 |
}
|
| 5379 |
}
|
| 5380 |
|
|
|
|
|
|
|
| 5381 |
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
|
| 5382 |
ggml_vk_buffer_write(y_buf, 0, y, y_sz);
|
| 5383 |
|
|
@@ -5445,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
|
|
| 5445 |
|
| 5446 |
avg_err /= m * n;
|
| 5447 |
|
| 5448 |
-
|
|
|
|
|
|
|
| 5449 |
|
| 5450 |
if (avg_err > 0.01 || std::isnan(avg_err)) {
|
| 5451 |
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
|
|
@@ -5497,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
|
|
| 5497 |
|
| 5498 |
static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
| 5499 |
#if defined(GGML_VULKAN_RUN_TESTS)
|
| 5500 |
-
ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
|
| 5501 |
-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
|
| 5502 |
-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
|
| 5503 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
|
| 5504 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
|
| 5505 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);
|
|
|
|
| 5008 |
}
|
| 5009 |
}
|
| 5010 |
|
| 5011 |
+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
|
| 5012 |
+
|
| 5013 |
vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 5014 |
vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
| 5015 |
vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
|
|
|
|
| 5126 |
|
| 5127 |
avg_err /= m * n;
|
| 5128 |
|
| 5129 |
+
double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
|
| 5130 |
+
|
| 5131 |
+
std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
|
| 5132 |
|
| 5133 |
if (avg_err > 0.1) {
|
| 5134 |
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
|
|
|
|
| 5250 |
|
| 5251 |
ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
|
| 5252 |
|
| 5253 |
+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
|
| 5254 |
+
|
| 5255 |
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
|
| 5256 |
|
| 5257 |
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
|
| 5258 |
ggml_vk_ctx_begin(ctx->device, subctx);
|
| 5259 |
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
|
| 5260 |
+
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
|
| 5261 |
ggml_vk_ctx_end(subctx);
|
| 5262 |
|
| 5263 |
auto begin = std::chrono::high_resolution_clock::now();
|
|
|
|
| 5384 |
}
|
| 5385 |
}
|
| 5386 |
|
| 5387 |
+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
|
| 5388 |
+
|
| 5389 |
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
|
| 5390 |
ggml_vk_buffer_write(y_buf, 0, y, y_sz);
|
| 5391 |
|
|
|
|
| 5453 |
|
| 5454 |
avg_err /= m * n;
|
| 5455 |
|
| 5456 |
+
double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
|
| 5457 |
+
|
| 5458 |
+
std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
|
| 5459 |
|
| 5460 |
if (avg_err > 0.01 || std::isnan(avg_err)) {
|
| 5461 |
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
|
|
|
|
| 5507 |
|
| 5508 |
static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
| 5509 |
#if defined(GGML_VULKAN_RUN_TESTS)
|
|
|
|
|
|
|
|
|
|
| 5510 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
|
| 5511 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
|
| 5512 |
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);
|