jeffbolznv committed on
Commit
85e2387
·
1 Parent(s): b2602d7

vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (ggml/961)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-vulkan.cpp +13 -6
ggml/src/ggml-vulkan.cpp CHANGED
@@ -5008,6 +5008,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
5008
  }
5009
  }
5010
 
 
 
5011
  vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5012
  vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5013
  vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
@@ -5124,7 +5126,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
5124
 
5125
  avg_err /= m * n;
5126
 
5127
- std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl;
 
 
5128
 
5129
  if (avg_err > 0.1) {
5130
  std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5246,12 +5250,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
5246
 
5247
  ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
5248
 
 
 
5249
  ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
5250
 
5251
  vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
5252
  ggml_vk_ctx_begin(ctx->device, subctx);
5253
  const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
5254
- ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
5255
  ggml_vk_ctx_end(subctx);
5256
 
5257
  auto begin = std::chrono::high_resolution_clock::now();
@@ -5378,6 +5384,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
5378
  }
5379
  }
5380
 
 
 
5381
  ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
5382
  ggml_vk_buffer_write(y_buf, 0, y, y_sz);
5383
 
@@ -5445,7 +5453,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
5445
 
5446
  avg_err /= m * n;
5447
 
5448
- std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl;
 
 
5449
 
5450
  if (avg_err > 0.01 || std::isnan(avg_err)) {
5451
  std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5497,9 +5507,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
5497
 
5498
  static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
5499
  #if defined(GGML_VULKAN_RUN_TESTS)
5500
- ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
5501
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
5502
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
5503
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
5504
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
5505
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);
 
5008
  }
5009
  }
5010
 
5011
+ ggml_pipeline_allocate_descriptor_sets(ctx->device);
5012
+
5013
  vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5014
  vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5015
  vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
 
5126
 
5127
  avg_err /= m * n;
5128
 
5129
+ double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5130
+
5131
+ std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
5132
 
5133
  if (avg_err > 0.1) {
5134
  std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
 
5250
 
5251
  ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
5252
 
5253
+ ggml_pipeline_allocate_descriptor_sets(ctx->device);
5254
+
5255
  ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
5256
 
5257
  vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
5258
  ggml_vk_ctx_begin(ctx->device, subctx);
5259
  const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
5260
+ ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
5261
  ggml_vk_ctx_end(subctx);
5262
 
5263
  auto begin = std::chrono::high_resolution_clock::now();
 
5384
  }
5385
  }
5386
 
5387
+ ggml_pipeline_allocate_descriptor_sets(ctx->device);
5388
+
5389
  ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
5390
  ggml_vk_buffer_write(y_buf, 0, y, y_sz);
5391
 
 
5453
 
5454
  avg_err /= m * n;
5455
 
5456
+ double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5457
+
5458
+ std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
5459
 
5460
  if (avg_err > 0.01 || std::isnan(avg_err)) {
5461
  std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
 
5507
 
5508
  static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
5509
  #if defined(GGML_VULKAN_RUN_TESTS)
 
 
 
5510
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
5511
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
5512
  ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);