ddpasa committed on
Commit
472195f
·
unverified ·
1 Parent(s): 4a30367

ggml-vulkan: fix VULKAN_CHECK_RESULTS flag, which was previously broken (llama/5813)

Browse files
Files changed (1) hide show
  1. ggml-vulkan.cpp +18 -16
ggml-vulkan.cpp CHANGED
@@ -5428,7 +5428,8 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
5428
 
5429
  ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
5430
 
5431
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
 
5432
  }
5433
 
5434
  std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -5540,7 +5541,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
5540
  for (int i3 = 0; i3 < src0->ne[3]; i3++) {
5541
  for (int i2 = 0; i2 < src0->ne[2]; i2++) {
5542
  const int idx = i3*src0->ne[2] + i2;
5543
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
 
5544
  }
5545
  }
5546
 
@@ -5550,10 +5552,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
5550
  src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
5551
  }
5552
  } else {
5553
- if (offset + src0_size >= extra->buffer_gpu->size) {
5554
- src0_size = extra->buffer_gpu->size - offset;
 
5555
  }
5556
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
5557
  memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
5558
  }
5559
  } else {
@@ -5583,7 +5586,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
5583
  for (int i3 = 0; i3 < src1->ne[3]; i3++) {
5584
  for (int i2 = 0; i2 < src1->ne[2]; i2++) {
5585
  const int idx = i3*src1->ne[2] + i2;
5586
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
 
5587
  }
5588
  }
5589
 
@@ -5593,10 +5597,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
5593
  src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
5594
  }
5595
  } else {
5596
- if (offset + src1_size >= extra->buffer_gpu->size) {
5597
- src1_size = extra->buffer_gpu->size - offset;
 
5598
  }
5599
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
5600
  memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
5601
  }
5602
  } else {
@@ -5643,11 +5648,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
5643
  } else if (tensor->op == GGML_OP_RMS_NORM) {
5644
  tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
5645
  } else if (tensor->op == GGML_OP_SOFT_MAX) {
5646
- if (src1 != nullptr) {
5647
- tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params);
5648
- } else {
5649
  tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
5650
- }
5651
  } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
5652
  tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
5653
  } else if (tensor->op == GGML_OP_ROPE) {
@@ -5753,11 +5754,12 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
5753
 
5754
  ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
5755
 
5756
- if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
5757
- tensor_size = extra->buffer_gpu->size - (extra->offset);
 
5758
  }
5759
 
5760
- ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
5761
  }
5762
 
5763
  float first_error_result = -1.0f;
 
5428
 
5429
  ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
5430
 
5431
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5432
+ ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
5433
  }
5434
 
5435
  std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
 
5541
  for (int i3 = 0; i3 < src0->ne[3]; i3++) {
5542
  for (int i2 = 0; i2 < src0->ne[2]; i2++) {
5543
  const int idx = i3*src0->ne[2] + i2;
5544
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5545
+ ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
5546
  }
5547
  }
5548
 
 
5552
  src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
5553
  }
5554
  } else {
5555
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5556
+ if (offset + src0_size >= buffer_gpu->size) {
5557
+ src0_size = buffer_gpu->size - offset;
5558
  }
5559
+ ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
5560
  memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
5561
  }
5562
  } else {
 
5586
  for (int i3 = 0; i3 < src1->ne[3]; i3++) {
5587
  for (int i2 = 0; i2 < src1->ne[2]; i2++) {
5588
  const int idx = i3*src1->ne[2] + i2;
5589
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5590
+ ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
5591
  }
5592
  }
5593
 
 
5597
  src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
5598
  }
5599
  } else {
5600
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5601
+ if (offset + src1_size >= buffer_gpu->size) {
5602
+ src1_size = buffer_gpu->size - offset;
5603
  }
5604
+ ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
5605
  memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
5606
  }
5607
  } else {
 
5648
  } else if (tensor->op == GGML_OP_RMS_NORM) {
5649
  tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
5650
  } else if (tensor->op == GGML_OP_SOFT_MAX) {
 
 
 
5651
  tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
 
5652
  } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
5653
  tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
5654
  } else if (tensor->op == GGML_OP_ROPE) {
 
5754
 
5755
  ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
5756
 
5757
+ vk_buffer buffer_gpu = extra->buffer_gpu.lock();
5758
+ if (extra->offset + tensor_size >= buffer_gpu->size) {
5759
+ tensor_size = buffer_gpu->size - (extra->offset);
5760
  }
5761
 
5762
+ ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
5763
  }
5764
 
5765
  float first_error_result = -1.0f;