Commit e2989d0 · OccamRazor committed · 1 Parent(s): a094e22

Vulkan MMQ Fix (llama/8479)

* Fix incoherence by adding missing LOAD_VEC_A parameter

* Fix Vulkan op result checker build error
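
For context on the second bullet: the op result checker is debug-only code, so it went on calling ggml_vk_buffer_read with a now-removed ggml_backend_vk_context * argument and only failed once someone compiled with checking enabled. Below is a minimal, self-contained sketch of that kind of breakage, with toy stand-ins for the Vulkan types; the names and the exact post-refactor signature are assumptions for illustration, not the real API:

    #include <cstddef>
    #include <cstdio>

    // Toy stand-in for the real vk_buffer type; illustrative only.
    struct vk_buffer_t { const char * name; };
    using vk_buffer = vk_buffer_t *;

    // Assumed post-refactor signature: the backend-context parameter is gone
    // and the buffer alone identifies where to read from.
    static void ggml_vk_buffer_read(vk_buffer src, size_t offset, void * dst, size_t size) {
        (void) dst;
        std::printf("reading %zu bytes at offset %zu from %s\n", size, offset, src->name);
    }

    int main() {
        vk_buffer_t buf = { "buffer_gpu" };
        char dst[16];
        // Old call sites passed a ggml_backend_vk_context * as the first
        // argument; against the signature above that extra argument is a
        // compile error, which is the build error the checker hunks fix.
        ggml_vk_buffer_read(&buf, 0, dst, sizeof(dst));
        return 0;
    }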

Files changed (1):
  1. ggml/src/ggml-vulkan.cpp +8 -8
ggml/src/ggml-vulkan.cpp CHANGED
@@ -6561,7 +6561,7 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;

         vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
+        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }

     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -6645,7 +6645,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                         const int idx = i3*src0->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                     }
                 }

@@ -6658,7 +6658,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src0_size >= buffer_gpu->size) {
                     src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
+                ggml_vk_buffer_read(buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6687,7 +6687,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                         const int idx = i3*src1->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                     }
                 }

@@ -6700,7 +6700,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src1_size >= buffer_gpu->size) {
                     src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
+                ggml_vk_buffer_read(buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6745,7 +6745,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src2->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src2->ne[2]; i2++) {
                         const int idx = i3*src2->ne[2] + i2;
-                        ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
                     }
                 }

@@ -6758,7 +6758,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src2_size >= buffer_gpu->size) {
                     src2_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src2_clone->data, src2_size);
+                ggml_vk_buffer_read(buffer_gpu, offset, src2_clone->data, src2_size);
                 memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6922,7 +6922,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
             tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
         }

-        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
+        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }

     float first_error_result = -1.0f;