Commit e2989d0 · Parent: a094e22
Vulkan MMQ Fix (llama/8479)
* Fix incoherence by adding missing LOAD_VEC_A parameter
* Fix Vulkan op result checker build error
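
The second bullet refers to the Vulkan op result checker (the debug path compiled in with GGML_VULKAN_CHECK_RESULTS), which clones each op's inputs from GPU memory into host memory and compares the Vulkan output against a CPU reference. The hunks below update its `ggml_vk_buffer_read` call sites. For inputs whose i2/i3 slices are not densely packed, the checker copies one (i3, i2) slice at a time; the following standalone sketch reproduces that stride arithmetic with hypothetical types (`toy_tensor`, `copy_by_slices`) and a plain `memcpy` in place of the GPU readback, so it is an illustration of the pattern, not code from the commit.

```cpp
// Minimal standalone sketch (not part of the commit): the slice-wise copy
// pattern visible in the ggml_vk_check_results_0 hunks below, with memcpy
// standing in for the GPU readback ggml_vk_buffer_read(buffer, offset, dst, size).
#include <cstring>
#include <cstdint>
#include <vector>

struct toy_tensor {          // hypothetical stand-in for ggml_tensor
    int64_t ne[4];           // number of elements in each dimension (as in ggml)
    size_t  nb[4];           // stride in bytes for each dimension (as in ggml)
    void *  data;
};

// Each (i3, i2) slice is ne[1] rows of nb[1] bytes, contiguous within the
// slice; consecutive slices may sit at a larger stride nb[2] (padding between
// slices), so the copy is done slice by slice rather than as one block.
static void copy_by_slices(const toy_tensor & src, toy_tensor & dst) {
    for (int i3 = 0; i3 < src.ne[3]; i3++) {
        for (int i2 = 0; i2 < src.ne[2]; i2++) {
            const int idx = i3*src.ne[2] + i2;
            memcpy((char *) dst.data + idx * dst.nb[2],
                   (const char *) src.data + idx * src.nb[2],
                   src.ne[1] * src.nb[1]);
        }
    }
}

int main() {
    // 4 floats per row, 3 rows per slice, 2 slices; the source layout pads
    // one extra row between slices, the destination is densely packed.
    std::vector<float> src_buf(1 * 2 * (3 + 1) * 4, 1.0f);
    std::vector<float> dst_buf(1 * 2 * 3 * 4, 0.0f);

    toy_tensor src = {{4, 3, 2, 1},
                      {sizeof(float), 4*sizeof(float), (3 + 1)*4*sizeof(float), 2*(3 + 1)*4*sizeof(float)},
                      src_buf.data()};
    toy_tensor dst = {{4, 3, 2, 1},
                      {sizeof(float), 4*sizeof(float), 3*4*sizeof(float), 2*3*4*sizeof(float)},
                      dst_buf.data()};

    copy_by_slices(src, dst);
    return 0;
}
```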
ggml/src/ggml-vulkan.cpp (+8 -8)

```diff
@@ -6561,7 +6561,7 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;

         vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        ggml_vk_buffer_read(
+        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }

     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -6645,7 +6645,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                         const int idx = i3*src0->ne[2] + i2;
-                        ggml_vk_buffer_read(
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                     }
                 }

@@ -6658,7 +6658,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src0_size >= buffer_gpu->size) {
                     src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(
+                ggml_vk_buffer_read(buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6687,7 +6687,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                         const int idx = i3*src1->ne[2] + i2;
-                        ggml_vk_buffer_read(
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                     }
                 }

@@ -6700,7 +6700,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src1_size >= buffer_gpu->size) {
                     src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(
+                ggml_vk_buffer_read(buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6745,7 +6745,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 for (int i3 = 0; i3 < src2->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src2->ne[2]; i2++) {
                         const int idx = i3*src2->ne[2] + i2;
-                        ggml_vk_buffer_read(
+                        ggml_vk_buffer_read(buffer_gpu, offset + idx * src2->nb[2], ((char *)src2_clone->data + idx * src2_clone->nb[2]), src2->ne[1] * src2->nb[1]);
                     }
                 }

@@ -6758,7 +6758,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_tensor *
                 if (offset + src2_size >= buffer_gpu->size) {
                     src2_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(
+                ggml_vk_buffer_read(buffer_gpu, offset, src2_clone->data, src2_size);
                 memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -6922,7 +6922,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_tensor *
             tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
         }

-        ggml_vk_buffer_read(
+        ggml_vk_buffer_read(buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }

     float first_error_result = -1.0f;
```
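
One detail worth noting in the contiguous-branch hunks: before the single whole-tensor readback, the requested size is clamped so the read never runs past the end of the GPU buffer that backs the tensor (a view's offset plus its nominal byte size can exceed the allocation). A hedged standalone restatement of that guard, with a hypothetical helper name:

```cpp
#include <algorithm>
#include <cstddef>

// Hypothetical helper equivalent to the guard in the hunks above:
//   if (offset + size >= buffer_gpu->size) { size = buffer_gpu->size - offset; }
// i.e. never request more bytes than remain in the buffer past `offset`.
static size_t clamped_read_size(size_t offset, size_t nominal_size, size_t buffer_size) {
    return std::min(nominal_size, buffer_size - offset); // assumes offset <= buffer_size
}
```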