ddpasa committed
ggml-vulkan: fix VULKAN_CHECK_RESULTS flag, which was previously broken (llama/5813)
ggml-vulkan.cpp  +18 -16
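The patch touches only the result-checking paths (`ggml_vk_print_tensor`, `ggml_vk_check_results_0`, `ggml_vk_check_results_1`), and it applies one pattern throughout: `extra->buffer_gpu` is no longer handed to `ggml_vk_buffer_read` directly, but first promoted to a usable handle with `.lock()`, and the read size is clamped to the end of the buffer. A minimal sketch of that pattern, assuming (as the `.lock()` call suggests) that `vk_buffer` is a `std::shared_ptr` alias and `buffer_gpu` a `std::weak_ptr` to the same buffer struct; the helper name and the struct fields below are illustrative, not from the patch:

    #include <cstddef>
    #include <memory>

    struct vk_buffer_struct { size_t size; };               // stand-in for the real buffer type
    using vk_buffer     = std::shared_ptr<vk_buffer_struct>;
    using vk_buffer_ref = std::weak_ptr<vk_buffer_struct>;  // assumed type of extra->buffer_gpu

    // clamp_read_size: the lock-then-clamp step the patch performs before each
    // ggml_vk_buffer_read call (hypothetical helper, for illustration only)
    static size_t clamp_read_size(const vk_buffer_ref & ref, size_t offset, size_t size) {
        vk_buffer buf = ref.lock();        // empty if the underlying buffer was freed
        if (!buf) {
            return 0;
        }
        if (offset + size >= buf->size) {  // same bound check as in the patch
            size = buf->size - offset;
        }
        return size;
    }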
ggml-vulkan.cpp
CHANGED
@@ -5428,7 +5428,8 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -5540,7 +5541,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
             for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                 for (int i2 = 0; i2 < src0->ne[2]; i2++) {
                     const int idx = i3*src0->ne[2] + i2;
-                    ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
+                    vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                    ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src0->nb[2], ((char *)src0_clone->data + idx * src0_clone->nb[2]), src0->ne[1] * src0->nb[1]);
                 }
             }
 
@@ -5550,10 +5552,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src0_clone->nb[i] = src0_clone->nb[i - 1]*src0_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src0_size >= extra->buffer_gpu->size) {
-                    src0_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src0_size >= buffer_gpu->size) {
+                    src0_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src0_clone->data, src0_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src0_clone->data, src0_size);
                 memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5583,7 +5586,8 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
             for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                 for (int i2 = 0; i2 < src1->ne[2]; i2++) {
                     const int idx = i3*src1->ne[2] + i2;
-                    ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
+                    vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                    ggml_vk_buffer_read(ctx, buffer_gpu, offset + idx * src1->nb[2], ((char *)src1_clone->data + idx * src1_clone->nb[2]), src1->ne[1] * src1->nb[1]);
                 }
             }
 
@@ -5593,10 +5597,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
                     src1_clone->nb[i] = src1_clone->nb[i - 1]*src1_clone->ne[i - 1];
                 }
             } else {
-                if (offset + src1_size >= extra->buffer_gpu->size) {
-                    src1_size = extra->buffer_gpu->size - offset;
+                vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+                if (offset + src1_size >= buffer_gpu->size) {
+                    src1_size = buffer_gpu->size - offset;
                 }
-                ggml_vk_buffer_read(ctx, extra->buffer_gpu, offset, src1_clone->data, src1_size);
+                ggml_vk_buffer_read(ctx, buffer_gpu, offset, src1_clone->data, src1_size);
                 memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
             }
         } else {
@@ -5643,11 +5648,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
     } else if (tensor->op == GGML_OP_RMS_NORM) {
         tensor_clone = ggml_rms_norm(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_SOFT_MAX) {
-        if (src1 != nullptr) {
-            tensor_clone = ggml_soft_max_ext(ggml_ctx, src0_clone, src1_clone, *(float *)tensor->op_params);
-        } else {
             tensor_clone = ggml_soft_max(ggml_ctx, src0_clone);
-        }
     } else if (tensor->op == GGML_OP_DIAG_MASK_INF) {
         tensor_clone = ggml_diag_mask_inf(ggml_ctx, src0_clone, *(float *)tensor->op_params);
     } else if (tensor->op == GGML_OP_ROPE) {
@@ -5753,11 +5754,12 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
 
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
-        if (extra->offset + tensor_size >= extra->buffer_gpu->size) {
-            tensor_size = extra->buffer_gpu->size - (extra->offset);
+        vk_buffer buffer_gpu = extra->buffer_gpu.lock();
+        if (extra->offset + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - (extra->offset);
         }
 
-        ggml_vk_buffer_read(ctx, extra->buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;
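These check functions are only compiled in when the VULKAN_CHECK_RESULTS flag named in the commit title is defined at build time. As the diff shows, they rebuild each operation with the reference ggml implementation (the `tensor_clone` constructed via `ggml_rms_norm`, `ggml_soft_max`, and so on), read the Vulkan result back, and compare; the `GGML_OP_SOFT_MAX` branch is also simplified here to always use plain `ggml_soft_max`, dropping the `ggml_soft_max_ext` path for a `src1` mask. A rough sketch of the comparison step this feeds into; only `first_error_result` and the "TENSOR CHECK" message appear in the file, so the helper name, tolerance, and return convention below are assumptions:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // first_mismatch: compare backend output against the reference clone and
    // report the first element that differs (hypothetical helper, illustration only)
    static float first_mismatch(const std::vector<float> & gpu,
                                const std::vector<float> & ref,
                                float tol = 1e-3f) {
        for (size_t i = 0; i < gpu.size() && i < ref.size(); i++) {
            if (std::fabs(gpu[i] - ref[i]) > tol) {
                std::fprintf(stderr, "TENSOR CHECK: first error at %zu (%f vs %f)\n",
                             i, gpu[i], ref[i]);
                return gpu[i];
            }
        }
        return -1.0f; // mirrors the first_error_result = -1.0f sentinel above
    }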