Commit b9b60de

Parent(s): c3a7159

vulkan : reuse parent extra for views (llama/7806)

* vulkan : reuse parent extra for views

* Fix validation error when multiple compute contexts are used in a graph

---------

Co-authored-by: 0cc4m <[email protected]>

- ggml-vulkan.cpp +56 -72

ggml-vulkan.cpp CHANGED
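The change in one sentence: a view tensor no longer gets its own ggml_tensor_extra_gpu with a baked-in offset; it reuses its parent's extra, and every consumer adds tensor->view_offs when it computes a buffer offset. A minimal standalone sketch of that idea, using simplified stand-in types rather than the real vk_buffer/ggml structures:

// Illustrative sketch only: simplified stand-ins, not the types from ggml-vulkan.cpp.
#include <cstdint>

struct buffer_extra {          // stands in for ggml_tensor_extra_gpu
    uint64_t offset;           // where the parent tensor starts in the device buffer
};

struct tensor {                // stands in for ggml_tensor
    buffer_extra * extra;      // shared with the parent for views
    const tensor * view_src;   // parent tensor, or nullptr
    uint64_t view_offs;        // byte offset of the view inside the parent
};

// Same arithmetic the patch adds at each use site: extra->offset + view_offs.
static uint64_t buf_offset(const tensor & t) {
    return t.extra->offset + t.view_offs;
}

int main() {
    buffer_extra parent_extra{4096};
    tensor parent{&parent_extra, nullptr, 0};
    tensor view{parent.extra, &parent, 512};   // reuse the parent's extra, as in the patch
    return buf_offset(view) == 4608 ? 0 : 1;   // 4096 + 512
}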
@@ -345,15 +345,12 @@ struct vk_context {
 };
 
 struct ggml_tensor_extra_gpu {
-    bool ready;
-
     size_t ctx_idx;
 
     vk_buffer_ref buffer_gpu;
     uint64_t offset;
 
     void reset() {
-        ready = false;
         ctx_idx = 0;
         buffer_gpu.reset();
         offset = 0;
@@ -2949,7 +2946,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     GGML_ASSERT(d_D->size >= d_buf_offset + d_sz * ne02 * ne03);
     vk_buffer d_X;
@@ -2958,12 +2955,12 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
     uint64_t y_buf_offset = 0;
     if (!src0_uma) {
         d_Qx = extra_src0->buffer_gpu.lock();
-        qx_buf_offset = extra_src0->offset;
+        qx_buf_offset = extra_src0->offset + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if (!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3114,7 +3111,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
@@ -3122,12 +3119,12 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
     uint64_t y_buf_offset = 0;
     if(!src0_uma) {
         d_Qx = extra_src0->buffer_gpu.lock();
-        qx_buf_offset = extra_src0->offset;
+        qx_buf_offset = extra_src0->offset + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if(!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3246,14 +3243,14 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_Qx = extra_src0->buffer_gpu.lock();
-    const uint64_t qx_buf_offset = extra_src0->offset;
+    const uint64_t qx_buf_offset = extra_src0->offset + src0->view_offs;
     GGML_ASSERT(d_Qx != nullptr);
     if (!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
 
@@ -3323,14 +3320,14 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_Qx = extra_src0->buffer_gpu.lock();
-    const uint64_t qx_buf_offset = extra_src0->offset;
+    const uint64_t qx_buf_offset = extra_src0->offset + src0->view_offs;
     GGML_ASSERT(d_Qx != nullptr);
     if (!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
 
@@ -3459,7 +3456,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context *
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
@@ -3467,17 +3464,17 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context *
     uint64_t y_buf_offset = 0;
     if (!src0_uma) {
         d_Qx = extra_src0->buffer_gpu.lock();
-        qx_buf_offset = extra_src0->offset;
+        qx_buf_offset = extra_src0->offset + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if (!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if (!ids_uma) {
         d_ids = extra_ids->buffer_gpu.lock();
-        ids_buf_offset = extra_ids->offset;
+        ids_buf_offset = extra_ids->offset + ids->view_offs;
         GGML_ASSERT(d_ids != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3636,7 +3633,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
     const uint64_t d_sz = sizeof(float) * d_ne;
 
     vk_buffer d_D = extra->buffer_gpu.lock();
-    const uint64_t d_buf_offset = extra->offset;
+    const uint64_t d_buf_offset = extra->offset + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
@@ -3644,17 +3641,17 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
     uint64_t y_buf_offset = 0;
     if(!src0_uma) {
         d_Qx = extra_src0->buffer_gpu.lock();
-        qx_buf_offset = extra_src0->offset;
+        qx_buf_offset = extra_src0->offset + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if(!src1_uma) {
         d_Qy = extra_src1->buffer_gpu.lock();
-        qy_buf_offset = extra_src1->offset;
+        qy_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if(!ids_uma) {
         d_ids = extra_ids->buffer_gpu.lock();
-        ids_buf_offset = extra_ids->offset;
+        ids_buf_offset = extra_ids->offset + ids->view_offs;
         GGML_ASSERT(d_ids != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3769,9 +3766,9 @@ static void ggml_vk_op_repeat(ggml_backend_vk_context * ctx, vk_context * subctx
     ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
 
     const vk_buffer src_buf = extra_src0->buffer_gpu.lock();
-    const uint64_t src_offset = extra_src0->offset;
+    const uint64_t src_offset = extra_src0->offset + src0->view_offs;
     vk_buffer dst_buf = extra->buffer_gpu.lock();
-    const uint64_t dst_offset = extra->offset;
+    const uint64_t dst_offset = extra->offset + dst->view_offs;
 
     std::vector<vk::BufferCopy> copies;
 
@@ -4062,21 +4059,21 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
     }
 
     GGML_ASSERT(d_D != nullptr);
-    uint64_t d_buf_offset = (extra->offset / ctx->device->properties.limits.minStorageBufferOffsetAlignment) * ctx->device->properties.limits.minStorageBufferOffsetAlignment;
+    uint64_t d_buf_offset = ((extra->offset + dst->view_offs) / ctx->device->properties.limits.minStorageBufferOffsetAlignment) * ctx->device->properties.limits.minStorageBufferOffsetAlignment;
     GGML_ASSERT(d_buf_offset == extra->offset || op == GGML_OP_CPY); // NOLINT
     if(!src0_uma) {
         d_X = extra_src0->buffer_gpu.lock();
-        x_buf_offset = extra_src0->offset;
+        x_buf_offset = extra_src0->offset + src0->view_offs;
         GGML_ASSERT(d_X != nullptr);
     }
     if (use_src1 && !src1_uma) {
         d_Y = extra_src1->buffer_gpu.lock();
-        y_buf_offset = extra_src1->offset;
+        y_buf_offset = extra_src1->offset + src1->view_offs;
         GGML_ASSERT(d_Y != nullptr);
     }
     if (use_src2 && !src2_uma) {
         d_Z = extra_src2->buffer_gpu.lock();
-        z_buf_offset = extra_src2->offset;
+        z_buf_offset = extra_src2->offset + src2->view_offs;
         GGML_ASSERT(d_Z != nullptr);
     }
 
@@ -4336,7 +4333,7 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context * subctx, cons
     ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
     const uint32_t src0_type_size = ggml_type_size(src0->type);
     const uint32_t dst_type_size = ggml_type_size(dst->type);
-    const uint32_t d_offset = (extra->offset % ctx->device->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size;
+    const uint32_t d_offset = ((extra->offset + dst->view_offs) % ctx->device->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size;
 
     ggml_vk_op_f32<vk_op_unary_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, {
         (uint32_t)ggml_nelements(src0),
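The two hunks above split the absolute byte position (extra->offset + dst->view_offs) into a descriptor offset rounded down to minStorageBufferOffsetAlignment and a remainder passed to the shader in elements. A small self-contained example of that arithmetic, with made-up values (256-byte alignment, float elements):

#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t align       = 256;   // stand-in for minStorageBufferOffsetAlignment
    const uint64_t base_offset = 1024;  // extra->offset of the parent allocation
    const uint64_t view_offs   = 36;    // dst->view_offs of the view
    const uint64_t abs_offset  = base_offset + view_offs;

    // Descriptor offset: rounded down to the required alignment (as in ggml_vk_op_f32).
    const uint64_t buf_offset  = (abs_offset / align) * align;                        // 1024
    // Remainder in elements, handed to the shader (as in ggml_vk_cpy).
    const uint32_t elem_offset = (uint32_t)((abs_offset % align) / sizeof(float));    // 9

    printf("buf_offset=%llu elem_offset=%u\n", (unsigned long long) buf_offset, elem_offset);
    return 0;
}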
@@ -5569,6 +5566,13 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
     const ggml_tensor * src2 = node->src[2];
 
     switch (node->op) {
+    // Return on empty ops to avoid generating a compute_ctx and setting exit_tensor
+    case GGML_OP_RESHAPE:
+    case GGML_OP_VIEW:
+    case GGML_OP_PERMUTE:
+    case GGML_OP_TRANSPOSE:
+    case GGML_OP_NONE:
+        return;
     case GGML_OP_UNARY:
         switch (ggml_get_unary_op(node)) {
         case GGML_UNARY_OP_SILU:
@@ -5590,10 +5594,6 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
     case GGML_OP_CPY:
     case GGML_OP_CONT:
     case GGML_OP_DUP:
-    case GGML_OP_RESHAPE:
-    case GGML_OP_VIEW:
-    case GGML_OP_PERMUTE:
-    case GGML_OP_TRANSPOSE:
     case GGML_OP_NORM:
     case GGML_OP_RMS_NORM:
     case GGML_OP_DIAG_MASK_INF:
@@ -5601,7 +5601,6 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
     case GGML_OP_ROPE:
     case GGML_OP_MUL_MAT:
     case GGML_OP_MUL_MAT_ID:
-    case GGML_OP_NONE:
     case GGML_OP_ARGSORT:
     case GGML_OP_SUM_ROWS:
         break;
@@ -5654,12 +5653,6 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
     case GGML_OP_DUP:
         ggml_vk_cpy(ctx, ctx->compute_ctx, src0, node);
 
-        break;
-    case GGML_OP_RESHAPE:
-    case GGML_OP_VIEW:
-    case GGML_OP_PERMUTE:
-    case GGML_OP_TRANSPOSE:
-    case GGML_OP_NONE:
         break;
     case GGML_OP_NORM:
         ggml_vk_norm(ctx, ctx->compute_ctx, src0, node);
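These four hunks make ggml_vk_build_graph return before any compute context is created for ops that only change tensor metadata, which is what the second commit-message bullet ("Fix validation error when multiple compute contexts are used in a graph") refers to. A hedged sketch of the same guard as a standalone helper; the helper name is illustrative and not part of ggml-vulkan.cpp:

#include "ggml.h"

// Hypothetical helper: ops that dispatch no kernel and therefore need no compute context.
bool ggml_vk_op_is_noop(enum ggml_op op) {
    switch (op) {
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
        case GGML_OP_NONE:
            return true;   // metadata-only: the data stays where it is
        default:
            return false;
    }
}

In the patch the same effect is achieved inline: the early return at the top of the switch means ctx->compute_ctx is never opened for such a node, so no empty context is left behind to trip validation later.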
@@ -5712,7 +5705,6 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
         return;
     }
 
-    extra->ready = true;
     extra->ctx_idx = ctx->compute_ctx->idx;
 
 #ifdef GGML_VULKAN_CHECK_RESULTS
@@ -5796,8 +5788,6 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
     ggml_vk_check_results_0(ctx, params, tensor);
 #endif
 
-    GGML_ASSERT(extra->ready);
-
     vk_context& subctx = ctx->gc.contexts[extra->ctx_idx];
 
     // Only run if ctx hasn't been submitted yet
@@ -5822,8 +5812,6 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
         subctx.out_memcpys.clear();
     }
 
-    extra->ready = false;
-
     return true;
 }
 
@@ -5943,7 +5931,9 @@ struct ggml_backend_vk_buffer_context {
 
     ~ggml_backend_vk_buffer_context() {
         ggml_vk_destroy_buffer(dev_buffer);
-
+        if (temp_tensor_extras != nullptr) {
+            delete[] temp_tensor_extras;
+        }
     }
 
     ggml_tensor_extra_gpu * ggml_vk_alloc_temp_tensor_extra() {
@@ -5990,18 +5980,16 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b
 #endif
     ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
 
-    ggml_tensor_extra_gpu * extra = ctx->ggml_vk_alloc_temp_tensor_extra();
-    if (tensor->view_src != nullptr && tensor->view_src->extra != nullptr) {
+    if (tensor->view_src != nullptr) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
-        ggml_tensor_extra_gpu * extra_view = (ggml_tensor_extra_gpu *) tensor->view_src->extra;
-        extra->buffer_gpu = extra_view->buffer_gpu;
-        extra->offset = extra_view->offset + tensor->view_offs;
+        GGML_ASSERT(tensor->view_src->extra != nullptr);
+        tensor->extra = tensor->view_src->extra;
     } else {
+        ggml_tensor_extra_gpu * extra = ctx->ggml_vk_alloc_temp_tensor_extra();
         extra->buffer_gpu = ctx->dev_buffer;
         extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
+        tensor->extra = extra;
     }
-
-    tensor->extra = extra;
 }
 
 GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
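After this hunk a view no longer gets its own extra; it aliases its parent's, and the per-view shift lives only in tensor->view_offs. A small illustrative check of that invariant (not code from the repository):

#include "ggml.h"

// Illustration only: assumes `t` was initialized by ggml_backend_vk_buffer_init_tensor above.
void check_view_shares_parent_extra(const ggml_tensor * t) {
    if (t->view_src != nullptr) {
        // The view and its parent point at the same ggml_tensor_extra_gpu,
        // so any byte shift must come from t->view_offs at the use site.
        GGML_ASSERT(t->extra == t->view_src->extra);
    }
}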
@@ -6014,7 +6002,7 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu
 
     vk_buffer buf = extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_write(ctx->ctx, buf, extra->offset + offset, data, size);
+    ggml_vk_buffer_write(ctx->ctx, buf, extra->offset + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@@ -6027,7 +6015,7 @@ GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t bu
 
     vk_buffer buf = extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_read(ctx->ctx, buf, extra->offset + offset, data, size);
+    ggml_vk_buffer_read(ctx->ctx, buf, extra->offset + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
@@ -6038,7 +6026,7 @@ GGML_CALL static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t bu
     vk_buffer src_buf = src_extra->buffer_gpu.lock();
     vk_buffer dst_buf = dst_extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_copy(dst_buf, dst_extra->offset, src_buf, src_extra->offset, ggml_nbytes(src));
+    ggml_vk_buffer_copy(dst_buf, dst_extra->offset + dst->view_offs, src_buf, src_extra->offset + src->view_offs, ggml_nbytes(src));
 
     return true;
 }
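Here and in the async variants further down, the absolute position inside the vk_buffer is always the same sum: the parent's base offset, plus the view's byte shift, plus the caller's offset into the tensor. A tiny worked example with made-up numbers:

#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t extra_offset = 4096;  // where the parent tensor starts in the vk_buffer
    const uint64_t view_offs    = 512;   // byte shift of the view inside the parent
    const uint64_t user_offset  = 128;   // offset argument passed to set/get_tensor
    printf("absolute byte offset = %llu\n",
           (unsigned long long)(extra_offset + view_offs + user_offset)); // 4736
    return 0;
}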
@@ -6264,7 +6252,7 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
 
     vk_buffer buf = extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_write_async(ctx, ctx->transfer_ctx, buf, extra->offset + offset, data, size);
+    ggml_vk_buffer_write_async(ctx, ctx->transfer_ctx, buf, extra->offset + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@@ -6284,7 +6272,7 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
 
     vk_buffer buf = extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_read_async(ctx, ctx->transfer_ctx, buf, extra->offset + offset, data, size);
+    ggml_vk_buffer_read_async(ctx, ctx->transfer_ctx, buf, extra->offset + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) {
@@ -6305,7 +6293,7 @@ GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, c
     vk_buffer src_buf = src_extra->buffer_gpu.lock();
     vk_buffer dst_buf = dst_extra->buffer_gpu.lock();
 
-    ggml_vk_buffer_copy_async(ctx->transfer_ctx, dst_buf, dst_extra->offset, src_buf, src_extra->offset, ggml_nbytes(src));
+    ggml_vk_buffer_copy_async(ctx->transfer_ctx, dst_buf, dst_extra->offset + dst->view_offs, src_buf, src_extra->offset + src->view_offs, ggml_nbytes(src));
     return true;
 }
 
@@ -6478,11 +6466,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const
         //     return src0_type != GGML_TYPE_I32 && src0_type != GGML_TYPE_I16;
         // } break;
         case GGML_OP_ROPE:
-            {
-                const int mode = ((const int32_t *) op->op_params)[2];
-
-                return true;
-            } break;
+            return true;
         case GGML_OP_NONE:
         case GGML_OP_RESHAPE:
         case GGML_OP_VIEW:
@@ -6725,7 +6709,7 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
         vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }
 
     std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -6809,7 +6793,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
         } else if (ggml_backend_buffer_is_vk(src0->buffer)) {
             ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
             vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset;
+            uint64_t offset = extra->offset + src0->view_offs;
             if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
                 for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src0->ne[2]; i2++) {
@@ -6851,7 +6835,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
        } else if (ggml_backend_buffer_is_vk(src1->buffer)) {
             ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
             vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset;
+            uint64_t offset = extra->offset + src1->view_offs;
             if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
                 for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src1->ne[2]; i2++) {
@@ -6909,7 +6893,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
        } else if (ggml_backend_buffer_is_vk(src2->buffer)) {
             ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src2->extra;
             vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-            uint64_t offset = extra->offset;
+            uint64_t offset = extra->offset + src2->view_offs;
             if (!ggml_is_contiguous(src2) && ggml_vk_dim01_contiguous(src2)) {
                 for (int i3 = 0; i3 < src2->ne[3]; i3++) {
                     for (int i2 = 0; i2 < src2->ne[2]; i2++) {
@@ -7092,11 +7076,11 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
         ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
         vk_buffer buffer_gpu = extra->buffer_gpu.lock();
-        if (extra->offset + tensor_size >= buffer_gpu->size) {
-            tensor_size = buffer_gpu->size - (extra->offset);
+        if (extra->offset + tensor->view_offs + tensor_size >= buffer_gpu->size) {
+            tensor_size = buffer_gpu->size - (extra->offset + tensor->view_offs);
         }
 
-        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset, tensor_data, tensor_size);
+        ggml_vk_buffer_read(ctx, buffer_gpu, extra->offset + tensor->view_offs, tensor_data, tensor_size);
     }
 
     float first_error_result = -1.0f;