code : normalize enum names (llama/5697)
* code : normalize enum names
ggml-ci
* code : cont
* code : cont
- ggml-cuda.cu +69 -69
- ggml-metal.m +2 -2
- ggml-opencl.cpp +25 -25
- ggml-sycl.cpp +76 -76
- ggml-vulkan.cpp +51 -51
- ggml.c +175 -175
- ggml.h +19 -19
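The change is mechanical: each enum value is renamed so that it carries its enum type's name as part of the prefix, which is what "normalize enum names" refers to. The removed lines in the hunks below are truncated in this page, so the mapping here is only a sketch: the right-hand spellings appear verbatim in the added lines, while the left-hand (pre-rename) spellings and the enum tags are assumptions inferred from the commit's intent, not copied from this page.

    // Sketch of the renaming pattern, not the full ggml.h definitions.
    enum ggml_backend_type {          // assumed tag
        GGML_BACKEND_TYPE_CPU,        // was GGML_BACKEND_CPU (assumed)
        GGML_BACKEND_TYPE_GPU,        // was GGML_BACKEND_GPU (assumed)
        GGML_BACKEND_TYPE_GPU_SPLIT,  // was GGML_BACKEND_GPU_SPLIT (assumed)
    };
    enum ggml_sort_order {            // assumed tag
        GGML_SORT_ORDER_ASC,          // was GGML_SORT_ASC (assumed)
        GGML_SORT_ORDER_DESC,         // was GGML_SORT_DESC (assumed)
    };
    enum ggml_task_type {             // assumed tag
        GGML_TASK_TYPE_INIT,          // was GGML_TASK_INIT (assumed)
        GGML_TASK_TYPE_COMPUTE,       // was GGML_TASK_COMPUTE (assumed)
        GGML_TASK_TYPE_FINALIZE,      // was GGML_TASK_FINALIZE (assumed)
    };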
ggml-cuda.cu
CHANGED
@@ -6369,11 +6369,11 @@ static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int n
- if (order ==
- if (order ==
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
+ if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
@@ -7927,10 +7927,10 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co
- if (order ==
- k_argsort_f32_i32<
- } else if (order ==
- k_argsort_f32_i32<
+ if (order == GGML_SORT_ORDER_ASC) {
+ k_argsort_f32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+ } else if (order == GGML_SORT_ORDER_DESC) {
+ k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
@@ -8362,11 +8362,11 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
- if (src->backend ==
- } else if (src->backend ==
- GGML_ASSERT(src->backend !=
+ if (src->backend == GGML_BACKEND_TYPE_CPU) {
+ } else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
+ GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
@@ -8771,7 +8771,7 @@ static void ggml_cuda_op_mul_mat_q(
- const int64_t nrows_dst = dst->backend ==
+ const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
@@ -8920,7 +8920,7 @@ static void ggml_cuda_op_mul_mat_vec_q(
- const int64_t nrows_dst = dst->backend ==
+ const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
@@ -9096,7 +9096,7 @@ static void ggml_cuda_op_mul_mat_cublas(
- int ldc = dst->backend ==
+ int ldc = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device ? ne0 : row_diff;
@@ -9444,7 +9444,7 @@ static void ggml_cuda_op_soft_max(
- const bool src2_on_device = src2->backend ==
+ const bool src2_on_device = src2->backend == GGML_BACKEND_TYPE_GPU;
@@ -9502,16 +9502,16 @@ static void ggml_cuda_op_flatten(const ggml_tensor * src0, const ggml_tensor * s
- GGML_ASSERT(!use_src1 || src1->backend !=
- GGML_ASSERT( dst->backend !=
- const bool src0_on_device = src0->backend ==
- const bool src1_on_device = use_src1 && src1->backend ==
- const bool dst_on_device = dst->backend ==
+ GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU;
@@ -9555,7 +9555,7 @@ static void ggml_cuda_op_flatten(const ggml_tensor * src0, const ggml_tensor * s
- if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -9636,8 +9636,8 @@ static void ggml_cuda_op_mul_mat(
- GGML_ASSERT(dst->backend !=
- GGML_ASSERT(src1->backend !=
+ GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
+ GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
@@ -9653,20 +9653,20 @@ static void ggml_cuda_op_mul_mat(
- const bool src0_on_device = src0->backend ==
- const bool split = src0->backend ==
- // TODO: check that src0->buffer->buft is a split buffer type, replace
+ const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
+ // TODO: check that src0->buffer->buft is a split buffer type, replace GGML_BACKEND_TYPE_GPU_SPLIT check
@@ -9724,8 +9724,8 @@ static void ggml_cuda_op_mul_mat(
- const bool src1_on_device = src1->backend ==
- const bool dst_on_device = dst->backend ==
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
@@ -9776,8 +9776,8 @@ static void ggml_cuda_op_mul_mat(
- const bool src1_on_device = src1->backend ==
- const bool dst_on_device = dst->backend ==
+ const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
+ const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device;
@@ -9802,12 +9802,12 @@ static void ggml_cuda_op_mul_mat(
- if (dst->backend ==
- if (src1->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device) {
+ if (src1->backend == GGML_BACKEND_TYPE_GPU && src1_is_contiguous) {
@@ -9820,14 +9820,14 @@ static void ggml_cuda_op_mul_mat(
- } else if (src1->backend ==
- if (convert_src1_to_q8_1 && (src1->backend ==
+ } else if (src1->backend == GGML_BACKEND_TYPE_CPU || (src1_on_device && !src1_is_contiguous)) {
+ if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_TYPE_CPU || !src1_is_contiguous)) {
@@ -9845,10 +9845,10 @@ static void ggml_cuda_op_mul_mat(
- if (dst->backend ==
- } else if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
+ } else if (dst->backend == GGML_BACKEND_TYPE_GPU) {
@@ -9913,7 +9913,7 @@ static void ggml_cuda_op_mul_mat(
- if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -10019,7 +10019,7 @@ GGML_CALL bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const stru
- GGML_ASSERT(src0->backend !=
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
@@ -10050,7 +10050,7 @@ static void ggml_cuda_mul_mat_vec_nc(const ggml_tensor * src0, const ggml_tensor
- GGML_ASSERT(src0->backend !=
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
@@ -10109,7 +10109,7 @@ static void ggml_cuda_mul_mat_batched_cublas(const ggml_tensor * src0, const ggm
- GGML_ASSERT(src0->backend !=
+ GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
@@ -10255,11 +10255,11 @@ static void ggml_cuda_mul_mat_batched_cublas(const ggml_tensor * src0, const ggm
- (src0->backend ==
- (src1->backend ==
- ( dst->backend ==
- const bool split = src0->backend ==
+ (src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT) &&
+ (src1->backend == GGML_BACKEND_TYPE_GPU) &&
+ ( dst->backend == GGML_BACKEND_TYPE_GPU);
+ const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
@@ -10409,7 +10409,7 @@ static void ggml_cuda_mul_mat_id_cublas(ggml_tensor * dst) {
- GGML_ASSERT(src00->backend !=
+ GGML_ASSERT(src00->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
@@ -10553,7 +10553,7 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
- if (ids->backend ==
+ if (ids->backend == GGML_BACKEND_TYPE_GPU) {
@@ -10570,20 +10570,20 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
- src1_row.backend =
- dst_row.backend =
- char * src1_original = src1->backend ==
- char * dst_original = dst->backend ==
- GGML_ASSERT(src1->backend ==
- GGML_ASSERT(dst->backend ==
+ src1_row.backend = GGML_BACKEND_TYPE_GPU;
+ dst_row.backend = GGML_BACKEND_TYPE_GPU;
+ char * src1_original = src1->backend == GGML_BACKEND_TYPE_CPU ?
+ char * dst_original = dst->backend == GGML_BACKEND_TYPE_CPU ?
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
@@ -10611,9 +10611,9 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
- const cudaMemcpyKind src1_kind = src1->backend ==
- const cudaMemcpyKind dst_kind = dst->backend ==
+ const cudaMemcpyKind src1_kind = src1->backend == GGML_BACKEND_TYPE_CPU ?
+ const cudaMemcpyKind dst_kind = dst->backend == GGML_BACKEND_TYPE_CPU ?
@@ -10668,7 +10668,7 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
- if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -10685,8 +10685,8 @@ static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, gg
- GGML_ASSERT(src0->backend ==
- GGML_ASSERT(src1->backend ==
+ GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
@@ -10817,9 +10817,9 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st
- const bool any_on_device = tensor->backend ==
- || (tensor->src[0] != nullptr && (tensor->src[0]->backend ==
- || (tensor->src[1] != nullptr && tensor->src[1]->backend ==
+ const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
+ || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
+ || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
@@ -10966,14 +10966,14 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st
- if (tensor->src[0] != nullptr && tensor->src[0]->backend ==
- if (params->type ==
+ if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11072,7 +11072,7 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t
- tensor->backend =
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
@@ -11087,7 +11087,7 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t
- GGML_ASSERT(tensor->backend ==
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
@@ -11098,7 +11098,7 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
- GGML_ASSERT(tensor->backend ==
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
@@ -11333,7 +11333,7 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_bu
- tensor->backend =
+ tensor->backend = GGML_BACKEND_TYPE_GPU_SPLIT;
@@ -11605,7 +11605,7 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
- GGML_ASSERT(tensor->backend ==
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
@@ -11614,7 +11614,7 @@ GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend,
- GGML_ASSERT(tensor->backend ==
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
@@ -11644,7 +11644,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
- params.type =
+ params.type = GGML_TASK_TYPE_COMPUTE;
@@ -11654,13 +11654,13 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
- assert(node->backend ==
- assert(node->src[j]->backend ==
+ assert(node->backend == GGML_BACKEND_TYPE_GPU || node->backend == GGML_BACKEND_TYPE_GPU_SPLIT);
+ assert(node->src[j]->backend == GGML_BACKEND_TYPE_GPU || node->src[j]->backend == GGML_BACKEND_TYPE_GPU_SPLIT);
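Across the CUDA hunks above the renames land in two recurring spots: backend checks of the form tensor->backend == GGML_BACKEND_TYPE_*, and the argsort launcher, where a runtime sort order is turned into a compile-time template argument. A minimal stand-alone sketch of that dispatch shape follows, using the new spellings; the kernel is a host-side placeholder for illustration, not the real bitonic-sort kernel or the CUDA launch syntax.

    #include <cstdio>

    enum ggml_sort_order { GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC };

    // placeholder standing in for the templated device kernel k_argsort_f32_i32<order>
    template <ggml_sort_order order>
    static void k_argsort_placeholder(const float * x, int * dst, int ncols) {
        (void) x; (void) dst;
        std::printf("launch %s argsort over %d columns\n",
                    order == GGML_SORT_ORDER_ASC ? "ascending" : "descending", ncols);
    }

    // mirrors the shape of argsort_f32_i32_cuda: branch once on the enum value,
    // instantiate one kernel per order
    static void argsort_dispatch(const float * x, int * dst, int ncols, ggml_sort_order order) {
        if (order == GGML_SORT_ORDER_ASC) {
            k_argsort_placeholder<GGML_SORT_ORDER_ASC>(x, dst, ncols);
        } else if (order == GGML_SORT_ORDER_DESC) {
            k_argsort_placeholder<GGML_SORT_ORDER_DESC>(x, dst, ncols);
        }
    }

    int main() {
        float x[4] = {3.f, 1.f, 2.f, 0.f};
        int   dst[4] = {0, 1, 2, 3};
        argsort_dispatch(x, dst, 4, GGML_SORT_ORDER_ASC);
        return 0;
    }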
ggml-metal.m
CHANGED
@@ -2262,8 +2262,8 @@ static bool ggml_metal_graph_compute(
 id<MTLComputePipelineState> pipeline = nil;

 switch (order) {
- case
- case
+ case GGML_SORT_ORDER_ASC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC].pipeline; break;
+ case GGML_SORT_ORDER_DESC: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC].pipeline; break;
 default: GGML_ASSERT(false);
 };
ggml-opencl.cpp
CHANGED
@@ -1354,7 +1354,7 @@ static void ggml_cl_pool_free(cl_mem mem, size_t size) {
- if (tensor->backend !=
+ if (tensor->backend != GGML_BACKEND_TYPE_GPU) {
@@ -1412,7 +1412,7 @@ static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t o
- GGML_ASSERT(src1->backend ==
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
@@ -1476,7 +1476,7 @@ void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src
- GGML_ASSERT(src1->backend ==
+ GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
@@ -1566,13 +1566,13 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend ==
- cl_mem d_Y = src1->backend ==
- cl_mem d_D = dst->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
+ cl_mem d_Y = src1->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) src1->extra : ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
+ cl_mem d_D = dst->backend == GGML_BACKEND_TYPE_GPU ? (cl_mem) dst->extra : ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
@@ -1580,7 +1580,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) {
@@ -1589,7 +1589,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
- if (src1->backend ==
+ if (src1->backend == GGML_BACKEND_TYPE_CPU) {
@@ -1612,7 +1612,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
- if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -1621,13 +1621,13 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend !=
- if (src1->backend !=
- if (dst->backend !=
+ if (src0->backend != GGML_BACKEND_TYPE_GPU) {
+ if (src1->backend != GGML_BACKEND_TYPE_GPU) {
+ if (dst->backend != GGML_BACKEND_TYPE_GPU) {
@@ -1670,7 +1670,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) { // NOLINT
@@ -1687,7 +1687,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_GPU) {
@@ -1741,7 +1741,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
- if (dst->backend ==
+ if (dst->backend == GGML_BACKEND_TYPE_CPU) {
@@ -1753,7 +1753,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
- if (src0->backend !=
+ if (src0->backend != GGML_BACKEND_TYPE_GPU) {
@@ -1798,7 +1798,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
- if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
@@ -1817,10 +1817,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
- if (src0->backend ==
- } else if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
+ } else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
@@ -1829,7 +1829,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
- const size_t offset = src0->backend ==
+ const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
@@ -1843,7 +1843,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
- const size_t offset = src0->backend ==
+ const size_t offset = src0->backend == GGML_BACKEND_TYPE_GPU ? (i03 * ne02 + i02) * x_bps : 0;
@@ -1895,7 +1895,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
- if (src0->backend ==
+ if (src0->backend == GGML_BACKEND_TYPE_CPU) {
@@ -1911,7 +1911,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
- ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend ==
+ ((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU)) {
@@ -1993,7 +1993,7 @@ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
- GGML_ASSERT(tensor->backend ==
+ GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
@@ -2045,7 +2045,7 @@ static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer,
- tensor->backend =
+ tensor->backend = GGML_BACKEND_TYPE_GPU;
ggml-sycl.cpp
CHANGED
|
@@ -3338,7 +3338,7 @@ void print_ggml_tensor(const char*name, struct ggml_tensor *src){
|
|
| 3338 |
|
| 3339 |
size_t total_elements = ggml_nelements(src);
|
| 3340 |
|
| 3341 |
-
const bool src_on_device = src->backend ==
|
| 3342 |
float *src_data =NULL;
|
| 3343 |
if(src_on_device) {
|
| 3344 |
ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra;
|
|
@@ -8086,11 +8086,11 @@ static void k_argsort_f32_i32(const float * x, int * dst, const int ncols,
|
|
| 8086 |
int ixj = col ^ j;
|
| 8087 |
if (ixj > col) {
|
| 8088 |
if ((col & k) == 0) {
|
| 8089 |
-
if (order ==
|
| 8090 |
swap(dst_row[col], dst_row[ixj]);
|
| 8091 |
}
|
| 8092 |
} else {
|
| 8093 |
-
if (order ==
|
| 8094 |
swap(dst_row[col], dst_row[ixj]);
|
| 8095 |
}
|
| 8096 |
}
|
|
@@ -10825,7 +10825,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
| 10825 |
|
| 10826 |
const sycl::range<3> block_dims(1, 1, ncols);
|
| 10827 |
const sycl::range<3> block_nums(1, nrows, 1);
|
| 10828 |
-
if (order ==
|
| 10829 |
/*
|
| 10830 |
DPCT1049:44: The work-group size passed to the SYCL kernel may exceed
|
| 10831 |
the limit. To get the device limit, query
|
|
@@ -10834,9 +10834,9 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
| 10834 |
stream->parallel_for(
|
| 10835 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
| 10836 |
[=](sycl::nd_item<3> item_ct1) {
|
| 10837 |
-
k_argsort_f32_i32<
|
| 10838 |
});
|
| 10839 |
-
} else if (order ==
|
| 10840 |
/*
|
| 10841 |
DPCT1049:45: The work-group size passed to the SYCL kernel may exceed
|
| 10842 |
the limit. To get the device limit, query
|
|
@@ -10845,7 +10845,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|
| 10845 |
stream->parallel_for(
|
| 10846 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
| 10847 |
[=](sycl::nd_item<3> item_ct1) {
|
| 10848 |
-
k_argsort_f32_i32<
|
| 10849 |
});
|
| 10850 |
} else {
|
| 10851 |
GGML_ASSERT(false);
|
|
@@ -11407,12 +11407,12 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
|
|
| 11407 |
|
| 11408 |
dpct::memcpy_direction kind;
|
| 11409 |
char * src_ptr;
|
| 11410 |
-
if (src->backend ==
|
| 11411 |
kind = dpct::host_to_device;
|
| 11412 |
src_ptr = (char *) src->data;
|
| 11413 |
-
// GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d
|
| 11414 |
-
} else if (src->backend ==
|
| 11415 |
-
GGML_ASSERT(src->backend !=
|
| 11416 |
kind = dpct::device_to_device;
|
| 11417 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
|
| 11418 |
int id;
|
|
@@ -11846,7 +11846,7 @@ inline void ggml_sycl_op_mul_mat_q(
|
|
| 11846 |
|
| 11847 |
// the main device has a larger memory buffer to hold the results from all GPUs
|
| 11848 |
// nrows_dst == nrows of the matrix that the dequantize_mul_mat kernel writes into
|
| 11849 |
-
const int64_t nrows_dst = dst->backend ==
|
| 11850 |
|
| 11851 |
switch (src0->type) {
|
| 11852 |
case GGML_TYPE_Q4_0:
|
|
@@ -12119,7 +12119,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|
| 12119 |
|
| 12120 |
// the main device has a larger memory buffer to hold the results from all GPUs
|
| 12121 |
// ldc == nrows of the matrix that cuBLAS writes into
|
| 12122 |
-
int ldc = dst->backend ==
|
| 12123 |
|
| 12124 |
#ifdef GGML_SYCL_F16
|
| 12125 |
bool use_fp16 = true; // TODO(Yu) SYCL capability check
|
|
@@ -12501,16 +12501,16 @@ static void ggml_sycl_op_flatten(const ggml_tensor *src0,
|
|
| 12501 |
const bool use_src1 = src1 != nullptr;
|
| 12502 |
const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
|
| 12503 |
|
| 12504 |
-
GGML_ASSERT(!use_src1 || src1->backend !=
|
| 12505 |
-
GGML_ASSERT( dst->backend !=
|
| 12506 |
|
| 12507 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
|
| 12508 |
ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
|
| 12509 |
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 12510 |
|
| 12511 |
-
const bool src0_on_device = src0->backend ==
|
| 12512 |
-
const bool src1_on_device = use_src1 && src1->backend ==
|
| 12513 |
-
const bool dst_on_device = dst->backend ==
|
| 12514 |
|
| 12515 |
// dd = data device
|
| 12516 |
float * src0_ddf = nullptr;
|
|
@@ -12565,7 +12565,7 @@ static void ggml_sycl_op_flatten(const ggml_tensor *src0,
|
|
| 12565 |
main_stream->memcpy(dst->data, dst_ddf, ggml_nbytes(dst))));
|
| 12566 |
}
|
| 12567 |
|
| 12568 |
-
if (dst->backend ==
|
| 12569 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 12570 |
dpct::get_current_device().queues_wait_and_throw()));
|
| 12571 |
}
|
|
@@ -12640,8 +12640,8 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12640 |
const int nb2 = dst->nb[2];
|
| 12641 |
const int nb3 = dst->nb[3];
|
| 12642 |
|
| 12643 |
-
GGML_ASSERT(dst->backend !=
|
| 12644 |
-
GGML_ASSERT(src1->backend != GGML_BACKEND_GPU_SPLIT);
|
| 12645 |
|
| 12646 |
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
|
| 12647 |
|
|
@@ -12656,13 +12656,13 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12656 |
ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
|
| 12657 |
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 12658 |
|
| 12659 |
-
const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT;
|
| 12660 |
const bool src0_is_contiguous = ggml_is_contiguous(src0);
|
| 12661 |
const bool src1_is_contiguous = ggml_is_contiguous(src1);
|
| 12662 |
|
| 12663 |
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
|
| 12664 |
|
| 12665 |
-
const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
|
| 12666 |
GGML_ASSERT(!(split && ne02 > 1));
|
| 12667 |
GGML_ASSERT(!(split && ne03 > 1));
|
| 12668 |
GGML_ASSERT(!(split && ne02 < ne12));
|
|
@@ -12717,8 +12717,8 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12717 |
|
| 12718 |
used_devices++;
|
| 12719 |
|
| 12720 |
-
const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device_index;
|
| 12721 |
-
const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device_index;
|
| 12722 |
|
| 12723 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
| 12724 |
const dpct::queue_ptr stream = g_syclStreams[id][0];
|
|
@@ -12782,8 +12782,8 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12782 |
continue;
|
| 12783 |
}
|
| 12784 |
|
| 12785 |
-
const bool src1_on_device = src1->backend == GGML_BACKEND_GPU && id == g_main_device_index;
|
| 12786 |
-
const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device_index;
|
| 12787 |
const int64_t row_diff = row_high[id] - row_low[id];
|
| 12788 |
|
| 12789 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
|
@@ -12809,12 +12809,12 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12809 |
|
| 12810 |
// the main device memory buffer can be on VRAM scratch, with space for all partial results
|
| 12811 |
// in that case an offset on dst_ddf_i is needed
|
| 12812 |
-
if (dst->backend == GGML_BACKEND_GPU && id == g_main_device_index) {
|
| 12813 |
dst_dd_i += row_low[id]; // offset is 0 if no tensor split
|
| 12814 |
}
|
| 12815 |
|
| 12816 |
// copy src0, src1 to device if necessary
|
| 12817 |
-
if (src1->backend == GGML_BACKEND_GPU && src1_is_contiguous) {
|
| 12818 |
if (id != g_main_device_index) {
|
| 12819 |
if (convert_src1_to_q8_1) {
|
| 12820 |
char * src1_ddq_i_source = src1_ddq[g_main_device_index] + src1_ddq_i_offset;
|
|
@@ -12830,14 +12830,14 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12830 |
src1_ncols * ne10 * sizeof(float))));
|
| 12831 |
}
|
| 12832 |
}
|
| 12833 |
-
} else if (src1->backend == GGML_BACKEND_CPU || (src1_on_device && !src1_is_contiguous)) {
|
| 12834 |
SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
|
| 12835 |
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
|
| 12836 |
} else {
|
| 12837 |
GGML_ASSERT(false);
|
| 12838 |
}
|
| 12839 |
|
| 12840 |
-
if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_CPU || !src1_is_contiguous)) {
|
| 12841 |
quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
|
| 12842 |
/*
|
| 12843 |
DPCT1010:92: SYCL uses exceptions to report errors and does
|
|
@@ -12867,10 +12867,10 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12867 |
if (!dst_on_device) {
|
| 12868 |
void * dst_off_device;
|
| 12869 |
dpct::memcpy_direction kind;
|
| 12870 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 12871 |
dst_off_device = dst->data;
|
| 12872 |
kind = dpct::device_to_host;
|
| 12873 |
-
} else if (dst->backend == GGML_BACKEND_GPU) {
|
| 12874 |
dst_off_device = dst_extra->data_device[g_main_device_index];
|
| 12875 |
kind = dpct::device_to_device;
|
| 12876 |
} else {
|
|
@@ -12954,7 +12954,7 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
|
|
| 12954 |
}
|
| 12955 |
}
|
| 12956 |
|
| 12957 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 12958 |
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
|
| 12959 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 12960 |
dpct::get_current_device().queues_wait_and_throw()));
|
|
@@ -13091,7 +13091,7 @@ static void ggml_sycl_mul_mat_vec_p021(const ggml_tensor *src0,
|
|
| 13091 |
const ggml_tensor *src1,
|
| 13092 |
ggml_tensor *dst) try {
|
| 13093 |
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
| 13094 |
-
GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
|
| 13095 |
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
|
| 13096 |
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
|
| 13097 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
@@ -13129,7 +13129,7 @@ static void ggml_sycl_mul_mat_vec_nc(const ggml_tensor *src0,
|
|
| 13129 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 13130 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13131 |
GGML_ASSERT(!ggml_is_permuted(src0));
|
| 13132 |
-
GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
|
| 13133 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 13134 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13135 |
|
|
@@ -13196,7 +13196,7 @@ static void ggml_sycl_mul_mat_mat_batched_sycl(const ggml_tensor *src0,
|
|
| 13196 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 13197 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13198 |
|
| 13199 |
-
GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
|
| 13200 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 13201 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13202 |
|
|
@@ -13372,11 +13372,11 @@ catch (sycl::exception const &exc) {
|
|
| 13372 |
|
| 13373 |
static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
| 13374 |
const bool all_on_device =
|
| 13375 |
-
(src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT) &&
|
| 13376 |
-
(src1->backend == GGML_BACKEND_GPU) &&
|
| 13377 |
-
( dst->backend == GGML_BACKEND_GPU);
|
| 13378 |
|
| 13379 |
-
const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT;
|
| 13380 |
|
| 13381 |
int64_t min_compute_capability = INT_MAX;
|
| 13382 |
for (int64_t id = 0; id < g_device_count; ++id) {
|
|
@@ -13505,7 +13505,7 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
|
|
| 13505 |
GGML_ASSERT(!ggml_is_transposed(src00));
|
| 13506 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13507 |
|
| 13508 |
-
GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
|
| 13509 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13510 |
|
| 13511 |
GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
|
|
@@ -13643,7 +13643,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 13643 |
|
| 13644 |
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
|
| 13645 |
|
| 13646 |
-
if (ids->backend == GGML_BACKEND_GPU) {
|
| 13647 |
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device_index];
|
| 13648 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 13649 |
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
|
|
@@ -13661,20 +13661,20 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 13661 |
ggml_tensor src1_row = *src1;
|
| 13662 |
ggml_tensor dst_row = *dst;
|
| 13663 |
|
| 13664 |
-
src1_row.backend = GGML_BACKEND_GPU;
|
| 13665 |
-
dst_row.backend = GGML_BACKEND_GPU;
|
| 13666 |
|
| 13667 |
src1_row.extra = &src1_row_extra;
|
| 13668 |
dst_row.extra = &dst_row_extra;
|
| 13669 |
|
| 13670 |
-
char * src1_original = src1->backend == GGML_BACKEND_CPU ?
|
| 13671 |
(char *) src1->data : (char *) src1_extra->data_device[g_main_device_index];
|
| 13672 |
-
char * dst_original = dst->backend == GGML_BACKEND_CPU ?
|
| 13673 |
(char *) dst->data : (char *) dst_extra->data_device[g_main_device_index];
|
| 13674 |
|
| 13675 |
if (src1->ne[1] == 1) {
|
| 13676 |
-
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
| 13677 |
-
GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
|
| 13678 |
|
| 13679 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 13680 |
//int32_t row_id;
|
|
@@ -13756,7 +13756,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
|
|
| 13756 |
}
|
| 13757 |
}
|
| 13758 |
|
| 13759 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 13760 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
| 13761 |
}
|
| 13762 |
}
|
|
@@ -13779,8 +13779,8 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1,
|
|
| 13779 |
const int64_t ne = ggml_nelements(src0);
|
| 13780 |
GGML_ASSERT(ne == ggml_nelements(src1));
|
| 13781 |
|
| 13782 |
-
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
| 13783 |
-
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
| 13784 |
|
| 13785 |
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
|
| 13786 |
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
|
|
@@ -13887,17 +13887,17 @@ void ggml_sycl_transform_tensor(void *data, struct ggml_tensor *tensor) try {
|
|
| 13887 |
memset(extra, 0, sizeof(*extra));
|
| 13888 |
|
| 13889 |
for (int64_t id = 0; id < g_device_count; ++id) {
|
| 13890 |
-
if (backend == GGML_BACKEND_GPU && id != g_main_device_index) {
|
| 13891 |
continue;
|
| 13892 |
}
|
| 13893 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
| 13894 |
const dpct::queue_ptr stream = g_syclStreams[id][0];
|
| 13895 |
|
| 13896 |
int64_t row_low, row_high;
|
| 13897 |
-
if (backend == GGML_BACKEND_GPU) {
|
| 13898 |
row_low = 0;
|
| 13899 |
row_high = nrows;
|
| 13900 |
-
} else if (backend == GGML_BACKEND_GPU_SPLIT) {
|
| 13901 |
const int64_t rounding = get_row_rounding(tensor->type);
|
| 13902 |
|
| 13903 |
row_low = id == 0 ? 0 : nrows*g_tensor_split[id];
|
|
@@ -13946,7 +13946,7 @@ void ggml_sycl_transform_tensor(void *data, struct ggml_tensor *tensor) try {
|
|
| 13946 |
|
| 13947 |
extra->data_device[id] = buf;
|
| 13948 |
|
| 13949 |
-
if (backend == GGML_BACKEND_GPU_SPLIT) {
|
| 13950 |
for (int64_t is = 0; is < MAX_STREAMS; ++is) {
|
| 13951 |
SYCL_CHECK(CHECK_TRY_ERROR(extra->events[id][is] =
|
| 13952 |
new sycl::event()));
|
|
@@ -13963,7 +13963,7 @@ catch (sycl::exception const &exc) {
|
|
| 13963 |
}
|
| 13964 |
|
| 13965 |
void ggml_sycl_free_data(struct ggml_tensor *tensor) try {
|
| 13966 |
-
if (!tensor || !tensor->extra || (tensor->backend != GGML_BACKEND_GPU && tensor->backend != GGML_BACKEND_GPU_SPLIT) ) {
|
| 13967 |
return;
|
| 13968 |
}
|
| 13969 |
|
|
@@ -14016,15 +14016,15 @@ static void ggml_sycl_assign_buffers_impl(struct ggml_tensor *tensor,
|
|
| 14016 |
return;
|
| 14017 |
}
|
| 14018 |
|
| 14019 |
-
tensor->backend = GGML_BACKEND_GPU;
|
| 14020 |
|
| 14021 |
-
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
|
| 14022 |
const ggml_op src0_op = tensor->src[0]->op;
|
| 14023 |
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW || src0_op == GGML_OP_PERMUTE) {
|
| 14024 |
ggml_sycl_assign_buffers_impl(tensor->src[0], scratch, force_inplace, no_alloc);
|
| 14025 |
}
|
| 14026 |
}
|
| 14027 |
-
if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
|
| 14028 |
ggml_sycl_assign_buffers_impl(tensor->src[1], scratch, force_inplace, no_alloc);
|
| 14029 |
}
|
| 14030 |
|
|
@@ -14042,7 +14042,7 @@ static void ggml_sycl_assign_buffers_impl(struct ggml_tensor *tensor,
|
|
| 14042 |
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
|
| 14043 |
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
|
| 14044 |
|
| 14045 |
-
if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
|
| 14046 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
|
| 14047 |
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
|
| 14048 |
size_t offset = 0;
|
|
@@ -14111,7 +14111,7 @@ void ggml_sycl_assign_scratch_offset(struct ggml_tensor *tensor,
|
|
| 14111 |
|
| 14112 |
const bool inplace = tensor->view_src != nullptr;
|
| 14113 |
|
| 14114 |
-
if (inplace && (tensor->view_src->backend == GGML_BACKEND_GPU || tensor->view_src->backend == GGML_BACKEND_GPU_SPLIT)) {
|
| 14115 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->view_src->extra;
|
| 14116 |
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
|
| 14117 |
size_t view_offset = 0;
|
|
@@ -14132,7 +14132,7 @@ catch (sycl::exception const &exc) {
|
|
| 14132 |
}
|
| 14133 |
|
| 14134 |
void ggml_sycl_copy_to_device(struct ggml_tensor *tensor) try {
|
| 14135 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 14136 |
GGML_ASSERT(ggml_is_contiguous(tensor));
|
| 14137 |
|
| 14138 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
|
@@ -14219,9 +14219,9 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
|
|
| 14219 |
if (!g_sycl_loaded) return false;
|
| 14220 |
|
| 14221 |
ggml_sycl_func_t func;
|
| 14222 |
-
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
| 14223 |
-
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
| 14224 |
-
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
|
| 14225 |
|
| 14226 |
if (!any_on_device && tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_MUL_MAT_ID) {
|
| 14227 |
return false;
|
|
@@ -14359,14 +14359,14 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
|
|
| 14359 |
return false;
|
| 14360 |
}
|
| 14361 |
|
| 14362 |
-
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT) {
|
| 14363 |
ggml_sycl_set_peer_access(tensor->src[1]->ne[1]);
|
| 14364 |
}
|
| 14365 |
|
| 14366 |
if (params->ith != 0) {
|
| 14367 |
return true;
|
| 14368 |
}
|
| 14369 |
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 14370 |
return true;
|
| 14371 |
}
|
| 14372 |
func(tensor->src[0], tensor->src[1], tensor);
|
|
@@ -14517,7 +14517,7 @@ static void ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
|
|
| 14517 |
|
| 14518 |
extra->data_device[ctx->device] = tensor->data;
|
| 14519 |
|
| 14520 |
-
tensor->backend = GGML_BACKEND_GPU;
|
| 14521 |
tensor->extra = extra;
|
| 14522 |
|
| 14523 |
if (ggml_is_quantized(tensor->type)) {
|
|
@@ -14548,7 +14548,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
| 14548 |
ggml_tensor *tensor,
|
| 14549 |
const void *data, size_t offset,
|
| 14550 |
size_t size) try {
|
| 14551 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 14552 |
|
| 14553 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 14554 |
|
|
@@ -14573,7 +14573,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
|
|
| 14573 |
const ggml_tensor *tensor,
|
| 14574 |
void *data, size_t offset,
|
| 14575 |
size_t size) try {
|
| 14576 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 14577 |
|
| 14578 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 14579 |
|
|
@@ -14809,7 +14809,7 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
|
|
| 14809 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 14810 |
|
| 14811 |
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
|
| 14812 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 14813 |
|
| 14814 |
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
|
| 14815 |
(char *)tensor->data + offset, data, size)));
|
|
@@ -14827,7 +14827,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
|
|
| 14827 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 14828 |
|
| 14829 |
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
|
| 14830 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 14831 |
|
| 14832 |
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
|
| 14833 |
data, (const char *)tensor->data + offset, size)));
|
|
@@ -14880,7 +14880,7 @@ static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph
|
|
| 14880 |
ggml_sycl_set_main_device(sycl_ctx->device);
|
| 14881 |
|
| 14882 |
ggml_compute_params params = {};
|
| 14883 |
-
params.type = GGML_TASK_COMPUTE;
|
| 14884 |
params.ith = 0;
|
| 14885 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 14886 |
ggml_tensor * node = cgraph->nodes[i];
|
|
@@ -14888,13 +14888,13 @@ static bool ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph
|
|
| 14888 |
if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE)
|
| 14889 |
continue;
|
| 14890 |
|
| 14891 |
-
assert(node->backend == GGML_BACKEND_GPU);
|
| 14892 |
assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
|
| 14893 |
assert(node->extra != nullptr);
|
| 14894 |
|
| 14895 |
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 14896 |
if (node->src[j] != nullptr) {
|
| 14897 |
-
assert(node->src[j]->backend == GGML_BACKEND_GPU);
|
| 14898 |
assert(node->src[j]->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
|
| 14899 |
assert(node->src[j]->extra != nullptr);
|
| 14900 |
}
|
|
|
|
| 3338 |
|
| 3339 |
size_t total_elements = ggml_nelements(src);
|
| 3340 |
|
| 3341 |
+
const bool src_on_device = src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
|
| 3342 |
float *src_data =NULL;
|
| 3343 |
if(src_on_device) {
|
| 3344 |
ggml_tensor_extra_gpu * src_extra = (ggml_tensor_extra_gpu *) src->extra;
|
|
|
|
| 8086 |
int ixj = col ^ j;
|
| 8087 |
if (ixj > col) {
|
| 8088 |
if ((col & k) == 0) {
|
| 8089 |
+
if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
|
| 8090 |
swap(dst_row[col], dst_row[ixj]);
|
| 8091 |
}
|
| 8092 |
} else {
|
| 8093 |
+
if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
|
| 8094 |
swap(dst_row[col], dst_row[ixj]);
|
| 8095 |
}
|
| 8096 |
}
|
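The hunk above is the bitonic-sort comparator inside k_argsort_f32_i32: the (renamed) sort-order enum decides whether a pair is swapped for ascending or descending output. For reference, here is a minimal host-side sketch of the same argsort semantics, assuming only the two values visible in the diff; the local enum declaration is a stand-in, not the real ggml.h definition.

    #include <algorithm>
    #include <numeric>
    #include <vector>

    enum ggml_sort_order { GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC }; // local stand-in

    // Reference argsort: returns the permutation that sorts x, like the kernel's dst row.
    static std::vector<int> argsort_f32_i32_ref(const float * x, int n, ggml_sort_order order) {
        std::vector<int> idx(n);
        std::iota(idx.begin(), idx.end(), 0);
        std::sort(idx.begin(), idx.end(), [&](int a, int b) {
            return order == GGML_SORT_ORDER_ASC ? x[a] < x[b] : x[a] > x[b];
        });
        return idx;
    }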
|
|
|
| 10825 |
|
| 10826 |
const sycl::range<3> block_dims(1, 1, ncols);
|
| 10827 |
const sycl::range<3> block_nums(1, nrows, 1);
|
| 10828 |
+
if (order == GGML_SORT_ORDER_ASC) {
|
| 10829 |
/*
|
| 10830 |
DPCT1049:44: The work-group size passed to the SYCL kernel may exceed
|
| 10831 |
the limit. To get the device limit, query
|
|
|
|
| 10834 |
stream->parallel_for(
|
| 10835 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
| 10836 |
[=](sycl::nd_item<3> item_ct1) {
|
| 10837 |
+
k_argsort_f32_i32<GGML_SORT_ORDER_ASC>(x, dst, ncols, item_ct1);
|
| 10838 |
});
|
| 10839 |
+
} else if (order == GGML_SORT_ORDER_DESC) {
|
| 10840 |
/*
|
| 10841 |
DPCT1049:45: The work-group size passed to the SYCL kernel may exceed
|
| 10842 |
the limit. To get the device limit, query
|
|
|
|
| 10845 |
stream->parallel_for(
|
| 10846 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
| 10847 |
[=](sycl::nd_item<3> item_ct1) {
|
| 10848 |
+
k_argsort_f32_i32<GGML_SORT_ORDER_DESC>(x, dst, ncols, item_ct1);
|
| 10849 |
});
|
| 10850 |
} else {
|
| 10851 |
GGML_ASSERT(false);
|
|
|
|
| 11407 |
|
| 11408 |
dpct::memcpy_direction kind;
|
| 11409 |
char * src_ptr;
|
| 11410 |
+
if (src->backend == GGML_BACKEND_TYPE_CPU) {
|
| 11411 |
kind = dpct::host_to_device;
|
| 11412 |
src_ptr = (char *) src->data;
|
| 11413 |
+
// GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
|
| 11414 |
+
} else if (src->backend == GGML_BACKEND_TYPE_GPU || src->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
|
| 11415 |
+
GGML_ASSERT(src->backend != GGML_BACKEND_TYPE_GPU_SPLIT || (i1_low == 0 && i1_high == src->ne[1]));
|
| 11416 |
kind = dpct::device_to_device;
|
| 11417 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
|
| 11418 |
int id;
|
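In ggml_sycl_cpy_tensor_2d above, the source tensor's backend enum selects both the source pointer and the memcpy direction. A minimal sketch of that dispatch follows; the enum is a local stand-in for the renamed values and the helper name is illustrative, not taken from the diff.

    #include <stdexcept>

    enum ggml_backend_type { GGML_BACKEND_TYPE_CPU, GGML_BACKEND_TYPE_GPU, GGML_BACKEND_TYPE_GPU_SPLIT }; // stand-in

    enum copy_kind { HOST_TO_DEVICE, DEVICE_TO_DEVICE };

    // CPU-resident sources are uploaded; GPU and GPU_SPLIT sources are copied device-to-device
    // from their per-device extra buffer, mirroring the if/else chain above.
    static copy_kind pick_copy_kind(ggml_backend_type src_backend) {
        if (src_backend == GGML_BACKEND_TYPE_CPU) {
            return HOST_TO_DEVICE;
        }
        if (src_backend == GGML_BACKEND_TYPE_GPU || src_backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
            return DEVICE_TO_DEVICE;
        }
        throw std::runtime_error("unsupported source backend");
    }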
|
|
|
| 11846 |
|
| 11847 |
// the main device has a larger memory buffer to hold the results from all GPUs
|
| 11848 |
// nrows_dst == nrows of the matrix that the dequantize_mul_mat kernel writes into
|
| 11849 |
+
const int64_t nrows_dst = dst->backend == GGML_BACKEND_TYPE_GPU && device_id == g_main_device ? ne0 : row_diff;
|
| 11850 |
|
| 11851 |
switch (src0->type) {
|
| 11852 |
case GGML_TYPE_Q4_0:
|
|
|
|
| 12119 |
|
| 12120 |
// the main device has a larger memory buffer to hold the results from all GPUs
|
| 12121 |
// ldc == nrows of the matrix that cuBLAS writes into
|
| 12122 |
+
int ldc = dst->backend == GGML_BACKEND_TYPE_GPU && device_id == g_main_device ? ne0 : row_diff;
|
| 12123 |
|
| 12124 |
#ifdef GGML_SYCL_F16
|
| 12125 |
bool use_fp16 = true; // TODO(Yu) SYCL capability check
|
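The nrows_dst and ldc hunks above encode the same rule: when the destination lives on the main GPU its buffer holds the full result (ne0 rows), while secondary devices write only their own row slice (row_diff rows). A one-line sketch of that selection; the helper name is illustrative.

    #include <cstdint>

    // Leading dimension of the output written by the per-device kernel or GEMM call.
    static int64_t output_leading_dim(bool dst_on_main_gpu, int64_t ne0, int64_t row_diff) {
        return dst_on_main_gpu ? ne0 : row_diff;
    }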
|
|
|
| 12501 |
const bool use_src1 = src1 != nullptr;
|
| 12502 |
const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
|
| 12503 |
|
| 12504 |
+
GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 12505 |
+
GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 12506 |
|
| 12507 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
|
| 12508 |
ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
|
| 12509 |
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 12510 |
|
| 12511 |
+
const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
|
| 12512 |
+
const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU;
|
| 12513 |
+
const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU;
|
| 12514 |
|
| 12515 |
// dd = data device
|
| 12516 |
float * src0_ddf = nullptr;
|
|
|
|
| 12565 |
main_stream->memcpy(dst->data, dst_ddf, ggml_nbytes(dst))));
|
| 12566 |
}
|
| 12567 |
|
| 12568 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 12569 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 12570 |
dpct::get_current_device().queues_wait_and_throw()));
|
| 12571 |
}
|
|
|
|
| 12640 |
const int nb2 = dst->nb[2];
|
| 12641 |
const int nb3 = dst->nb[3];
|
| 12642 |
|
| 12643 |
+
GGML_ASSERT(dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 12644 |
+
GGML_ASSERT(src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 12645 |
|
| 12646 |
GGML_ASSERT(ne12 >= ne02 && ne12 % ne02 == 0);
|
| 12647 |
|
|
|
|
| 12656 |
ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
|
| 12657 |
ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 12658 |
|
| 12659 |
+
const bool src0_on_device = src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
|
| 12660 |
const bool src0_is_contiguous = ggml_is_contiguous(src0);
|
| 12661 |
const bool src1_is_contiguous = ggml_is_contiguous(src1);
|
| 12662 |
|
| 12663 |
int64_t src1_padded_col_size = GGML_PAD(ne10, MATRIX_ROW_PADDING);
|
| 12664 |
|
| 12665 |
+
const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
|
| 12666 |
GGML_ASSERT(!(split && ne02 > 1));
|
| 12667 |
GGML_ASSERT(!(split && ne03 > 1));
|
| 12668 |
GGML_ASSERT(!(split && ne02 < ne12));
|
|
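The asserts above pin down when the row-split (GGML_BACKEND_TYPE_GPU_SPLIT) matmul path is legal. Restated as a predicate, under the assumption that ne02/ne03 are the src0 batch dimensions and ne12 the src1 batch dimension; this is a sketch, not code from the diff.

    #include <cstdint>

    // Split matmul requires an unbatched src0 and no broadcasting of src0 across src1 batches.
    static bool split_mul_mat_supported(bool split, int64_t ne02, int64_t ne03, int64_t ne12) {
        if (!split) {
            return true; // the non-split path has no extra constraints from these asserts
        }
        return ne02 <= 1 && ne03 <= 1 && ne02 >= ne12;
    }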
|
|
| 12717 |
|
| 12718 |
used_devices++;
|
| 12719 |
|
| 12720 |
+
const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
|
| 12721 |
+
const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
|
| 12722 |
|
| 12723 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
| 12724 |
const dpct::queue_ptr stream = g_syclStreams[id][0];
|
|
|
|
| 12782 |
continue;
|
| 12783 |
}
|
| 12784 |
|
| 12785 |
+
const bool src1_on_device = src1->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
|
| 12786 |
+
const bool dst_on_device = dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index;
|
| 12787 |
const int64_t row_diff = row_high[id] - row_low[id];
|
| 12788 |
|
| 12789 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
|
|
|
| 12809 |
|
| 12810 |
// the main device memory buffer can be on VRAM scratch, with space for all partial results
|
| 12811 |
// in that case an offset on dst_ddf_i is needed
|
| 12812 |
+
if (dst->backend == GGML_BACKEND_TYPE_GPU && id == g_main_device_index) {
|
| 12813 |
dst_dd_i += row_low[id]; // offset is 0 if no tensor split
|
| 12814 |
}
|
| 12815 |
|
| 12816 |
// copy src0, src1 to device if necessary
|
| 12817 |
+
if (src1->backend == GGML_BACKEND_TYPE_GPU && src1_is_contiguous) {
|
| 12818 |
if (id != g_main_device_index) {
|
| 12819 |
if (convert_src1_to_q8_1) {
|
| 12820 |
char * src1_ddq_i_source = src1_ddq[g_main_device_index] + src1_ddq_i_offset;
|
|
|
|
| 12830 |
src1_ncols * ne10 * sizeof(float))));
|
| 12831 |
}
|
| 12832 |
}
|
| 12833 |
+
} else if (src1->backend == GGML_BACKEND_TYPE_CPU || (src1_on_device && !src1_is_contiguous)) {
|
| 12834 |
SYCL_CHECK(ggml_sycl_cpy_tensor_2d(
|
| 12835 |
src1_ddf_i, src1, i03, i02, src1_col_0, src1_col_0+src1_ncols, stream));
|
| 12836 |
} else {
|
| 12837 |
GGML_ASSERT(false);
|
| 12838 |
}
|
| 12839 |
|
| 12840 |
+
if (convert_src1_to_q8_1 && (src1->backend == GGML_BACKEND_TYPE_CPU || !src1_is_contiguous)) {
|
| 12841 |
quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
|
| 12842 |
/*
|
| 12843 |
DPCT1010:92: SYCL uses exceptions to report errors and does
|
|
|
|
| 12867 |
if (!dst_on_device) {
|
| 12868 |
void * dst_off_device;
|
| 12869 |
dpct::memcpy_direction kind;
|
| 12870 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 12871 |
dst_off_device = dst->data;
|
| 12872 |
kind = dpct::device_to_host;
|
| 12873 |
+
} else if (dst->backend == GGML_BACKEND_TYPE_GPU) {
|
| 12874 |
dst_off_device = dst_extra->data_device[g_main_device_index];
|
| 12875 |
kind = dpct::device_to_device;
|
| 12876 |
} else {
|
|
|
|
| 12954 |
}
|
| 12955 |
}
|
| 12956 |
|
| 12957 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 12958 |
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
|
| 12959 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 12960 |
dpct::get_current_device().queues_wait_and_throw()));
|
|
|
|
| 13091 |
const ggml_tensor *src1,
|
| 13092 |
ggml_tensor *dst) try {
|
| 13093 |
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
| 13094 |
+
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 13095 |
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
|
| 13096 |
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
|
| 13097 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
|
|
| 13129 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 13130 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13131 |
GGML_ASSERT(!ggml_is_permuted(src0));
|
| 13132 |
+
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 13133 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 13134 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13135 |
|
|
|
|
| 13196 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 13197 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13198 |
|
| 13199 |
+
GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 13200 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 13201 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13202 |
|
|
|
|
| 13372 |
|
| 13373 |
static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
| 13374 |
const bool all_on_device =
|
| 13375 |
+
(src0->backend == GGML_BACKEND_TYPE_GPU || src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT) &&
|
| 13376 |
+
(src1->backend == GGML_BACKEND_TYPE_GPU) &&
|
| 13377 |
+
( dst->backend == GGML_BACKEND_TYPE_GPU);
|
| 13378 |
|
| 13379 |
+
const bool split = src0->backend == GGML_BACKEND_TYPE_GPU_SPLIT;
|
| 13380 |
|
| 13381 |
int64_t min_compute_capability = INT_MAX;
|
| 13382 |
for (int64_t id = 0; id < g_device_count; ++id) {
|
|
|
|
| 13505 |
GGML_ASSERT(!ggml_is_transposed(src00));
|
| 13506 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 13507 |
|
| 13508 |
+
GGML_ASSERT(src00->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
|
| 13509 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 13510 |
|
| 13511 |
GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
|
|
|
|
| 13643 |
|
| 13644 |
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
|
| 13645 |
|
| 13646 |
+
if (ids->backend == GGML_BACKEND_TYPE_GPU) {
|
| 13647 |
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device_index];
|
| 13648 |
SYCL_CHECK(CHECK_TRY_ERROR(
|
| 13649 |
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
|
|
|
|
| 13661 |
ggml_tensor src1_row = *src1;
|
| 13662 |
ggml_tensor dst_row = *dst;
|
| 13663 |
|
| 13664 |
+
src1_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 13665 |
+
dst_row.backend = GGML_BACKEND_TYPE_GPU;
|
| 13666 |
|
| 13667 |
src1_row.extra = &src1_row_extra;
|
| 13668 |
dst_row.extra = &dst_row_extra;
|
| 13669 |
|
| 13670 |
+
char * src1_original = src1->backend == GGML_BACKEND_TYPE_CPU ?
|
| 13671 |
(char *) src1->data : (char *) src1_extra->data_device[g_main_device_index];
|
| 13672 |
+
char * dst_original = dst->backend == GGML_BACKEND_TYPE_CPU ?
|
| 13673 |
(char *) dst->data : (char *) dst_extra->data_device[g_main_device_index];
|
| 13674 |
|
| 13675 |
if (src1->ne[1] == 1) {
|
| 13676 |
+
GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
|
| 13677 |
+
GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
|
| 13678 |
|
| 13679 |
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
| 13680 |
//int32_t row_id;
|
|
|
|
| 13756 |
}
|
| 13757 |
}
|
| 13758 |
|
| 13759 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 13760 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
| 13761 |
}
|
| 13762 |
}
|
|
|
|
| 13779 |
const int64_t ne = ggml_nelements(src0);
|
| 13780 |
GGML_ASSERT(ne == ggml_nelements(src1));
|
| 13781 |
|
| 13782 |
+
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
| 13783 |
+
GGML_ASSERT(src1->backend == GGML_BACKEND_TYPE_GPU);
|
| 13784 |
|
| 13785 |
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
|
| 13786 |
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
|
|
|
|
| 13887 |
memset(extra, 0, sizeof(*extra));
|
| 13888 |
|
| 13889 |
for (int64_t id = 0; id < g_device_count; ++id) {
|
| 13890 |
+
if (backend == GGML_BACKEND_TYPE_GPU && id != g_main_device_index) {
|
| 13891 |
continue;
|
| 13892 |
}
|
| 13893 |
ggml_sycl_set_device(get_device_id_by_index(id));
|
| 13894 |
const dpct::queue_ptr stream = g_syclStreams[id][0];
|
| 13895 |
|
| 13896 |
int64_t row_low, row_high;
|
| 13897 |
+
if (backend == GGML_BACKEND_TYPE_GPU) {
|
| 13898 |
row_low = 0;
|
| 13899 |
row_high = nrows;
|
| 13900 |
+
} else if (backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
|
| 13901 |
const int64_t rounding = get_row_rounding(tensor->type);
|
| 13902 |
|
| 13903 |
row_low = id == 0 ? 0 : nrows*g_tensor_split[id];
|
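For GGML_BACKEND_TYPE_GPU_SPLIT tensors, each device gets a contiguous row range derived from the per-device split fractions, with boundaries aligned to a type-dependent rounding and the last device absorbing the remainder. The sketch below is an approximation of that arithmetic under those assumptions, not a copy of the code in the diff.

    #include <cstdint>

    static void split_row_range(int id, int device_count, int64_t nrows, const float * tensor_split,
                                int64_t rounding, int64_t & row_low, int64_t & row_high) {
        row_low  = id == 0 ? 0 : (int64_t)(nrows * tensor_split[id]);
        row_low -= row_low % rounding;                     // keep the lower bound block-aligned
        if (id == device_count - 1) {
            row_high = nrows;                              // last device takes whatever is left
        } else {
            row_high  = (int64_t)(nrows * tensor_split[id + 1]);
            row_high -= row_high % rounding;
        }
    }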
|
|
|
| 13946 |
|
| 13947 |
extra->data_device[id] = buf;
|
| 13948 |
|
| 13949 |
+
if (backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
|
| 13950 |
for (int64_t is = 0; is < MAX_STREAMS; ++is) {
|
| 13951 |
SYCL_CHECK(CHECK_TRY_ERROR(extra->events[id][is] =
|
| 13952 |
new sycl::event()));
|
|
|
|
| 13963 |
}
|
| 13964 |
|
| 13965 |
void ggml_sycl_free_data(struct ggml_tensor *tensor) try {
|
| 13966 |
+
if (!tensor || !tensor->extra || (tensor->backend != GGML_BACKEND_TYPE_GPU && tensor->backend != GGML_BACKEND_TYPE_GPU_SPLIT) ) {
|
| 13967 |
return;
|
| 13968 |
}
|
| 13969 |
|
|
|
|
| 14016 |
return;
|
| 14017 |
}
|
| 14018 |
|
| 14019 |
+
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
| 14020 |
|
| 14021 |
+
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU) {
|
| 14022 |
const ggml_op src0_op = tensor->src[0]->op;
|
| 14023 |
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW || src0_op == GGML_OP_PERMUTE) {
|
| 14024 |
ggml_sycl_assign_buffers_impl(tensor->src[0], scratch, force_inplace, no_alloc);
|
| 14025 |
}
|
| 14026 |
}
|
| 14027 |
+
if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU) {
|
| 14028 |
ggml_sycl_assign_buffers_impl(tensor->src[1], scratch, force_inplace, no_alloc);
|
| 14029 |
}
|
| 14030 |
|
|
|
|
| 14042 |
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
|
| 14043 |
const dpct::queue_ptr stream = g_syclStreams[g_main_device_index][0];
|
| 14044 |
|
| 14045 |
+
if (inplace && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT)) {
|
| 14046 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
|
| 14047 |
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
|
| 14048 |
size_t offset = 0;
|
|
|
|
| 14111 |
|
| 14112 |
const bool inplace = tensor->view_src != nullptr;
|
| 14113 |
|
| 14114 |
+
if (inplace && (tensor->view_src->backend == GGML_BACKEND_TYPE_GPU || tensor->view_src->backend == GGML_BACKEND_TYPE_GPU_SPLIT)) {
|
| 14115 |
ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->view_src->extra;
|
| 14116 |
char * src0_ddc = (char *) src0_extra->data_device[g_main_device_index];
|
| 14117 |
size_t view_offset = 0;
|
|
|
|
| 14132 |
}
|
| 14133 |
|
| 14134 |
void ggml_sycl_copy_to_device(struct ggml_tensor *tensor) try {
|
| 14135 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 14136 |
GGML_ASSERT(ggml_is_contiguous(tensor));
|
| 14137 |
|
| 14138 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
|
|
|
| 14219 |
if (!g_sycl_loaded) return false;
|
| 14220 |
|
| 14221 |
ggml_sycl_func_t func;
|
| 14222 |
+
const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
|
| 14223 |
+
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
| 14224 |
+
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
|
| 14225 |
|
| 14226 |
if (!any_on_device && tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_MUL_MAT_ID) {
|
| 14227 |
return false;
|
|
|
|
| 14359 |
return false;
|
| 14360 |
}
|
| 14361 |
|
| 14362 |
+
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT) {
|
| 14363 |
ggml_sycl_set_peer_access(tensor->src[1]->ne[1]);
|
| 14364 |
}
|
| 14365 |
|
| 14366 |
if (params->ith != 0) {
|
| 14367 |
return true;
|
| 14368 |
}
|
| 14369 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14370 |
return true;
|
| 14371 |
}
|
| 14372 |
func(tensor->src[0], tensor->src[1], tensor);
|
|
|
|
| 14517 |
|
| 14518 |
extra->data_device[ctx->device] = tensor->data;
|
| 14519 |
|
| 14520 |
+
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
| 14521 |
tensor->extra = extra;
|
| 14522 |
|
| 14523 |
if (ggml_is_quantized(tensor->type)) {
|
|
|
|
| 14548 |
ggml_tensor *tensor,
|
| 14549 |
const void *data, size_t offset,
|
| 14550 |
size_t size) try {
|
| 14551 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 14552 |
|
| 14553 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 14554 |
|
|
|
|
| 14573 |
const ggml_tensor *tensor,
|
| 14574 |
void *data, size_t offset,
|
| 14575 |
size_t size) try {
|
| 14576 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 14577 |
|
| 14578 |
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
|
| 14579 |
|
|
|
|
| 14809 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 14810 |
|
| 14811 |
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
|
| 14812 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 14813 |
|
| 14814 |
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
|
| 14815 |
(char *)tensor->data + offset, data, size)));
|
|
|
|
| 14827 |
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 14828 |
|
| 14829 |
GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
|
| 14830 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 14831 |
|
| 14832 |
SYCL_CHECK(CHECK_TRY_ERROR(g_syclStreams[sycl_ctx->device][0]->memcpy(
|
| 14833 |
data, (const char *)tensor->data + offset, size)));
|
|
|
|
| 14880 |
ggml_sycl_set_main_device(sycl_ctx->device);
|
| 14881 |
|
| 14882 |
ggml_compute_params params = {};
|
| 14883 |
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
| 14884 |
params.ith = 0;
|
| 14885 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 14886 |
ggml_tensor * node = cgraph->nodes[i];
|
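graph_compute drives every node with params.type set to the renamed GGML_TASK_TYPE_COMPUTE; as the compute_forward hunk earlier shows, INIT and FINALIZE passes (and non-zero thread indices) return early without touching the device. A small sketch of that gate, with a local stand-in for the task-type enum.

    enum ggml_task_type { GGML_TASK_TYPE_INIT, GGML_TASK_TYPE_COMPUTE, GGML_TASK_TYPE_FINALIZE }; // stand-in

    // True only for the pass that should actually launch device work.
    static bool backend_should_compute(ggml_task_type type, int ith) {
        if (ith != 0) {
            return false;                        // only thread 0 drives the GPU backend
        }
        return type == GGML_TASK_TYPE_COMPUTE;   // INIT / FINALIZE are no-ops here
    }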
|
|
|
| 14888 |
if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE)
|
| 14889 |
continue;
|
| 14890 |
|
| 14891 |
+
assert(node->backend == GGML_BACKEND_TYPE_GPU);
|
| 14892 |
assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
|
| 14893 |
assert(node->extra != nullptr);
|
| 14894 |
|
| 14895 |
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 14896 |
if (node->src[j] != nullptr) {
|
| 14897 |
+
assert(node->src[j]->backend == GGML_BACKEND_TYPE_GPU);
|
| 14898 |
assert(node->src[j]->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
|
| 14899 |
assert(node->src[j]->extra != nullptr);
|
| 14900 |
}
|
ggml-vulkan.cpp
CHANGED
|
@@ -2320,8 +2320,8 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
|
|
| 2320 |
src1_uma = d_Qy != nullptr;
|
| 2321 |
}
|
| 2322 |
|
| 2323 |
-
const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
| 2324 |
-
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
| 2325 |
|
| 2326 |
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
| 2327 |
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
|
@@ -2453,7 +2453,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context * su
|
|
| 2453 |
// compute
|
| 2454 |
ggml_vk_matmul(ctx, subctx, *pipeline, { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne12 * ne13 }, { ctx->prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT
|
| 2455 |
|
| 2456 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 2457 |
// copy dst to host
|
| 2458 |
float * d = (float *) ((char *) dst->data);
|
| 2459 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13);
|
|
@@ -2506,8 +2506,8 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
|
|
| 2506 |
src1_uma = d_Qy != nullptr;
|
| 2507 |
}
|
| 2508 |
|
| 2509 |
-
const bool load_x = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
| 2510 |
-
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
| 2511 |
|
| 2512 |
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
| 2513 |
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
|
@@ -2630,7 +2630,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context
|
|
| 2630 |
ggml_vk_sync_buffers(subctx);
|
| 2631 |
ggml_vk_dispatch_pipeline(ctx, subctx, *dmmv, { { d_X, x_offset, x_sz }, { d_Y, y_buffer_offset, y_sz + y_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1});
|
| 2632 |
|
| 2633 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 2634 |
// copy dst to host
|
| 2635 |
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
| 2636 |
ggml_vk_sync_buffers(subctx);
|
|
@@ -2647,7 +2647,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|
| 2647 |
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
| 2648 |
#endif
|
| 2649 |
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
| 2650 |
-
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
| 2651 |
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // NOLINT
|
| 2652 |
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // NOLINT
|
| 2653 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
@@ -2679,7 +2679,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|
| 2679 |
src1_uma = d_Qy != nullptr;
|
| 2680 |
}
|
| 2681 |
|
| 2682 |
-
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
| 2683 |
|
| 2684 |
const uint64_t x_ne = ne00 * ne01 * ne02;
|
| 2685 |
const uint64_t y_ne = ne10 * ne11 * ne12;
|
|
@@ -2721,7 +2721,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
|
|
| 2721 |
ggml_vk_sync_buffers(subctx);
|
| 2722 |
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
| 2723 |
|
| 2724 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 2725 |
// copy dst to host
|
| 2726 |
float * d = (float *) dst->data;
|
| 2727 |
ggml_vk_sync_buffers(subctx);
|
|
@@ -2738,7 +2738,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|
| 2738 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 2739 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 2740 |
GGML_ASSERT(!ggml_is_permuted(src0));
|
| 2741 |
-
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
| 2742 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 2743 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 2744 |
|
|
@@ -2771,7 +2771,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|
| 2771 |
src1_uma = d_Qy != nullptr;
|
| 2772 |
}
|
| 2773 |
|
| 2774 |
-
const bool load_y = src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
| 2775 |
|
| 2776 |
const uint64_t d_ne = ne01 * ne11 * ne12;
|
| 2777 |
|
|
@@ -2814,7 +2814,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
|
|
| 2814 |
ggml_vk_sync_buffers(subctx);
|
| 2815 |
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
| 2816 |
|
| 2817 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 2818 |
// copy dst to host
|
| 2819 |
float * d = (float *) dst->data;
|
| 2820 |
ggml_vk_sync_buffers(subctx);
|
|
@@ -2832,7 +2832,7 @@ static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * sr
|
|
| 2832 |
return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
| 2833 |
(src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16 || ggml_is_quantized(src1->type)) &&
|
| 2834 |
dst->type == GGML_TYPE_F32 &&
|
| 2835 |
-
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_GPU);
|
| 2836 |
}
|
| 2837 |
|
| 2838 |
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context * subctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
|
|
@@ -2880,8 +2880,8 @@ static void ggml_vk_op_repeat(ggml_backend_vk_context * ctx, vk_context * subctx
|
|
| 2880 |
// TODO: support for transposed / permuted tensors
|
| 2881 |
GGML_ASSERT(nb0 == sizeof(float));
|
| 2882 |
GGML_ASSERT(nb00 == sizeof(float));
|
| 2883 |
-
GGML_ASSERT(src0->backend == GGML_BACKEND_GPU);
|
| 2884 |
-
GGML_ASSERT(dst->backend == GGML_BACKEND_GPU);
|
| 2885 |
|
| 2886 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 2887 |
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
|
@@ -3110,8 +3110,8 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|
| 3110 |
}
|
| 3111 |
}
|
| 3112 |
|
| 3113 |
-
const bool transfer_src0 = src0->backend != GGML_BACKEND_GPU && !src0_uma;
|
| 3114 |
-
const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_GPU && !src1_uma;
|
| 3115 |
|
| 3116 |
uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment);
|
| 3117 |
uint64_t y_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * ne1, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : 0;
|
|
@@ -3120,7 +3120,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|
| 3120 |
vk_buffer d_D = extra->buffer_gpu.lock();
|
| 3121 |
|
| 3122 |
// Workaround for tiny tensor inputs on ROPE
|
| 3123 |
-
if (use_src1 && src1->backend == GGML_BACKEND_GPU) {
|
| 3124 |
y_sz = VK_WHOLE_SIZE;
|
| 3125 |
}
|
| 3126 |
|
|
@@ -3209,9 +3209,9 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|
| 3209 |
ggml_vk_sync_buffers(subctx);
|
| 3210 |
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
|
| 3211 |
}
|
| 3212 |
-
if (dst->backend == GGML_BACKEND_CPU && op == GGML_OP_CPY) {
|
| 3213 |
ggml_vk_d2h_tensor_2d(ctx, subctx, d_D, 0, dst);
|
| 3214 |
-
} else if(dst->backend == GGML_BACKEND_CPU) {
|
| 3215 |
// copy dst to host
|
| 3216 |
float * d = (float *) dst->data;
|
| 3217 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, d_sz);
|
|
@@ -3253,7 +3253,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context * subctx, c
|
|
| 3253 |
ggml_vk_sync_buffers(subctx);
|
| 3254 |
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements);
|
| 3255 |
}
|
| 3256 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 3257 |
// copy dst to host
|
| 3258 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz);
|
| 3259 |
}
|
|
@@ -3359,7 +3359,7 @@ static void ggml_vk_rope(ggml_backend_vk_context * ctx, vk_context * subctx, con
|
|
| 3359 |
|
| 3360 |
static void ggml_vk_nop(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) {
|
| 3361 |
// If backend is CPU, data from src0 has to be copied off the device
|
| 3362 |
-
if (dst->backend == GGML_BACKEND_CPU) {
|
| 3363 |
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
| 3364 |
vk_buffer d_D = extra_src0->buffer_gpu.lock();
|
| 3365 |
ggml_vk_sync_buffers(subctx);
|
|
@@ -3994,9 +3994,9 @@ static void ggml_vk_preallocate_buffers_graph(ggml_backend_vk_context * ctx, ggm
|
|
| 3994 |
#ifdef GGML_VULKAN_DEBUG
|
| 3995 |
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
|
| 3996 |
#endif
|
| 3997 |
-
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
| 3998 |
-
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
| 3999 |
-
|| (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_GPU || node->src[1]->backend == GGML_BACKEND_GPU_SPLIT));
|
| 4000 |
|
| 4001 |
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) {
|
| 4002 |
return;
|
|
@@ -4215,9 +4215,9 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
|
|
| 4215 |
}
|
| 4216 |
|
| 4217 |
static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, bool last_node){
|
| 4218 |
-
const bool any_on_device = node->backend == GGML_BACKEND_GPU
|
| 4219 |
-
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_GPU || node->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
| 4220 |
-
|| (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_GPU);
|
| 4221 |
|
| 4222 |
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) {
|
| 4223 |
return;
|
|
@@ -4371,7 +4371,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|
| 4371 |
last_node = true;
|
| 4372 |
#endif
|
| 4373 |
|
| 4374 |
-
if (node->backend == GGML_BACKEND_GPU && last_node) {
|
| 4375 |
ggml_vk_ctx_end(ctx->compute_ctx);
|
| 4376 |
ctx->compute_ctx->exit_tensor = node;
|
| 4377 |
ctx->compute_ctx = nullptr;
|
|
@@ -4379,9 +4379,9 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
|
|
| 4379 |
}
|
| 4380 |
|
| 4381 |
static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){
|
| 4382 |
-
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
| 4383 |
-
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
|
| 4384 |
-
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
|
| 4385 |
|
| 4386 |
if (ctx->disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) {
|
| 4387 |
return false;
|
|
@@ -4442,7 +4442,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 4442 |
if (params->ith != 0) {
|
| 4443 |
return true;
|
| 4444 |
}
|
| 4445 |
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 4446 |
return true;
|
| 4447 |
}
|
| 4448 |
|
|
@@ -4745,7 +4745,7 @@ GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t b
|
|
| 4745 |
extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
|
| 4746 |
}
|
| 4747 |
|
| 4748 |
-
tensor->backend = GGML_BACKEND_GPU;
|
| 4749 |
tensor->extra = extra;
|
| 4750 |
}
|
| 4751 |
|
|
@@ -4753,7 +4753,7 @@ GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t bu
|
|
| 4753 |
#ifdef GGML_VULKAN_DEBUG
|
| 4754 |
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
| 4755 |
#endif
|
| 4756 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 4757 |
|
| 4758 |
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
| 4759 |
|
|
@@ -4768,7 +4768,7 @@ GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t bu
|
|
| 4768 |
#ifdef GGML_VULKAN_DEBUG
|
| 4769 |
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
| 4770 |
#endif
|
| 4771 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 4772 |
|
| 4773 |
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
| 4774 |
|
|
@@ -4999,7 +4999,7 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
|
|
| 4999 |
#endif
|
| 5000 |
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
| 5001 |
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
| 5002 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 5003 |
|
| 5004 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
| 5005 |
|
|
@@ -5020,7 +5020,7 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
|
|
| 5020 |
#endif
|
| 5021 |
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
| 5022 |
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
| 5023 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_GPU);
|
| 5024 |
|
| 5025 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
| 5026 |
|
|
@@ -5097,7 +5097,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
|
|
| 5097 |
int last_node = cgraph->n_nodes - 1;
|
| 5098 |
|
| 5099 |
// If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly
|
| 5100 |
-
while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_GPU) {
|
| 5101 |
last_node -= 1;
|
| 5102 |
}
|
| 5103 |
|
|
@@ -5106,7 +5106,7 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml
|
|
| 5106 |
}
|
| 5107 |
|
| 5108 |
ggml_compute_params params = {};
|
| 5109 |
-
params.type = GGML_TASK_COMPUTE;
|
| 5110 |
params.ith = 0;
|
| 5111 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 5112 |
ggml_tensor * node = cgraph->nodes[i];
|
|
@@ -5416,7 +5416,7 @@ static void ggml_vk_print_tensor_area(const ggml_tensor * tensor, const void * d
|
|
| 5416 |
static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) {
|
| 5417 |
void * tensor_data = tensor->data;
|
| 5418 |
|
| 5419 |
-
if (tensor->backend == GGML_BACKEND_GPU) {
|
| 5420 |
const size_t tensor_size = ggml_nbytes(tensor);
|
| 5421 |
tensor_data = malloc(tensor_size);
|
| 5422 |
|
|
@@ -5442,14 +5442,14 @@ static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tenso
|
|
| 5442 |
std::vector<const ggml_tensor *> done;
|
| 5443 |
ggml_vk_print_graph_origin(tensor, done);
|
| 5444 |
|
| 5445 |
-
if (tensor->backend == GGML_BACKEND_GPU) {
|
| 5446 |
free(tensor_data);
|
| 5447 |
}
|
| 5448 |
}
|
| 5449 |
|
| 5450 |
static void ggml_vk_check_tensor(const std::string& name, const ggml_tensor * tensor) {
|
| 5451 |
return;
|
| 5452 |
-
GGML_ASSERT(tensor->backend == GGML_BACKEND_CPU);
|
| 5453 |
if (tensor->type != GGML_TYPE_F32 && tensor->type != GGML_TYPE_F16) {
|
| 5454 |
return;
|
| 5455 |
}
|
|
@@ -5487,7 +5487,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5487 |
if (params->ith != 0) {
|
| 5488 |
return;
|
| 5489 |
}
|
| 5490 |
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 5491 |
return;
|
| 5492 |
}
|
| 5493 |
|
|
@@ -5524,10 +5524,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5524 |
|
| 5525 |
src0_buffer = malloc(src0_size);
|
| 5526 |
src0_clone->data = src0_buffer;
|
| 5527 |
-
if (src0->backend == GGML_BACKEND_CPU) {
|
| 5528 |
memcpy(src0_clone->data, src0->data, src0_size);
|
| 5529 |
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 5530 |
-
} else if (src0->backend == GGML_BACKEND_GPU) {
|
| 5531 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
|
| 5532 |
uint64_t offset = extra->offset;
|
| 5533 |
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
|
|
@@ -5567,10 +5567,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5567 |
|
| 5568 |
src1_buffer = malloc(src1_size);
|
| 5569 |
src1_clone->data = src1_buffer;
|
| 5570 |
-
if (src1->backend == GGML_BACKEND_CPU) {
|
| 5571 |
memcpy(src1_clone->data, src1->data, src1_size);
|
| 5572 |
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 5573 |
-
} else if (src1->backend == GGML_BACKEND_GPU) {
|
| 5574 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
|
| 5575 |
uint64_t offset = extra->offset;
|
| 5576 |
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
|
|
@@ -5729,7 +5729,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5729 |
if (params->ith != 0) {
|
| 5730 |
return;
|
| 5731 |
}
|
| 5732 |
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
| 5733 |
return;
|
| 5734 |
}
|
| 5735 |
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
|
|
@@ -5741,7 +5741,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5741 |
|
| 5742 |
void * tensor_data = tensor->data;
|
| 5743 |
|
| 5744 |
-
if (tensor->backend == GGML_BACKEND_GPU) {
|
| 5745 |
size_t tensor_size = ggml_nbytes(tensor);
|
| 5746 |
tensor_data = malloc(tensor_size);
|
| 5747 |
|
|
@@ -5874,7 +5874,7 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
|
|
| 5874 |
comp_result = nullptr;
|
| 5875 |
comp_size = 0;
|
| 5876 |
|
| 5877 |
-
if (tensor->backend == GGML_BACKEND_GPU) {
|
| 5878 |
free(tensor_data);
|
| 5879 |
}
|
| 5880 |
}
|
|
|
|
| 2320 |
src1_uma = d_Qy != nullptr;
|
| 2321 |
}
|
| 2322 |
|
| 2323 |
+
const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
| 2324 |
+
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
| 2325 |
|
| 2326 |
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
| 2327 |
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
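In the Vulkan matmul paths, load_x and load_y mark operands that still have to be staged to the device: anything that is not already GPU-backed and not reachable through unified memory (UMA). A sketch of that test, with a local stand-in for the backend enum; the helper name is illustrative.

    enum ggml_backend_type { GGML_BACKEND_TYPE_CPU, GGML_BACKEND_TYPE_GPU, GGML_BACKEND_TYPE_GPU_SPLIT }; // stand-in

    // Mirrors the load_x / load_y flags above.
    static bool needs_upload(ggml_backend_type operand_backend, bool operand_uma) {
        return operand_backend != GGML_BACKEND_TYPE_GPU && !operand_uma;
    }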
|
|
|
| 2453 |
// compute
|
| 2454 |
ggml_vk_matmul(ctx, subctx, *pipeline, { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne12 * ne13 }, { ctx->prealloc_split_k, 0, d_sz * ne12 * ne13 * split_k }, ne01, ne11, ne10, ne10, ne10, ne01, split_k, ne12*ne13, ne02, ne12, r2, r3, stride_batch_x, stride_batch_y, ne20*ne21); // NOLINT
|
| 2455 |
|
| 2456 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 2457 |
// copy dst to host
|
| 2458 |
float * d = (float *) ((char *) dst->data);
|
| 2459 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, sizeof(float) * d_ne * ne12 * ne13);
|
|
|
|
| 2506 |
src1_uma = d_Qy != nullptr;
|
| 2507 |
}
|
| 2508 |
|
| 2509 |
+
const bool load_x = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
| 2510 |
+
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
| 2511 |
|
| 2512 |
const bool x_non_contig = !load_x && !ggml_vk_dim01_contiguous(src0);
|
| 2513 |
const bool y_non_contig = !load_y && !ggml_vk_dim01_contiguous(src1);
|
|
|
|
| 2630 |
ggml_vk_sync_buffers(subctx);
|
| 2631 |
ggml_vk_dispatch_pipeline(ctx, subctx, *dmmv, { { d_X, x_offset, x_sz }, { d_Y, y_buffer_offset, y_sz + y_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 3 * sizeof(int), &pc, { (uint32_t)ne01, 1, 1});
|
| 2632 |
|
| 2633 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 2634 |
// copy dst to host
|
| 2635 |
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
| 2636 |
ggml_vk_sync_buffers(subctx);
|
|
|
|
| 2647 |
std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", backend=" << dst->backend << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3] << "),)" << std::endl;
|
| 2648 |
#endif
|
| 2649 |
GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1));
|
| 2650 |
+
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
| 2651 |
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // NOLINT
|
| 2652 |
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // NOLINT
|
| 2653 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
|
|
|
| 2679 |
src1_uma = d_Qy != nullptr;
|
| 2680 |
}
|
| 2681 |
|
| 2682 |
+
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
| 2683 |
|
| 2684 |
const uint64_t x_ne = ne00 * ne01 * ne02;
|
| 2685 |
const uint64_t y_ne = ne10 * ne11 * ne12;
|
|
|
|
| 2721 |
ggml_vk_sync_buffers(subctx);
|
| 2722 |
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_p021_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
| 2723 |
|
| 2724 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 2725 |
// copy dst to host
|
| 2726 |
float * d = (float *) dst->data;
|
| 2727 |
ggml_vk_sync_buffers(subctx);
|
|
|
|
| 2738 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 2739 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 2740 |
GGML_ASSERT(!ggml_is_permuted(src0));
|
| 2741 |
+
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
| 2742 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 2743 |
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
| 2744 |
|
|
|
|
| 2771 |
src1_uma = d_Qy != nullptr;
|
| 2772 |
}
|
| 2773 |
|
| 2774 |
+
const bool load_y = src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
| 2775 |
|
| 2776 |
const uint64_t d_ne = ne01 * ne11 * ne12;
|
| 2777 |
|
|
|
|
| 2814 |
ggml_vk_sync_buffers(subctx);
|
| 2815 |
ggml_vk_dispatch_pipeline(ctx, subctx, ctx->pipeline_mul_mat_vec_nc_f16_f32, { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof(uint32_t), &pc, { 1, (uint32_t)ne01, (uint32_t)ne12 });
|
| 2816 |
|
| 2817 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 2818 |
// copy dst to host
|
| 2819 |
float * d = (float *) dst->data;
|
| 2820 |
ggml_vk_sync_buffers(subctx);
|
|
|
|
| 2832 |
return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
|
| 2833 |
(src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16 || ggml_is_quantized(src1->type)) &&
|
| 2834 |
dst->type == GGML_TYPE_F32 &&
|
| 2835 |
+
((ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_TYPE_GPU);
|
| 2836 |
}
|
| 2837 |
|
| 2838 |
static void ggml_vk_mul_mat(ggml_backend_vk_context * ctx, vk_context * subctx, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
|
|
|
|
| 2880 |
// TODO: support for transposed / permuted tensors
|
| 2881 |
GGML_ASSERT(nb0 == sizeof(float));
|
| 2882 |
GGML_ASSERT(nb00 == sizeof(float));
|
| 2883 |
+
GGML_ASSERT(src0->backend == GGML_BACKEND_TYPE_GPU);
|
| 2884 |
+
GGML_ASSERT(dst->backend == GGML_BACKEND_TYPE_GPU);
|
| 2885 |
|
| 2886 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
|
| 2887 |
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
|
|
|
| 3110 |
}
|
| 3111 |
}
|
| 3112 |
|
| 3113 |
+
const bool transfer_src0 = src0->backend != GGML_BACKEND_TYPE_GPU && !src0_uma;
|
| 3114 |
+
const bool transfer_src1 = use_src1 && src1->backend != GGML_BACKEND_TYPE_GPU && !src1_uma;
|
| 3115 |
|
| 3116 |
uint64_t x_sz = ggml_vk_align_size(ggml_type_size(src0->type) * ne0, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment);
|
| 3117 |
uint64_t y_sz = use_src1 ? ggml_vk_align_size(ggml_type_size(src1->type) * ne1, ctx->device.lock()->properties.limits.minStorageBufferOffsetAlignment) : 0;
|
|
|
|
| 3120 |
vk_buffer d_D = extra->buffer_gpu.lock();
|
| 3121 |
|
| 3122 |
// Workaround for tiny tensor inputs on ROPE
|
| 3123 |
+
if (use_src1 && src1->backend == GGML_BACKEND_TYPE_GPU && y_sz > d_D->size) {
|
| 3124 |
y_sz = VK_WHOLE_SIZE;
|
| 3125 |
}
|
| 3126 |
|
|
|
|
| 3209 |
ggml_vk_sync_buffers(subctx);
|
| 3210 |
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof(PC), &pc, elements);
|
| 3211 |
}
|
| 3212 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU && op == GGML_OP_CPY) {
|
| 3213 |
ggml_vk_d2h_tensor_2d(ctx, subctx, d_D, 0, dst);
|
| 3214 |
+
} else if(dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 3215 |
// copy dst to host
|
| 3216 |
float * d = (float *) dst->data;
|
| 3217 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, 0, d, d_sz);
|
|
|
|
| 3253 |
ggml_vk_sync_buffers(subctx);
|
| 3254 |
ggml_vk_dispatch_pipeline(ctx, subctx, *pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof(PC), &pc, elements);
|
| 3255 |
}
|
| 3256 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 3257 |
// copy dst to host
|
| 3258 |
ggml_vk_buffer_read_async(ctx, subctx, d_D, d_buf_offset + d_offset, (char *) dst->data + i02*nb2 + i03*nb3, d_sz);
|
| 3259 |
}
|
|
|
|
| 3359 |
|
| 3360 |
static void ggml_vk_nop(ggml_backend_vk_context * ctx, vk_context * subctx, const ggml_tensor * src0, ggml_tensor * dst) {
|
| 3361 |
// If backend is CPU, data from src0 has to be copied off the device
|
| 3362 |
+
if (dst->backend == GGML_BACKEND_TYPE_CPU) {
|
| 3363 |
ggml_tensor_extra_gpu * extra_src0 = (ggml_tensor_extra_gpu *) src0->extra;
|
| 3364 |
vk_buffer d_D = extra_src0->buffer_gpu.lock();
|
| 3365 |
ggml_vk_sync_buffers(subctx);
|
|
|
|
| 3994 |
#ifdef GGML_VULKAN_DEBUG
|
| 3995 |
std::cerr << "ggml_vk_preallocate_buffers_graph(" << node << ")" << std::endl;
|
| 3996 |
#endif
|
| 3997 |
+
const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
|
| 3998 |
+
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
| 3999 |
+
|| (node->src[1] != nullptr && (node->src[1]->backend == GGML_BACKEND_TYPE_GPU));
|
| 4000 |
|
| 4001 |
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT)) {
|
| 4002 |
return;
|
|
|
|
| 4215 |
}
|
| 4216 |
|
| 4217 |
static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, bool last_node){
|
| 4218 |
+
const bool any_on_device = node->backend == GGML_BACKEND_TYPE_GPU
|
| 4219 |
+
|| (node->src[0] != nullptr && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU || node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
| 4220 |
+
|| (node->src[1] != nullptr && node->src[1]->backend == GGML_BACKEND_TYPE_GPU);
|
| 4221 |
|
| 4222 |
if (ctx->disable || (!any_on_device && node->op != GGML_OP_MUL_MAT) || (node->op == GGML_OP_MUL_MAT && !any_on_device && !ggml_vk_can_mul_mat(node->src[0], node->src[1], node))) {
|
| 4223 |
return;
|
|
|
|
| 4371 |
last_node = true;
|
| 4372 |
#endif
|
| 4373 |
|
| 4374 |
+
if (node->backend == GGML_BACKEND_TYPE_CPU || last_node) {
|
| 4375 |
ggml_vk_ctx_end(ctx->compute_ctx);
|
| 4376 |
ctx->compute_ctx->exit_tensor = node;
|
| 4377 |
ctx->compute_ctx = nullptr;
|
|
|
|
| 4379 |
}
|
| 4380 |
|
| 4381 |
static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_compute_params * params, ggml_tensor * tensor){
|
| 4382 |
+
const bool any_on_device = tensor->backend == GGML_BACKEND_TYPE_GPU
|
| 4383 |
+
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU || tensor->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
|
| 4384 |
+
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_TYPE_GPU);
|
| 4385 |
|
| 4386 |
if (ctx->disable || (!any_on_device && tensor->op != GGML_OP_MUL_MAT)) {
|
| 4387 |
return false;
|
|
|
|
| 4442 |
if (params->ith != 0) {
|
| 4443 |
return true;
|
| 4444 |
}
|
| 4445 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 4446 |
return true;
|
| 4447 |
}
|
| 4448 |
|
|
|
|
| 4745 |
extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
|
| 4746 |
}
|
| 4747 |
|
| 4748 |
+
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
| 4749 |
tensor->extra = extra;
|
| 4750 |
}
|
| 4751 |
|
|
|
|
| 4753 |
#ifdef GGML_VULKAN_DEBUG
|
| 4754 |
std::cerr << "ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
| 4755 |
#endif
|
| 4756 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 4757 |
|
| 4758 |
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
| 4759 |
|
|
|
|
| 4768 |
#ifdef GGML_VULKAN_DEBUG
|
| 4769 |
std::cerr << "ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")" << std::endl;
|
| 4770 |
#endif
|
| 4771 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 4772 |
|
| 4773 |
ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
|
| 4774 |
|
|
|
|
| 4999 |
#endif
|
| 5000 |
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
| 5001 |
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
| 5002 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 5003 |
|
| 5004 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
| 5005 |
|
|
|
|
| 5020 |
#endif
|
| 5021 |
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
|
| 5022 |
GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_buffer_type(ctx->idx) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
|
| 5023 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
| 5024 |
|
| 5025 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
| 5026 |
|
|
|
|
| 5097 |
int last_node = cgraph->n_nodes - 1;
|
| 5098 |
|
| 5099 |
// If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly
|
| 5100 |
+
while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_TYPE_GPU) {
|
| 5101 |
last_node -= 1;
|
| 5102 |
}
|
| 5103 |
|
|
|
|
| 5106 |
}
|
| 5107 |
|
| 5108 |
ggml_compute_params params = {};
|
| 5109 |
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
| 5110 |
params.ith = 0;
|
| 5111 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 5112 |
ggml_tensor * node = cgraph->nodes[i];
|
|
|
|
| 5416 |
static void ggml_vk_print_tensor(ggml_backend_vk_context * ctx, const ggml_tensor * tensor, const char * name) {
|
| 5417 |
void * tensor_data = tensor->data;
|
| 5418 |
|
| 5419 |
+
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5420 |
const size_t tensor_size = ggml_nbytes(tensor);
|
| 5421 |
tensor_data = malloc(tensor_size);
|
| 5422 |
|
|
|
|
| 5442 |
std::vector<const ggml_tensor *> done;
|
| 5443 |
ggml_vk_print_graph_origin(tensor, done);
|
| 5444 |
|
| 5445 |
+
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5446 |
free(tensor_data);
|
| 5447 |
}
|
| 5448 |
}
|
| 5449 |
|
| 5450 |
static void ggml_vk_check_tensor(const std::string& name, const ggml_tensor * tensor) {
|
| 5451 |
return;
|
| 5452 |
+
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_CPU);
|
| 5453 |
if (tensor->type != GGML_TYPE_F32 && tensor->type != GGML_TYPE_F16) {
|
| 5454 |
return;
|
| 5455 |
}
|
|
|
|
| 5487 |
if (params->ith != 0) {
|
| 5488 |
return;
|
| 5489 |
}
|
| 5490 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
| 5491 |
return;
|
| 5492 |
}
|
| 5493 |
|
|
|
|
| 5524 |
|
| 5525 |
src0_buffer = malloc(src0_size);
|
| 5526 |
src0_clone->data = src0_buffer;
|
| 5527 |
+
if (src0->backend == GGML_BACKEND_TYPE_CPU) {
|
| 5528 |
memcpy(src0_clone->data, src0->data, src0_size);
|
| 5529 |
memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 5530 |
+
} else if (src0->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5531 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src0->extra;
|
| 5532 |
uint64_t offset = extra->offset;
|
| 5533 |
if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
|
|
|
|
| 5567 |
|
| 5568 |
src1_buffer = malloc(src1_size);
|
| 5569 |
src1_clone->data = src1_buffer;
|
| 5570 |
+
if (src1->backend == GGML_BACKEND_TYPE_CPU) {
|
| 5571 |
memcpy(src1_clone->data, src1->data, src1_size);
|
| 5572 |
memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
|
| 5573 |
+
} else if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5574 |
ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src1->extra;
|
| 5575 |
uint64_t offset = extra->offset;
|
| 5576 |
if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
|
|
|
|
| 5729 |
if (params->ith != 0) {
|
| 5730 |
return;
|
| 5731 |
}
|
| 5732 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE || tensor->op == GGML_OP_TRANSPOSE) {
|
| 5733 |
return;
|
| 5734 |
}
|
| 5735 |
if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) {
|
|
|
|
| 5741 |
|
| 5742 |
void * tensor_data = tensor->data;
|
| 5743 |
|
| 5744 |
+
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5745 |
size_t tensor_size = ggml_nbytes(tensor);
|
| 5746 |
tensor_data = malloc(tensor_size);
|
| 5747 |
|
|
|
|
| 5874 |
comp_result = nullptr;
|
| 5875 |
comp_size = 0;
|
| 5876 |
|
| 5877 |
+
if (tensor->backend == GGML_BACKEND_TYPE_GPU) {
|
| 5878 |
free(tensor_data);
|
| 5879 |
}
|
| 5880 |
}
|
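For reference, the backend checks these Vulkan hunks rename all reduce to two idioms: deciding whether a result has to be copied back to the host, and deciding whether any input of a node already lives on the device. A minimal C sketch of both predicates follows; `needs_host_copy` and `vk_any_on_device` are illustrative names (the real code inlines these expressions), while the enum values are the post-rename GGML_BACKEND_TYPE_* constants shown in the diff.

```c
#include "ggml.h"
#include <stdbool.h>

// A result that still lives on the CPU side must be read back from the device buffer.
static bool needs_host_copy(const struct ggml_tensor * dst) {
    return dst->backend == GGML_BACKEND_TYPE_CPU;
}

// Mirrors the any_on_device expression in ggml_vk_build_graph / ggml_vk_compute_forward:
// run a node on Vulkan if it, or one of its inputs, is already a GPU tensor.
static bool vk_any_on_device(const struct ggml_tensor * node) {
    return node->backend == GGML_BACKEND_TYPE_GPU
        || (node->src[0] != NULL && (node->src[0]->backend == GGML_BACKEND_TYPE_GPU ||
                                     node->src[0]->backend == GGML_BACKEND_TYPE_GPU_SPLIT))
        || (node->src[1] != NULL &&  node->src[1]->backend == GGML_BACKEND_TYPE_GPU);
}
```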
ggml.c
CHANGED
|
@@ -2725,7 +2725,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
| 2725 |         }
| 2726 |     }
| 2727 |
| 2728 | -   struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
| 2728 | +   struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
| 2729 |
| 2730 |     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
| 2731 |

@@ -2733,7 +2733,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
| 2733 |
| 2734 |     *result = (struct ggml_tensor) {
| 2735 |         /*.type    =*/ type,
| 2736 | -       /*.backend =*/ GGML_BACKEND_CPU,
| 2736 | +       /*.backend =*/ GGML_BACKEND_TYPE_CPU,
| 2737 |         /*.buffer  =*/ NULL,
| 2738 |         /*.ne      =*/ { 1, 1, 1, 1 },
| 2739 |         /*.nb      =*/ { 0, 0, 0, 0 },
|
|
@@ -3306,7 +3306,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
| 3306 |     char * const mem_buffer = ctx->mem_buffer;
| 3307 |
| 3308 |     while (obj != NULL) {
| 3309 | -       if (obj->type == GGML_OBJECT_TENSOR) {
| 3309 | +       if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
| 3310 |             return (struct ggml_tensor *)(mem_buffer + obj->offs);
| 3311 |         }
| 3312 |

@@ -3323,7 +3323,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
| 3323 |     char * const mem_buffer = ctx->mem_buffer;
| 3324 |
| 3325 |     while (obj != NULL) {
| 3326 | -       if (obj->type == GGML_OBJECT_TENSOR) {
| 3326 | +       if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
| 3327 |             return (struct ggml_tensor *)(mem_buffer + obj->offs);
| 3328 |         }
| 3329 |

@@ -3339,7 +3339,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
| 3339 |     char * const mem_buffer = ctx->mem_buffer;
| 3340 |
| 3341 |     while (obj != NULL) {
| 3342 | -       if (obj->type == GGML_OBJECT_TENSOR) {
| 3342 | +       if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
| 3343 |             struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
| 3344 |             if (strcmp(cur->name, name) == 0) {
| 3345 |                 return cur;
|
|
@@ -5883,7 +5883,7 @@ struct ggml_tensor * ggml_top_k(
| 5883 |         int k) {
| 5884 |     GGML_ASSERT(a->ne[0] >= k);
| 5885 |
| 5886 | -   struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
| 5886 | +   struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
| 5887 |
| 5888 |     result = ggml_view_4d(ctx, result,
| 5889 |         k, result->ne[1], result->ne[2], result->ne[3],
|
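The ggml_top_k hunk above also documents how top-k is built: argsort the row in descending order, then take a view of the first k index columns. A hypothetical usage sketch with the renamed GGML_SORT_ORDER_DESC constant is below; the sizes and 1-D shape are arbitrary, and this only builds the graph nodes (computing them would additionally need ggml_build_forward_expand and ggml_graph_compute_with_ctx).

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * logits = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 128);

    // What ggml_top_k(ctx, logits, 10) does internally after this rename:
    struct ggml_tensor * idx = ggml_argsort(ctx, logits, GGML_SORT_ORDER_DESC); // I32 permutation
    struct ggml_tensor * top = ggml_view_1d(ctx, idx, 10, 0);                   // first 10 indices

    (void) top;
    ggml_free(ctx);
    return 0;
}
```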
|
@@ -6677,7 +6677,7 @@ static void ggml_compute_forward_dup_same_cont(
|
|
| 6677 |
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
| 6678 |
GGML_ASSERT(src0->type == dst->type);
|
| 6679 |
|
| 6680 |
-
if (params->type ==
|
| 6681 |
return;
|
| 6682 |
}
|
| 6683 |
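Almost every ggml_compute_forward_* hunk in this file edits the same early-return guard, only with the renamed task-type constants. In isolation the pattern looks like this (a skeleton, not code copied from ggml.c; `compute_forward_my_op_f32` is a made-up name):

```c
#include "ggml.h"

static void compute_forward_my_op_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
    // INIT and FINALIZE passes exist for ops that need setup or a final reduction;
    // a plain element-wise op has nothing to do there and bails out early.
    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
    }
    // GGML_TASK_TYPE_COMPUTE: the real work, split across threads via params->ith / params->nth.
    (void) dst;
}
```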
|
|
@@ -6709,7 +6709,7 @@ static void ggml_compute_forward_dup_f16(
|
|
| 6709 |
|
| 6710 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 6711 |
|
| 6712 |
-
if (params->type ==
|
| 6713 |
return;
|
| 6714 |
}
|
| 6715 |
|
|
@@ -6982,7 +6982,7 @@ static void ggml_compute_forward_dup_f32(
|
|
| 6982 |
|
| 6983 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 6984 |
|
| 6985 |
-
if (params->type ==
|
| 6986 |
return;
|
| 6987 |
}
|
| 6988 |
|
|
@@ -7235,7 +7235,7 @@ static void ggml_compute_forward_dup_bytes(
|
|
| 7235 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 7236 |
GGML_ASSERT(src0->type == dst->type);
|
| 7237 |
|
| 7238 |
-
if (params->type ==
|
| 7239 |
return;
|
| 7240 |
}
|
| 7241 |
|
|
@@ -7415,7 +7415,7 @@ static void ggml_compute_forward_add_f32(
|
|
| 7415 |
|
| 7416 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 7417 |
|
| 7418 |
-
if (params->type ==
|
| 7419 |
return;
|
| 7420 |
}
|
| 7421 |
|
|
@@ -7423,7 +7423,7 @@ static void ggml_compute_forward_add_f32(
|
|
| 7423 |
const int nth = params->nth;
|
| 7424 |
|
| 7425 |
#ifdef GGML_USE_CLBLAST
|
| 7426 |
-
if (src1->backend ==
|
| 7427 |
// TODO: OpenCL kernel support full broadcast
|
| 7428 |
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
| 7429 |
if (ith == 0) {
|
|
@@ -7505,7 +7505,7 @@ static void ggml_compute_forward_add_f16_f32(
|
|
| 7505 |
|
| 7506 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7507 |
|
| 7508 |
-
if (params->type ==
|
| 7509 |
return;
|
| 7510 |
}
|
| 7511 |
|
|
@@ -7584,7 +7584,7 @@ static void ggml_compute_forward_add_f16_f16(
|
|
| 7584 |
|
| 7585 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7586 |
|
| 7587 |
-
if (params->type ==
|
| 7588 |
return;
|
| 7589 |
}
|
| 7590 |
|
|
@@ -7640,7 +7640,7 @@ static void ggml_compute_forward_add_q_f32(
|
|
| 7640 |
|
| 7641 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7642 |
|
| 7643 |
-
if (params->type ==
|
| 7644 |
return;
|
| 7645 |
}
|
| 7646 |
|
|
@@ -7778,7 +7778,7 @@ static void ggml_compute_forward_add1_f32(
|
|
| 7778 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7779 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7780 |
|
| 7781 |
-
if (params->type ==
|
| 7782 |
return;
|
| 7783 |
}
|
| 7784 |
|
|
@@ -7832,7 +7832,7 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
| 7832 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7833 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7834 |
|
| 7835 |
-
if (params->type ==
|
| 7836 |
return;
|
| 7837 |
}
|
| 7838 |
|
|
@@ -7884,7 +7884,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
| 7884 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7885 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7886 |
|
| 7887 |
-
if (params->type ==
|
| 7888 |
return;
|
| 7889 |
}
|
| 7890 |
|
|
@@ -7936,7 +7936,7 @@ static void ggml_compute_forward_add1_q_f32(
|
|
| 7936 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7937 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7938 |
|
| 7939 |
-
if (params->type ==
|
| 7940 |
return;
|
| 7941 |
}
|
| 7942 |
|
|
@@ -8066,7 +8066,7 @@ static void ggml_compute_forward_acc_f32(
|
|
| 8066 |
size_t offset = ((int32_t *) dst->op_params)[3];
|
| 8067 |
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
| 8068 |
|
| 8069 |
-
if (!inplace && (params->type ==
|
| 8070 |
if (params->ith != 0) {
|
| 8071 |
return;
|
| 8072 |
}
|
|
@@ -8078,7 +8078,7 @@ static void ggml_compute_forward_acc_f32(
|
|
| 8078 |
ggml_nbytes(dst));
|
| 8079 |
}
|
| 8080 |
|
| 8081 |
-
if (params->type ==
|
| 8082 |
return;
|
| 8083 |
}
|
| 8084 |
|
|
@@ -8180,7 +8180,7 @@ static void ggml_compute_forward_sub_f32(
|
|
| 8180 |
assert(params->ith == 0);
|
| 8181 |
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 8182 |
|
| 8183 |
-
if (params->type ==
|
| 8184 |
return;
|
| 8185 |
}
|
| 8186 |
|
|
@@ -8261,14 +8261,14 @@ static void ggml_compute_forward_mul_f32(
|
|
| 8261 |
|
| 8262 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 8263 |
|
| 8264 |
-
if (params->type ==
|
| 8265 |
return;
|
| 8266 |
}
|
| 8267 |
const int ith = params->ith;
|
| 8268 |
const int nth = params->nth;
|
| 8269 |
|
| 8270 |
#if defined(GGML_USE_CLBLAST)
|
| 8271 |
-
if (src1->backend ==
|
| 8272 |
// TODO: OpenCL kernel support full broadcast
|
| 8273 |
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
| 8274 |
if (ith == 0) {
|
|
@@ -8369,7 +8369,7 @@ static void ggml_compute_forward_div_f32(
|
|
| 8369 |
|
| 8370 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 8371 |
|
| 8372 |
-
if (params->type ==
|
| 8373 |
return;
|
| 8374 |
}
|
| 8375 |
|
|
@@ -8464,7 +8464,7 @@ static void ggml_compute_forward_sqr_f32(
|
|
| 8464 |
assert(params->ith == 0);
|
| 8465 |
assert(ggml_are_same_shape(src0, dst));
|
| 8466 |
|
| 8467 |
-
if (params->type ==
|
| 8468 |
return;
|
| 8469 |
}
|
| 8470 |
|
|
@@ -8510,7 +8510,7 @@ static void ggml_compute_forward_sqrt_f32(
|
|
| 8510 |
assert(params->ith == 0);
|
| 8511 |
assert(ggml_are_same_shape(src0, dst));
|
| 8512 |
|
| 8513 |
-
if (params->type ==
|
| 8514 |
return;
|
| 8515 |
}
|
| 8516 |
|
|
@@ -8556,7 +8556,7 @@ static void ggml_compute_forward_log_f32(
|
|
| 8556 |
GGML_ASSERT(params->ith == 0);
|
| 8557 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 8558 |
|
| 8559 |
-
if (params->type ==
|
| 8560 |
return;
|
| 8561 |
}
|
| 8562 |
|
|
@@ -8602,7 +8602,7 @@ static void ggml_compute_forward_sum_f32(
|
|
| 8602 |
assert(params->ith == 0);
|
| 8603 |
assert(ggml_is_scalar(dst));
|
| 8604 |
|
| 8605 |
-
if (params->type ==
|
| 8606 |
return;
|
| 8607 |
}
|
| 8608 |
|
|
@@ -8637,7 +8637,7 @@ static void ggml_compute_forward_sum_f16(
|
|
| 8637 |
assert(params->ith == 0);
|
| 8638 |
assert(ggml_is_scalar(dst));
|
| 8639 |
|
| 8640 |
-
if (params->type ==
|
| 8641 |
return;
|
| 8642 |
}
|
| 8643 |
|
|
@@ -8694,7 +8694,7 @@ static void ggml_compute_forward_sum_rows_f32(
|
|
| 8694 |
|
| 8695 |
GGML_ASSERT(params->ith == 0);
|
| 8696 |
|
| 8697 |
-
if (params->type ==
|
| 8698 |
return;
|
| 8699 |
}
|
| 8700 |
|
|
@@ -8749,7 +8749,7 @@ static void ggml_compute_forward_mean_f32(
|
|
| 8749 |
|
| 8750 |
assert(params->ith == 0);
|
| 8751 |
|
| 8752 |
-
if (params->type ==
|
| 8753 |
return;
|
| 8754 |
}
|
| 8755 |
|
|
@@ -8808,7 +8808,7 @@ static void ggml_compute_forward_argmax_f32(
|
|
| 8808 |
|
| 8809 |
assert(params->ith == 0);
|
| 8810 |
|
| 8811 |
-
if (params->type ==
|
| 8812 |
return;
|
| 8813 |
}
|
| 8814 |
|
|
@@ -8859,7 +8859,7 @@ static void ggml_compute_forward_repeat_f32(
|
|
| 8859 |
GGML_ASSERT(params->ith == 0);
|
| 8860 |
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
| 8861 |
|
| 8862 |
-
if (params->type ==
|
| 8863 |
return;
|
| 8864 |
}
|
| 8865 |
|
|
@@ -8904,7 +8904,7 @@ static void ggml_compute_forward_repeat_f16(
|
|
| 8904 |
GGML_ASSERT(params->ith == 0);
|
| 8905 |
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
| 8906 |
|
| 8907 |
-
if (params->type ==
|
| 8908 |
return;
|
| 8909 |
}
|
| 8910 |
|
|
@@ -8978,7 +8978,7 @@ static void ggml_compute_forward_repeat_back_f32(
|
|
| 8978 |
GGML_ASSERT(params->ith == 0);
|
| 8979 |
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
| 8980 |
|
| 8981 |
-
if (params->type ==
|
| 8982 |
return;
|
| 8983 |
}
|
| 8984 |
|
|
@@ -9055,7 +9055,7 @@ static void ggml_compute_forward_concat_f32(
|
|
| 9055 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 9056 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 9057 |
|
| 9058 |
-
if (params->type ==
|
| 9059 |
return;
|
| 9060 |
}
|
| 9061 |
|
|
@@ -9127,7 +9127,7 @@ static void ggml_compute_forward_abs_f32(
|
|
| 9127 |
assert(params->ith == 0);
|
| 9128 |
assert(ggml_are_same_shape(src0, dst));
|
| 9129 |
|
| 9130 |
-
if (params->type ==
|
| 9131 |
return;
|
| 9132 |
}
|
| 9133 |
|
|
@@ -9173,7 +9173,7 @@ static void ggml_compute_forward_sgn_f32(
|
|
| 9173 |
assert(params->ith == 0);
|
| 9174 |
assert(ggml_are_same_shape(src0, dst));
|
| 9175 |
|
| 9176 |
-
if (params->type ==
|
| 9177 |
return;
|
| 9178 |
}
|
| 9179 |
|
|
@@ -9219,7 +9219,7 @@ static void ggml_compute_forward_neg_f32(
|
|
| 9219 |
assert(params->ith == 0);
|
| 9220 |
assert(ggml_are_same_shape(src0, dst));
|
| 9221 |
|
| 9222 |
-
if (params->type ==
|
| 9223 |
return;
|
| 9224 |
}
|
| 9225 |
|
|
@@ -9265,7 +9265,7 @@ static void ggml_compute_forward_step_f32(
|
|
| 9265 |
assert(params->ith == 0);
|
| 9266 |
assert(ggml_are_same_shape(src0, dst));
|
| 9267 |
|
| 9268 |
-
if (params->type ==
|
| 9269 |
return;
|
| 9270 |
}
|
| 9271 |
|
|
@@ -9311,7 +9311,7 @@ static void ggml_compute_forward_tanh_f32(
|
|
| 9311 |
assert(params->ith == 0);
|
| 9312 |
assert(ggml_are_same_shape(src0, dst));
|
| 9313 |
|
| 9314 |
-
if (params->type ==
|
| 9315 |
return;
|
| 9316 |
}
|
| 9317 |
|
|
@@ -9357,7 +9357,7 @@ static void ggml_compute_forward_elu_f32(
|
|
| 9357 |
assert(params->ith == 0);
|
| 9358 |
assert(ggml_are_same_shape(src0, dst));
|
| 9359 |
|
| 9360 |
-
if (params->type ==
|
| 9361 |
return;
|
| 9362 |
}
|
| 9363 |
|
|
@@ -9403,7 +9403,7 @@ static void ggml_compute_forward_relu_f32(
|
|
| 9403 |
assert(params->ith == 0);
|
| 9404 |
assert(ggml_are_same_shape(src0, dst));
|
| 9405 |
|
| 9406 |
-
if (params->type ==
|
| 9407 |
return;
|
| 9408 |
}
|
| 9409 |
|
|
@@ -9450,7 +9450,7 @@ static void ggml_compute_forward_gelu_f32(
|
|
| 9450 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9451 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9452 |
|
| 9453 |
-
if (params->type ==
|
| 9454 |
return;
|
| 9455 |
}
|
| 9456 |
|
|
@@ -9513,7 +9513,7 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
| 9513 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9514 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9515 |
|
| 9516 |
-
if (params->type ==
|
| 9517 |
return;
|
| 9518 |
}
|
| 9519 |
|
|
@@ -9576,7 +9576,7 @@ static void ggml_compute_forward_silu_f32(
|
|
| 9576 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9577 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9578 |
|
| 9579 |
-
if (params->type ==
|
| 9580 |
return;
|
| 9581 |
}
|
| 9582 |
|
|
@@ -9637,7 +9637,7 @@ static void ggml_compute_forward_leaky_relu_f32(
|
|
| 9637 |
assert(params->ith == 0);
|
| 9638 |
assert(ggml_are_same_shape(src0, dst));
|
| 9639 |
|
| 9640 |
-
if (params->type ==
|
| 9641 |
return;
|
| 9642 |
}
|
| 9643 |
|
|
@@ -9690,7 +9690,7 @@ static void ggml_compute_forward_silu_back_f32(
|
|
| 9690 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9691 |
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
| 9692 |
|
| 9693 |
-
if (params->type ==
|
| 9694 |
return;
|
| 9695 |
}
|
| 9696 |
|
|
@@ -9752,7 +9752,7 @@ static void ggml_compute_forward_hardswish_f32(
|
|
| 9752 |
assert(params->ith == 0);
|
| 9753 |
assert(ggml_are_same_shape(src0, dst));
|
| 9754 |
|
| 9755 |
-
if (params->type ==
|
| 9756 |
return;
|
| 9757 |
}
|
| 9758 |
|
|
@@ -9795,7 +9795,7 @@ static void ggml_compute_forward_hardsigmoid_f32(
|
|
| 9795 |
assert(params->ith == 0);
|
| 9796 |
assert(ggml_are_same_shape(src0, dst));
|
| 9797 |
|
| 9798 |
-
if (params->type ==
|
| 9799 |
return;
|
| 9800 |
}
|
| 9801 |
|
|
@@ -9841,7 +9841,7 @@ static void ggml_compute_forward_norm_f32(
|
|
| 9841 |
|
| 9842 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9843 |
|
| 9844 |
-
if (params->type ==
|
| 9845 |
return;
|
| 9846 |
}
|
| 9847 |
|
|
@@ -9916,7 +9916,7 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
| 9916 |
|
| 9917 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9918 |
|
| 9919 |
-
if (params->type ==
|
| 9920 |
return;
|
| 9921 |
}
|
| 9922 |
|
|
@@ -9987,7 +9987,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|
| 9987 |
|
| 9988 |
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
|
| 9989 |
|
| 9990 |
-
if (params->type ==
|
| 9991 |
return;
|
| 9992 |
}
|
| 9993 |
|
|
@@ -10165,7 +10165,7 @@ static void ggml_compute_forward_group_norm_f32(
|
|
| 10165 |
|
| 10166 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 10167 |
|
| 10168 |
-
if (params->type ==
|
| 10169 |
return;
|
| 10170 |
}
|
| 10171 |
|
|
@@ -10332,7 +10332,7 @@ static void ggml_compute_forward_mul_mat(
|
|
| 10332 |
|
| 10333 |
#if defined(GGML_USE_CLBLAST)
|
| 10334 |
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
| 10335 |
-
if (params->ith == 0 && params->type ==
|
| 10336 |
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
| 10337 |
}
|
| 10338 |
return;
|
|
@@ -10345,7 +10345,7 @@ static void ggml_compute_forward_mul_mat(
|
|
| 10345 |
const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
|
| 10346 |
UNUSED(desired_wsize);
|
| 10347 |
|
| 10348 |
-
if (params->type ==
|
| 10349 |
if (type != GGML_TYPE_F32) {
|
| 10350 |
assert(params->wsize >= desired_wsize);
|
| 10351 |
// parallelize by src0 rows
|
|
@@ -10368,7 +10368,7 @@ static void ggml_compute_forward_mul_mat(
|
|
| 10368 |
return;
|
| 10369 |
}
|
| 10370 |
|
| 10371 |
-
if (params->type ==
|
| 10372 |
return;
|
| 10373 |
}
|
| 10374 |
|
|
@@ -10406,7 +10406,7 @@ static void ggml_compute_forward_mul_mat(
|
|
| 10406 |
}
|
| 10407 |
#endif
|
| 10408 |
|
| 10409 |
-
if (params->type ==
|
| 10410 |
if (ith != 0) {
|
| 10411 |
return;
|
| 10412 |
}
|
|
@@ -10430,7 +10430,7 @@ static void ggml_compute_forward_mul_mat(
|
|
| 10430 |
return;
|
| 10431 |
}
|
| 10432 |
|
| 10433 |
-
if (params->type ==
|
| 10434 |
return;
|
| 10435 |
}
|
| 10436 |
|
|
@@ -10587,7 +10587,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
| 10587 |
|
| 10588 |
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
|
| 10589 |
|
| 10590 |
-
if (params->type ==
|
| 10591 |
if (ith != 0) {
|
| 10592 |
return;
|
| 10593 |
}
|
|
@@ -10624,7 +10624,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
| 10624 |
return;
|
| 10625 |
}
|
| 10626 |
|
| 10627 |
-
if (params->type ==
|
| 10628 |
return;
|
| 10629 |
}
|
| 10630 |
|
|
@@ -10772,7 +10772,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
| 10772 |
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
|
| 10773 |
#endif
|
| 10774 |
|
| 10775 |
-
if (params->type ==
|
| 10776 |
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
|
| 10777 |
if (use_blas) {
|
| 10778 |
return;
|
|
@@ -10785,7 +10785,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
| 10785 |
return;
|
| 10786 |
}
|
| 10787 |
|
| 10788 |
-
if (params->type ==
|
| 10789 |
return;
|
| 10790 |
}
|
| 10791 |
|
|
@@ -10965,7 +10965,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
| 10965 |
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
|
| 10966 |
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
| 10967 |
|
| 10968 |
-
if (params->type ==
|
| 10969 |
if (ith != 0) {
|
| 10970 |
return;
|
| 10971 |
}
|
|
@@ -10973,7 +10973,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
| 10973 |
return;
|
| 10974 |
}
|
| 10975 |
|
| 10976 |
-
if (params->type ==
|
| 10977 |
return;
|
| 10978 |
}
|
| 10979 |
|
|
@@ -11091,7 +11091,7 @@ static void ggml_compute_forward_scale_f32(
|
|
| 11091 |
GGML_ASSERT(ggml_is_contiguous(dst));
|
| 11092 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 11093 |
|
| 11094 |
-
if (params->type ==
|
| 11095 |
return;
|
| 11096 |
}
|
| 11097 |
|
|
@@ -11163,7 +11163,7 @@ static void ggml_compute_forward_set_f32(
|
|
| 11163 |
size_t offset = ((int32_t *) dst->op_params)[3];
|
| 11164 |
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
| 11165 |
|
| 11166 |
-
if (!inplace && (params->type ==
|
| 11167 |
if (params->ith != 0) {
|
| 11168 |
return;
|
| 11169 |
}
|
|
@@ -11175,7 +11175,7 @@ static void ggml_compute_forward_set_f32(
|
|
| 11175 |
ggml_nbytes(dst));
|
| 11176 |
}
|
| 11177 |
|
| 11178 |
-
if (params->type ==
|
| 11179 |
return;
|
| 11180 |
}
|
| 11181 |
|
|
@@ -11323,7 +11323,7 @@ static void ggml_compute_forward_get_rows_q(
|
|
| 11323 |
|
| 11324 |
assert(params->ith == 0);
|
| 11325 |
|
| 11326 |
-
if (params->type ==
|
| 11327 |
return;
|
| 11328 |
}
|
| 11329 |
|
|
@@ -11363,7 +11363,7 @@ static void ggml_compute_forward_get_rows_f16(
|
|
| 11363 |
|
| 11364 |
assert(params->ith == 0);
|
| 11365 |
|
| 11366 |
-
if (params->type ==
|
| 11367 |
return;
|
| 11368 |
}
|
| 11369 |
|
|
@@ -11400,7 +11400,7 @@ static void ggml_compute_forward_get_rows_f32(
|
|
| 11400 |
|
| 11401 |
assert(params->ith == 0);
|
| 11402 |
|
| 11403 |
-
if (params->type ==
|
| 11404 |
return;
|
| 11405 |
}
|
| 11406 |
|
|
@@ -11503,14 +11503,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
| 11503 |
|
| 11504 |
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
| 11505 |
|
| 11506 |
-
if (params->type ==
|
| 11507 |
if (params->ith != 0) {
|
| 11508 |
return;
|
| 11509 |
}
|
| 11510 |
memset(dst->data, 0, ggml_nbytes(dst));
|
| 11511 |
}
|
| 11512 |
|
| 11513 |
-
if (params->type ==
|
| 11514 |
return;
|
| 11515 |
}
|
| 11516 |
|
|
@@ -11542,14 +11542,14 @@ static void ggml_compute_forward_get_rows_back_f32(
|
|
| 11542 |
|
| 11543 |
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
| 11544 |
|
| 11545 |
-
if (params->type ==
|
| 11546 |
if (params->ith != 0) {
|
| 11547 |
return;
|
| 11548 |
}
|
| 11549 |
memset(dst->data, 0, ggml_nbytes(dst));
|
| 11550 |
}
|
| 11551 |
|
| 11552 |
-
if (params->type ==
|
| 11553 |
return;
|
| 11554 |
}
|
| 11555 |
|
|
@@ -11619,7 +11619,7 @@ static void ggml_compute_forward_diag_f32(
|
|
| 11619 |
|
| 11620 |
GGML_ASSERT(params->ith == 0);
|
| 11621 |
|
| 11622 |
-
if (params->type ==
|
| 11623 |
return;
|
| 11624 |
}
|
| 11625 |
|
|
@@ -11688,7 +11688,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
| 11688 |
|
| 11689 |
GGML_ASSERT(n_past >= 0);
|
| 11690 |
|
| 11691 |
-
if (!inplace && (params->type ==
|
| 11692 |
if (ith != 0) {
|
| 11693 |
return;
|
| 11694 |
}
|
|
@@ -11702,7 +11702,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
| 11702 |
ggml_nbytes(dst));
|
| 11703 |
}
|
| 11704 |
|
| 11705 |
-
if (params->type ==
|
| 11706 |
return;
|
| 11707 |
}
|
| 11708 |
|
|
@@ -11776,7 +11776,7 @@ static void ggml_compute_forward_soft_max_f32(
|
|
| 11776 |
assert(ggml_is_contiguous(dst));
|
| 11777 |
assert(ggml_are_same_shape(src0, dst));
|
| 11778 |
|
| 11779 |
-
if (params->type ==
|
| 11780 |
return;
|
| 11781 |
}
|
| 11782 |
|
|
@@ -11914,7 +11914,7 @@ static void ggml_compute_forward_soft_max_back_f32(
|
|
| 11914 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 11915 |
GGML_ASSERT(ggml_are_same_shape(src1, dst));
|
| 11916 |
|
| 11917 |
-
if (params->type ==
|
| 11918 |
return;
|
| 11919 |
}
|
| 11920 |
|
|
@@ -12008,7 +12008,7 @@ static void ggml_compute_forward_alibi_f32(
|
|
| 12008 |
|
| 12009 |
assert(params->ith == 0);
|
| 12010 |
|
| 12011 |
-
if (params->type ==
|
| 12012 |
return;
|
| 12013 |
}
|
| 12014 |
|
|
@@ -12067,7 +12067,7 @@ static void ggml_compute_forward_alibi_f16(
|
|
| 12067 |
|
| 12068 |
assert(params->ith == 0);
|
| 12069 |
|
| 12070 |
-
if (params->type ==
|
| 12071 |
return;
|
| 12072 |
}
|
| 12073 |
|
|
@@ -12174,7 +12174,7 @@ static void ggml_compute_forward_clamp_f32(
|
|
| 12174 |
|
| 12175 |
assert(params->ith == 0);
|
| 12176 |
|
| 12177 |
-
if (params->type ==
|
| 12178 |
return;
|
| 12179 |
}
|
| 12180 |
|
|
@@ -12314,7 +12314,7 @@ static void ggml_compute_forward_rope_f32(
|
|
| 12314 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 12315 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 12316 |
|
| 12317 |
-
if (params->type ==
|
| 12318 |
return;
|
| 12319 |
}
|
| 12320 |
|
|
@@ -12492,7 +12492,7 @@ static void ggml_compute_forward_rope_f16(
|
|
| 12492 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 12493 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 12494 |
|
| 12495 |
-
if (params->type ==
|
| 12496 |
return;
|
| 12497 |
}
|
| 12498 |
|
|
@@ -12723,7 +12723,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
| 12723 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 12724 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12725 |
|
| 12726 |
-
if (params->type ==
|
| 12727 |
if (ith != 0) {
|
| 12728 |
return;
|
| 12729 |
}
|
|
@@ -12763,7 +12763,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
| 12763 |
return;
|
| 12764 |
}
|
| 12765 |
|
| 12766 |
-
if (params->type ==
|
| 12767 |
return;
|
| 12768 |
}
|
| 12769 |
|
|
@@ -12822,7 +12822,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
| 12822 |
GGML_ASSERT(nb00 == sizeof(float));
|
| 12823 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12824 |
|
| 12825 |
-
if (params->type ==
|
| 12826 |
if (ith != 0) {
|
| 12827 |
return;
|
| 12828 |
}
|
|
@@ -12862,7 +12862,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
| 12862 |
return;
|
| 12863 |
}
|
| 12864 |
|
| 12865 |
-
if (params->type ==
|
| 12866 |
return;
|
| 12867 |
}
|
| 12868 |
|
|
@@ -12966,11 +12966,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
| 12966 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 12967 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12968 |
|
| 12969 |
-
if (params->type ==
|
| 12970 |
return;
|
| 12971 |
}
|
| 12972 |
|
| 12973 |
-
if (params->type ==
|
| 12974 |
return;
|
| 12975 |
}
|
| 12976 |
|
|
@@ -13054,11 +13054,11 @@ static void ggml_compute_forward_im2col_f16(
|
|
| 13054 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 13055 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 13056 |
|
| 13057 |
-
if (params->type ==
|
| 13058 |
return;
|
| 13059 |
}
|
| 13060 |
|
| 13061 |
-
if (params->type ==
|
| 13062 |
return;
|
| 13063 |
}
|
| 13064 |
|
|
@@ -13140,7 +13140,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
| 13140 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 13141 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 13142 |
|
| 13143 |
-
if (params->type ==
|
| 13144 |
if (ith != 0) {
|
| 13145 |
return;
|
| 13146 |
}
|
|
@@ -13182,7 +13182,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
| 13182 |
return;
|
| 13183 |
}
|
| 13184 |
|
| 13185 |
-
if (params->type ==
|
| 13186 |
return;
|
| 13187 |
}
|
| 13188 |
|
|
@@ -13234,7 +13234,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
| 13234 |
assert(src->type == GGML_TYPE_F32);
|
| 13235 |
assert(params->ith == 0);
|
| 13236 |
|
| 13237 |
-
if (params->type ==
|
| 13238 |
return;
|
| 13239 |
}
|
| 13240 |
|
|
@@ -13303,7 +13303,7 @@ static void ggml_compute_forward_pool_2d(
|
|
| 13303 |
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
| 13304 |
GGML_ASSERT(params->ith == 0);
|
| 13305 |
|
| 13306 |
-
if (params->type ==
|
| 13307 |
return;
|
| 13308 |
}
|
| 13309 |
|
|
@@ -13376,7 +13376,7 @@ static void ggml_compute_forward_upscale_f32(
|
|
| 13376 |
|
| 13377 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13378 |
|
| 13379 |
-
if (params->type ==
|
| 13380 |
return;
|
| 13381 |
}
|
| 13382 |
|
|
@@ -13436,7 +13436,7 @@ static void ggml_compute_forward_pad_f32(
|
|
| 13436 |
|
| 13437 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13438 |
|
| 13439 |
-
if (params->type ==
|
| 13440 |
return;
|
| 13441 |
}
|
| 13442 |
|
|
@@ -13497,7 +13497,7 @@ static void ggml_compute_forward_argsort_f32(
|
|
| 13497 |
|
| 13498 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13499 |
|
| 13500 |
-
if (params->type ==
|
| 13501 |
return;
|
| 13502 |
}
|
| 13503 |
|
|
@@ -13523,8 +13523,8 @@ static void ggml_compute_forward_argsort_f32(
|
|
| 13523 |
// C doesn't have a functional sort, so we do a bubble sort instead
|
| 13524 |
for (int64_t j = 0; j < ne0; j++) {
|
| 13525 |
for (int64_t k = j + 1; k < ne0; k++) {
|
| 13526 |
-
if ((order ==
|
| 13527 |
-
(order ==
|
| 13528 |
int32_t tmp = dst_data[j];
|
| 13529 |
dst_data[j] = dst_data[k];
|
| 13530 |
dst_data[k] = tmp;
|
|
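The comparison inside this bubble sort is cut off in this rendering; only the renamed order constants survive. A self-contained sketch of the usual index-sort comparison is below. The enum is a local stand-in for GGML_SORT_ORDER_ASC/GGML_SORT_ORDER_DESC, and the `src[idx[...]]` form is a reconstruction of the truncated expression, not a verbatim copy.

```c
#include <stdint.h>

enum sort_order_example { SORT_ORDER_ASC, SORT_ORDER_DESC };

// Sort the index array idx so that src[idx[0..n)] ends up ascending or descending.
static void argsort_row(const float * src, int32_t * idx, int64_t n, enum sort_order_example order) {
    for (int64_t j = 0; j < n; j++) {
        for (int64_t k = j + 1; k < n; k++) {
            if ((order == SORT_ORDER_ASC  && src[idx[j]] > src[idx[k]]) ||
                (order == SORT_ORDER_DESC && src[idx[j]] < src[idx[k]])) {
                int32_t tmp = idx[j];
                idx[j] = idx[k];
                idx[k] = tmp;
            }
        }
    }
}
```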
@@ -13607,11 +13607,11 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
| 13607 |
GGML_ASSERT(nb1 <= nb2);
|
| 13608 |
GGML_ASSERT(nb2 <= nb3);
|
| 13609 |
|
| 13610 |
-
if (params->type ==
|
| 13611 |
return;
|
| 13612 |
}
|
| 13613 |
|
| 13614 |
-
if (params->type ==
|
| 13615 |
return;
|
| 13616 |
}
|
| 13617 |
|
|
@@ -13799,11 +13799,11 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
| 13799 |
GGML_ASSERT(nb1 <= nb2);
|
| 13800 |
GGML_ASSERT(nb2 <= nb3);
|
| 13801 |
|
| 13802 |
-
if (params->type ==
|
| 13803 |
return;
|
| 13804 |
}
|
| 13805 |
|
| 13806 |
-
if (params->type ==
|
| 13807 |
return;
|
| 13808 |
}
|
| 13809 |
|
|
@@ -14058,11 +14058,11 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
| 14058 |
GGML_ASSERT(nb1 <= nb2);
|
| 14059 |
GGML_ASSERT(nb2 <= nb3);
|
| 14060 |
|
| 14061 |
-
if (params->type ==
|
| 14062 |
return;
|
| 14063 |
}
|
| 14064 |
|
| 14065 |
-
if (params->type ==
|
| 14066 |
return;
|
| 14067 |
}
|
| 14068 |
|
|
@@ -14217,14 +14217,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
| 14217 |
GGML_ASSERT(nb1 <= nb2);
|
| 14218 |
GGML_ASSERT(nb2 <= nb3);
|
| 14219 |
|
| 14220 |
-
if (params->type ==
|
| 14221 |
if (ith == 0) {
|
| 14222 |
memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
|
| 14223 |
}
|
| 14224 |
return;
|
| 14225 |
}
|
| 14226 |
|
| 14227 |
-
if (params->type ==
|
| 14228 |
return;
|
| 14229 |
}
|
| 14230 |
|
|
@@ -14540,7 +14540,7 @@ static void ggml_compute_forward_win_part_f32(
|
|
| 14540 |
|
| 14541 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14542 |
|
| 14543 |
-
if (params->type ==
|
| 14544 |
return;
|
| 14545 |
}
|
| 14546 |
|
|
@@ -14606,7 +14606,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
| 14606 |
|
| 14607 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14608 |
|
| 14609 |
-
if (params->type ==
|
| 14610 |
return;
|
| 14611 |
}
|
| 14612 |
|
|
@@ -14734,7 +14734,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
|
| 14734 |
|
| 14735 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14736 |
|
| 14737 |
-
if (params->type ==
|
| 14738 |
return;
|
| 14739 |
}
|
| 14740 |
|
|
@@ -14786,14 +14786,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
|
|
| 14786 |
const struct ggml_tensor * src2 = dst->src[2];
|
| 14787 |
|
| 14788 |
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
|
| 14789 |
-
if (!inplace && params->type ==
|
| 14790 |
if (params->ith != 0) {
|
| 14791 |
return;
|
| 14792 |
}
|
| 14793 |
memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
|
| 14794 |
return;
|
| 14795 |
}
|
| 14796 |
-
if (params->type ==
|
| 14797 |
return;
|
| 14798 |
}
|
| 14799 |
|
|
@@ -14875,7 +14875,7 @@ static void ggml_compute_forward_map_unary_f32(
|
|
| 14875 |
|
| 14876 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 14877 |
|
| 14878 |
-
if (params->type ==
|
| 14879 |
return;
|
| 14880 |
}
|
| 14881 |
|
|
@@ -14924,7 +14924,7 @@ static void ggml_compute_forward_map_binary_f32(
|
|
| 14924 |
assert(params->ith == 0);
|
| 14925 |
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 14926 |
|
| 14927 |
-
if (params->type ==
|
| 14928 |
return;
|
| 14929 |
}
|
| 14930 |
|
|
@@ -14973,7 +14973,7 @@ static void ggml_compute_forward_map_custom1_f32(
|
|
| 14973 |
|
| 14974 |
assert(params->ith == 0);
|
| 14975 |
|
| 14976 |
-
if (params->type ==
|
| 14977 |
return;
|
| 14978 |
}
|
| 14979 |
|
|
@@ -14992,7 +14992,7 @@ static void ggml_compute_forward_map_custom2_f32(
|
|
| 14992 |
|
| 14993 |
assert(params->ith == 0);
|
| 14994 |
|
| 14995 |
-
if (params->type ==
|
| 14996 |
return;
|
| 14997 |
}
|
| 14998 |
|
|
@@ -15012,7 +15012,7 @@ static void ggml_compute_forward_map_custom3_f32(
|
|
| 15012 |
|
| 15013 |
assert(params->ith == 0);
|
| 15014 |
|
| 15015 |
-
if (params->type ==
|
| 15016 |
return;
|
| 15017 |
}
|
| 15018 |
|
|
@@ -15027,7 +15027,7 @@ static void ggml_compute_forward_map_custom1(
|
|
| 15027 |
|
| 15028 |
const struct ggml_tensor * a = dst->src[0];
|
| 15029 |
|
| 15030 |
-
if (params->type ==
|
| 15031 |
return;
|
| 15032 |
}
|
| 15033 |
|
|
@@ -15045,7 +15045,7 @@ static void ggml_compute_forward_map_custom2(
|
|
| 15045 |
const struct ggml_tensor * a = dst->src[0];
|
| 15046 |
const struct ggml_tensor * b = dst->src[1];
|
| 15047 |
|
| 15048 |
-
if (params->type ==
|
| 15049 |
return;
|
| 15050 |
}
|
| 15051 |
|
|
@@ -15064,7 +15064,7 @@ static void ggml_compute_forward_map_custom3(
|
|
| 15064 |
const struct ggml_tensor * b = dst->src[1];
|
| 15065 |
const struct ggml_tensor * c = dst->src[2];
|
| 15066 |
|
| 15067 |
-
if (params->type ==
|
| 15068 |
return;
|
| 15069 |
}
|
| 15070 |
|
|
@@ -15098,14 +15098,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
| 15098 |
|
| 15099 |
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
|
| 15100 |
|
| 15101 |
-
if (params->type ==
|
| 15102 |
if (ith == 0) {
|
| 15103 |
memset(sums, 0, sizeof(float) * (nth + nth * nc));
|
| 15104 |
}
|
| 15105 |
return;
|
| 15106 |
}
|
| 15107 |
|
| 15108 |
-
if (params->type ==
|
| 15109 |
if (ith == 0) {
|
| 15110 |
float * dp = (float *) dst->data;
|
| 15111 |
ggml_vec_sum_f32(nth, dp, sums);
|
|
@@ -15220,7 +15220,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
| 15220 |
const int64_t ith = params->ith;
|
| 15221 |
const int64_t nth = params->nth;
|
| 15222 |
|
| 15223 |
-
if (params->type ==
|
| 15224 |
return;
|
| 15225 |
}
|
| 15226 |
|
|
@@ -15327,8 +15327,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
| 15327 |
if (skip_cpu) {
|
| 15328 |
return;
|
| 15329 |
}
|
| 15330 |
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
| 15331 |
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
| 15332 |
#elif defined(GGML_USE_VULKAN)
|
| 15333 |
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
| 15334 |
#ifdef GGML_VULKAN_CHECK_RESULTS
|
|
@@ -15339,8 +15339,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
| 15339 |
if (skip_cpu) {
|
| 15340 |
return;
|
| 15341 |
}
|
| 15342 |
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
| 15343 |
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
| 15344 |
#endif // GGML_USE_CUBLAS
|
| 15345 |
|
| 15346 |
#ifdef GGML_USE_SYCL
|
|
@@ -16886,7 +16886,7 @@ size_t ggml_graph_overhead(void) {
|
|
| 16886 |
|
| 16887 |
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
| 16888 |
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
| 16889 |
-
struct ggml_object * obj = ggml_new_object(ctx,
|
| 16890 |
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
| 16891 |
|
| 16892 |
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
|
@@ -17433,7 +17433,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17433 |
set_numa_thread_affinity(state->ith);
|
| 17434 |
|
| 17435 |
int node_n = -1;
|
| 17436 |
-
int task_phase =
|
| 17437 |
|
| 17438 |
while (true) {
|
| 17439 |
if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
|
|
@@ -17445,7 +17445,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17445 |
// all other threads are finished and spinning
|
| 17446 |
// do finalize and init here so we don't have synchronize again
|
| 17447 |
struct ggml_compute_params params = {
|
| 17448 |
-
/*.type =*/
|
| 17449 |
/*.ith =*/ 0,
|
| 17450 |
/*.nth =*/ 0,
|
| 17451 |
/*.wsize =*/ cplan->work_size,
|
|
@@ -17476,17 +17476,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17476 |
if (n_tasks == 1) {
|
| 17477 |
/* INIT */
|
| 17478 |
if (GGML_OP_HAS_INIT[node->op]) {
|
| 17479 |
-
params.type =
|
| 17480 |
ggml_compute_forward(¶ms, node);
|
| 17481 |
}
|
| 17482 |
|
| 17483 |
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
|
| 17484 |
// they do something more efficient than spinning (?)
|
| 17485 |
-
params.type =
|
| 17486 |
ggml_compute_forward(¶ms, node);
|
| 17487 |
|
| 17488 |
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
| 17489 |
-
params.type =
|
| 17490 |
ggml_compute_forward(¶ms, node);
|
| 17491 |
}
|
| 17492 |
|
|
@@ -17500,7 +17500,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17500 |
}
|
| 17501 |
}
|
| 17502 |
|
| 17503 |
-
task_phase =
|
| 17504 |
atomic_store(&state->shared->n_active, n_threads);
|
| 17505 |
atomic_store(&state->shared->node_n, node_n);
|
| 17506 |
atomic_store(&state->shared->node_task, task_phase);
|
|
@@ -17517,7 +17517,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17517 |
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
| 17518 |
|
| 17519 |
struct ggml_compute_params params = {
|
| 17520 |
-
/*.type =*/
|
| 17521 |
/*.ith =*/ state->ith,
|
| 17522 |
/*.nth =*/ n_tasks,
|
| 17523 |
/*.wsize =*/ cplan->work_size,
|
|
@@ -17531,7 +17531,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17531 |
}
|
| 17532 |
|
| 17533 |
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
| 17534 |
-
task_phase =
|
| 17535 |
atomic_store(&state->shared->n_active, n_threads);
|
| 17536 |
atomic_store(&state->shared->node_task, task_phase);
|
| 17537 |
}
|
|
@@ -17546,12 +17546,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 17546 |
}
|
| 17547 |
|
| 17548 |
if (state->ith < n_tasks) {
|
| 17549 |
-
params.type =
|
| 17550 |
ggml_compute_forward(¶ms, node);
|
| 17551 |
}
|
| 17552 |
|
| 17553 |
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
| 17554 |
-
task_phase =
|
| 17555 |
atomic_store(&state->shared->n_active, n_threads);
|
| 17556 |
atomic_store(&state->shared->node_task, task_phase);
|
| 17557 |
}
|
|
@@ -17787,7 +17787,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
| 17787 |
/*.n_threads =*/ n_threads,
|
| 17788 |
/*.n_active =*/ n_threads,
|
| 17789 |
/*.node_n =*/ -1,
|
| 17790 |
-
/*.node_task =*/
|
| 17791 |
/*.abort_callback =*/ NULL,
|
| 17792 |
/*.abort_callback_data =*/ NULL,
|
| 17793 |
};
|
|
@@ -17855,7 +17855,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
| 17855 |
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
| 17856 |
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
| 17857 |
|
| 17858 |
-
struct ggml_object * obj = ggml_new_object(ctx,
|
| 17859 |
|
| 17860 |
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
| 17861 |
|
|
@@ -18663,7 +18663,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18663 |
float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
|
| 18664 |
|
| 18665 |
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
| 18666 |
-
struct ggml_object * obj = ggml_new_object(ctx,
|
| 18667 |
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
| 18668 |
|
| 18669 |
bool cancel = false;
|
|
@@ -18675,7 +18675,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18675 |
if (callback) {
|
| 18676 |
callback(callback_data, accum_step, &sched, &cancel);
|
| 18677 |
if (cancel) {
|
| 18678 |
-
return
|
| 18679 |
}
|
| 18680 |
}
|
| 18681 |
// ggml_graph_reset (gf);
|
|
@@ -18766,7 +18766,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18766 |
if (callback) {
|
| 18767 |
callback(callback_data, accum_step, &sched, &cancel);
|
| 18768 |
if (cancel) {
|
| 18769 |
-
return
|
| 18770 |
}
|
| 18771 |
}
|
| 18772 |
// ggml_graph_reset (gf);
|
|
@@ -18783,7 +18783,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18783 |
if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
|
| 18784 |
GGML_PRINT_DEBUG("converged\n");
|
| 18785 |
|
| 18786 |
-
return
|
| 18787 |
}
|
| 18788 |
|
| 18789 |
// delta-based convergence test
|
|
@@ -18793,7 +18793,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18793 |
const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
|
| 18794 |
|
| 18795 |
if (fabsf(rate) < params.delta) {
|
| 18796 |
-
return
|
| 18797 |
}
|
| 18798 |
}
|
| 18799 |
|
|
@@ -18809,7 +18809,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18809 |
++n_no_improvement[0];
|
| 18810 |
|
| 18811 |
if (n_no_improvement[0] >= params.max_no_improvement) {
|
| 18812 |
-
return
|
| 18813 |
}
|
| 18814 |
}
|
| 18815 |
}
|
|
@@ -18827,7 +18827,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
| 18827 |
}
|
| 18828 |
}
|
| 18829 |
|
| 18830 |
-
return
|
| 18831 |
}
|
| 18832 |
|
| 18833 |
//
|
|
@@ -18908,7 +18908,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
| 18908 |
float sched = 0;
|
| 18909 |
callback(callback_data, accum_step, &sched, cancel);
|
| 18910 |
if (*cancel) {
|
| 18911 |
-
return
|
| 18912 |
}
|
| 18913 |
}
|
| 18914 |
// ggml_graph_reset (gf);
|
|
@@ -18981,7 +18981,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
| 18981 |
if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
|
| 18982 |
params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
|
| 18983 |
if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
|
| 18984 |
-
return
|
| 18985 |
}
|
| 18986 |
}
|
| 18987 |
|
|
@@ -19010,7 +19010,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
| 19010 |
}
|
| 19011 |
|
| 19012 |
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
| 19013 |
-
struct ggml_object * obj = ggml_new_object(ctx,
|
| 19014 |
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
| 19015 |
|
| 19016 |
float * x = opt->lbfgs.x->data; // current parameters
|
|
@@ -19051,7 +19051,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
| 19051 |
| 2725 |
}
|
| 2726 |
}
|
| 2727 |
|
| 2728 |
+
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
|
| 2729 |
|
| 2730 |
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
|
| 2731 |
|
|
|
|
| 2733 |
|
| 2734 |
*result = (struct ggml_tensor) {
|
| 2735 |
/*.type =*/ type,
|
| 2736 |
+
/*.backend =*/ GGML_BACKEND_TYPE_CPU,
|
| 2737 |
/*.buffer =*/ NULL,
|
| 2738 |
/*.ne =*/ { 1, 1, 1, 1 },
|
| 2739 |
/*.nb =*/ { 0, 0, 0, 0 },
|
|
|
|
| 3306 |
char * const mem_buffer = ctx->mem_buffer;
|
| 3307 |
|
| 3308 |
while (obj != NULL) {
|
| 3309 |
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
| 3310 |
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
| 3311 |
}
|
| 3312 |
|
|
|
|
| 3323 |
char * const mem_buffer = ctx->mem_buffer;
|
| 3324 |
|
| 3325 |
while (obj != NULL) {
|
| 3326 |
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
| 3327 |
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
| 3328 |
}
|
| 3329 |
|
|
|
|
| 3339 |
char * const mem_buffer = ctx->mem_buffer;
|
| 3340 |
|
| 3341 |
while (obj != NULL) {
|
| 3342 |
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
| 3343 |
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
| 3344 |
if (strcmp(cur->name, name) == 0) {
|
| 3345 |
return cur;
|
|
|
|
| 5883 |
int k) {
|
| 5884 |
GGML_ASSERT(a->ne[0] >= k);
|
| 5885 |
|
| 5886 |
+
struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
|
| 5887 |
|
| 5888 |
result = ggml_view_4d(ctx, result,
|
| 5889 |
k, result->ne[1], result->ne[2], result->ne[3],
|
|
|
|
| 6677 |
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
| 6678 |
GGML_ASSERT(src0->type == dst->type);
|
| 6679 |
|
| 6680 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 6681 |
return;
|
| 6682 |
}
|
| 6683 |
|
|
|
|
| 6709 |
|
| 6710 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 6711 |
|
| 6712 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 6713 |
return;
|
| 6714 |
}
|
| 6715 |
|
|
|
|
| 6982 |
|
| 6983 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 6984 |
|
| 6985 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 6986 |
return;
|
| 6987 |
}
|
| 6988 |
|
|
|
|
| 7235 |
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
| 7236 |
GGML_ASSERT(src0->type == dst->type);
|
| 7237 |
|
| 7238 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7239 |
return;
|
| 7240 |
}
|
| 7241 |
|
|
|
|
| 7415 |
|
| 7416 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 7417 |
|
| 7418 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7419 |
return;
|
| 7420 |
}
|
| 7421 |
|
|
|
|
| 7423 |
const int nth = params->nth;
|
| 7424 |
|
| 7425 |
#ifdef GGML_USE_CLBLAST
|
| 7426 |
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
| 7427 |
// TODO: OpenCL kernel support full broadcast
|
| 7428 |
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
| 7429 |
if (ith == 0) {
|
|
|
|
| 7505 |
|
| 7506 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7507 |
|
| 7508 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7509 |
return;
|
| 7510 |
}
|
| 7511 |
|
|
|
|
| 7584 |
|
| 7585 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7586 |
|
| 7587 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7588 |
return;
|
| 7589 |
}
|
| 7590 |
|
|
|
|
| 7640 |
|
| 7641 |
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 7642 |
|
| 7643 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7644 |
return;
|
| 7645 |
}
|
| 7646 |
|
|
|
|
| 7778 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7779 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7780 |
|
| 7781 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7782 |
return;
|
| 7783 |
}
|
| 7784 |
|
|
|
|
| 7832 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7833 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7834 |
|
| 7835 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7836 |
return;
|
| 7837 |
}
|
| 7838 |
|
|
|
|
| 7884 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7885 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7886 |
|
| 7887 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7888 |
return;
|
| 7889 |
}
|
| 7890 |
|
|
|
|
| 7936 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 7937 |
GGML_ASSERT(ggml_is_scalar(src1));
|
| 7938 |
|
| 7939 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 7940 |
return;
|
| 7941 |
}
|
| 7942 |
|
|
|
|
| 8066 |
size_t offset = ((int32_t *) dst->op_params)[3];
|
| 8067 |
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
| 8068 |
|
| 8069 |
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
| 8070 |
if (params->ith != 0) {
|
| 8071 |
return;
|
| 8072 |
}
|
|
|
|
| 8078 |
ggml_nbytes(dst));
|
| 8079 |
}
|
| 8080 |
|
| 8081 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8082 |
return;
|
| 8083 |
}
|
| 8084 |
|
|
|
|
| 8180 |
assert(params->ith == 0);
|
| 8181 |
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 8182 |
|
| 8183 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8184 |
return;
|
| 8185 |
}
|
| 8186 |
|
|
|
|
| 8261 |
|
| 8262 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 8263 |
|
| 8264 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8265 |
return;
|
| 8266 |
}
|
| 8267 |
const int ith = params->ith;
|
| 8268 |
const int nth = params->nth;
|
| 8269 |
|
| 8270 |
#if defined(GGML_USE_CLBLAST)
|
| 8271 |
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
| 8272 |
// TODO: OpenCL kernel support full broadcast
|
| 8273 |
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
| 8274 |
if (ith == 0) {
|
|
|
|
| 8369 |
|
| 8370 |
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
| 8371 |
|
| 8372 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8373 |
return;
|
| 8374 |
}
|
| 8375 |
|
|
|
|
| 8464 |
assert(params->ith == 0);
|
| 8465 |
assert(ggml_are_same_shape(src0, dst));
|
| 8466 |
|
| 8467 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8468 |
return;
|
| 8469 |
}
|
| 8470 |
|
|
|
|
| 8510 |
assert(params->ith == 0);
|
| 8511 |
assert(ggml_are_same_shape(src0, dst));
|
| 8512 |
|
| 8513 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8514 |
return;
|
| 8515 |
}
|
| 8516 |
|
|
|
|
| 8556 |
GGML_ASSERT(params->ith == 0);
|
| 8557 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 8558 |
|
| 8559 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8560 |
return;
|
| 8561 |
}
|
| 8562 |
|
|
|
|
| 8602 |
assert(params->ith == 0);
|
| 8603 |
assert(ggml_is_scalar(dst));
|
| 8604 |
|
| 8605 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8606 |
return;
|
| 8607 |
}
|
| 8608 |
|
|
|
|
| 8637 |
assert(params->ith == 0);
|
| 8638 |
assert(ggml_is_scalar(dst));
|
| 8639 |
|
| 8640 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8641 |
return;
|
| 8642 |
}
|
| 8643 |
|
|
|
|
| 8694 |
|
| 8695 |
GGML_ASSERT(params->ith == 0);
|
| 8696 |
|
| 8697 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8698 |
return;
|
| 8699 |
}
|
| 8700 |
|
|
|
|
| 8749 |
|
| 8750 |
assert(params->ith == 0);
|
| 8751 |
|
| 8752 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8753 |
return;
|
| 8754 |
}
|
| 8755 |
|
|
|
|
| 8808 |
|
| 8809 |
assert(params->ith == 0);
|
| 8810 |
|
| 8811 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8812 |
return;
|
| 8813 |
}
|
| 8814 |
|
|
|
|
| 8859 |
GGML_ASSERT(params->ith == 0);
|
| 8860 |
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
| 8861 |
|
| 8862 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8863 |
return;
|
| 8864 |
}
|
| 8865 |
|
|
|
|
| 8904 |
GGML_ASSERT(params->ith == 0);
|
| 8905 |
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
| 8906 |
|
| 8907 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8908 |
return;
|
| 8909 |
}
|
| 8910 |
|
|
|
|
| 8978 |
GGML_ASSERT(params->ith == 0);
|
| 8979 |
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
| 8980 |
|
| 8981 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 8982 |
return;
|
| 8983 |
}
|
| 8984 |
|
|
|
|
| 9055 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 9056 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 9057 |
|
| 9058 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9059 |
return;
|
| 9060 |
}
|
| 9061 |
|
|
|
|
| 9127 |
assert(params->ith == 0);
|
| 9128 |
assert(ggml_are_same_shape(src0, dst));
|
| 9129 |
|
| 9130 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9131 |
return;
|
| 9132 |
}
|
| 9133 |
|
|
|
|
| 9173 |
assert(params->ith == 0);
|
| 9174 |
assert(ggml_are_same_shape(src0, dst));
|
| 9175 |
|
| 9176 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9177 |
return;
|
| 9178 |
}
|
| 9179 |
|
|
|
|
| 9219 |
assert(params->ith == 0);
|
| 9220 |
assert(ggml_are_same_shape(src0, dst));
|
| 9221 |
|
| 9222 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9223 |
return;
|
| 9224 |
}
|
| 9225 |
|
|
|
|
| 9265 |
assert(params->ith == 0);
|
| 9266 |
assert(ggml_are_same_shape(src0, dst));
|
| 9267 |
|
| 9268 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9269 |
return;
|
| 9270 |
}
|
| 9271 |
|
|
|
|
| 9311 |
assert(params->ith == 0);
|
| 9312 |
assert(ggml_are_same_shape(src0, dst));
|
| 9313 |
|
| 9314 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9315 |
return;
|
| 9316 |
}
|
| 9317 |
|
|
|
|
| 9357 |
assert(params->ith == 0);
|
| 9358 |
assert(ggml_are_same_shape(src0, dst));
|
| 9359 |
|
| 9360 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9361 |
return;
|
| 9362 |
}
|
| 9363 |
|
|
|
|
| 9403 |
assert(params->ith == 0);
|
| 9404 |
assert(ggml_are_same_shape(src0, dst));
|
| 9405 |
|
| 9406 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9407 |
return;
|
| 9408 |
}
|
| 9409 |
|
|
|
|
| 9450 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9451 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9452 |
|
| 9453 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9454 |
return;
|
| 9455 |
}
|
| 9456 |
|
|
|
|
| 9513 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9514 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9515 |
|
| 9516 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9517 |
return;
|
| 9518 |
}
|
| 9519 |
|
|
|
|
| 9576 |
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
| 9577 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9578 |
|
| 9579 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9580 |
return;
|
| 9581 |
}
|
| 9582 |
|
|
|
|
| 9637 |
assert(params->ith == 0);
|
| 9638 |
assert(ggml_are_same_shape(src0, dst));
|
| 9639 |
|
| 9640 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9641 |
return;
|
| 9642 |
}
|
| 9643 |
|
|
|
|
| 9690 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9691 |
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
| 9692 |
|
| 9693 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9694 |
return;
|
| 9695 |
}
|
| 9696 |
|
|
|
|
| 9752 |
assert(params->ith == 0);
|
| 9753 |
assert(ggml_are_same_shape(src0, dst));
|
| 9754 |
|
| 9755 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9756 |
return;
|
| 9757 |
}
|
| 9758 |
|
|
|
|
| 9795 |
assert(params->ith == 0);
|
| 9796 |
assert(ggml_are_same_shape(src0, dst));
|
| 9797 |
|
| 9798 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9799 |
return;
|
| 9800 |
}
|
| 9801 |
|
|
|
|
| 9841 |
|
| 9842 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9843 |
|
| 9844 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9845 |
return;
|
| 9846 |
}
|
| 9847 |
|
|
|
|
| 9916 |
|
| 9917 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 9918 |
|
| 9919 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9920 |
return;
|
| 9921 |
}
|
| 9922 |
|
|
|
|
| 9987 |
|
| 9988 |
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
|
| 9989 |
|
| 9990 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 9991 |
return;
|
| 9992 |
}
|
| 9993 |
|
|
|
|
| 10165 |
|
| 10166 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 10167 |
|
| 10168 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10169 |
return;
|
| 10170 |
}
|
| 10171 |
|
|
|
|
| 10332 |
|
| 10333 |
#if defined(GGML_USE_CLBLAST)
|
| 10334 |
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
| 10335 |
+
if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
|
| 10336 |
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
| 10337 |
}
|
| 10338 |
return;
|
|
|
|
| 10345 |
const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
|
| 10346 |
UNUSED(desired_wsize);
|
| 10347 |
|
| 10348 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 10349 |
if (type != GGML_TYPE_F32) {
|
| 10350 |
assert(params->wsize >= desired_wsize);
|
| 10351 |
// parallelize by src0 rows
|
|
|
|
| 10368 |
return;
|
| 10369 |
}
|
| 10370 |
|
| 10371 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10372 |
return;
|
| 10373 |
}
|
| 10374 |
|
|
|
|
| 10406 |
}
|
| 10407 |
#endif
|
| 10408 |
|
| 10409 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 10410 |
if (ith != 0) {
|
| 10411 |
return;
|
| 10412 |
}
|
|
|
|
| 10430 |
return;
|
| 10431 |
}
|
| 10432 |
|
| 10433 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10434 |
return;
|
| 10435 |
}
|
| 10436 |
|
|
|
|
| 10587 |
|
| 10588 |
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
|
| 10589 |
|
| 10590 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 10591 |
if (ith != 0) {
|
| 10592 |
return;
|
| 10593 |
}
|
|
|
|
| 10624 |
return;
|
| 10625 |
}
|
| 10626 |
|
| 10627 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10628 |
return;
|
| 10629 |
}
|
| 10630 |
|
|
|
|
| 10772 |
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
|
| 10773 |
#endif
|
| 10774 |
|
| 10775 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 10776 |
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
|
| 10777 |
if (use_blas) {
|
| 10778 |
return;
|
|
|
|
| 10785 |
return;
|
| 10786 |
}
|
| 10787 |
|
| 10788 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10789 |
return;
|
| 10790 |
}
|
| 10791 |
|
|
|
|
| 10965 |
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
|
| 10966 |
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
| 10967 |
|
| 10968 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 10969 |
if (ith != 0) {
|
| 10970 |
return;
|
| 10971 |
}
|
|
|
|
| 10973 |
return;
|
| 10974 |
}
|
| 10975 |
|
| 10976 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 10977 |
return;
|
| 10978 |
}
|
| 10979 |
|
|
|
|
| 11091 |
GGML_ASSERT(ggml_is_contiguous(dst));
|
| 11092 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 11093 |
|
| 11094 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11095 |
return;
|
| 11096 |
}
|
| 11097 |
|
|
|
|
| 11163 |
size_t offset = ((int32_t *) dst->op_params)[3];
|
| 11164 |
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
| 11165 |
|
| 11166 |
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
| 11167 |
if (params->ith != 0) {
|
| 11168 |
return;
|
| 11169 |
}
|
|
|
|
| 11175 |
ggml_nbytes(dst));
|
| 11176 |
}
|
| 11177 |
|
| 11178 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11179 |
return;
|
| 11180 |
}
|
| 11181 |
|
|
|
|
| 11323 |
|
| 11324 |
assert(params->ith == 0);
|
| 11325 |
|
| 11326 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11327 |
return;
|
| 11328 |
}
|
| 11329 |
|
|
|
|
| 11363 |
|
| 11364 |
assert(params->ith == 0);
|
| 11365 |
|
| 11366 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11367 |
return;
|
| 11368 |
}
|
| 11369 |
|
|
|
|
| 11400 |
|
| 11401 |
assert(params->ith == 0);
|
| 11402 |
|
| 11403 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11404 |
return;
|
| 11405 |
}
|
| 11406 |
|
|
|
|
| 11503 |
|
| 11504 |
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
| 11505 |
|
| 11506 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 11507 |
if (params->ith != 0) {
|
| 11508 |
return;
|
| 11509 |
}
|
| 11510 |
memset(dst->data, 0, ggml_nbytes(dst));
|
| 11511 |
}
|
| 11512 |
|
| 11513 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11514 |
return;
|
| 11515 |
}
|
| 11516 |
|
|
|
|
| 11542 |
|
| 11543 |
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
| 11544 |
|
| 11545 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 11546 |
if (params->ith != 0) {
|
| 11547 |
return;
|
| 11548 |
}
|
| 11549 |
memset(dst->data, 0, ggml_nbytes(dst));
|
| 11550 |
}
|
| 11551 |
|
| 11552 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11553 |
return;
|
| 11554 |
}
|
| 11555 |
|
|
|
|
| 11619 |
|
| 11620 |
GGML_ASSERT(params->ith == 0);
|
| 11621 |
|
| 11622 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11623 |
return;
|
| 11624 |
}
|
| 11625 |
|
|
|
|
| 11688 |
|
| 11689 |
GGML_ASSERT(n_past >= 0);
|
| 11690 |
|
| 11691 |
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
| 11692 |
if (ith != 0) {
|
| 11693 |
return;
|
| 11694 |
}
|
|
|
|
| 11702 |
ggml_nbytes(dst));
|
| 11703 |
}
|
| 11704 |
|
| 11705 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11706 |
return;
|
| 11707 |
}
|
| 11708 |
|
|
|
|
| 11776 |
assert(ggml_is_contiguous(dst));
|
| 11777 |
assert(ggml_are_same_shape(src0, dst));
|
| 11778 |
|
| 11779 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11780 |
return;
|
| 11781 |
}
|
| 11782 |
|
|
|
|
| 11914 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 11915 |
GGML_ASSERT(ggml_are_same_shape(src1, dst));
|
| 11916 |
|
| 11917 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 11918 |
return;
|
| 11919 |
}
|
| 11920 |
|
|
|
|
| 12008 |
|
| 12009 |
assert(params->ith == 0);
|
| 12010 |
|
| 12011 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12012 |
return;
|
| 12013 |
}
|
| 12014 |
|
|
|
|
| 12067 |
|
| 12068 |
assert(params->ith == 0);
|
| 12069 |
|
| 12070 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12071 |
return;
|
| 12072 |
}
|
| 12073 |
|
|
|
|
| 12174 |
|
| 12175 |
assert(params->ith == 0);
|
| 12176 |
|
| 12177 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12178 |
return;
|
| 12179 |
}
|
| 12180 |
|
|
|
|
| 12314 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 12315 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 12316 |
|
| 12317 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12318 |
return;
|
| 12319 |
}
|
| 12320 |
|
|
|
|
| 12492 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 12493 |
const struct ggml_tensor * src1 = dst->src[1];
|
| 12494 |
|
| 12495 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12496 |
return;
|
| 12497 |
}
|
| 12498 |
|
|
|
|
| 12723 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 12724 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12725 |
|
| 12726 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 12727 |
if (ith != 0) {
|
| 12728 |
return;
|
| 12729 |
}
|
|
|
|
| 12763 |
return;
|
| 12764 |
}
|
| 12765 |
|
| 12766 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12767 |
return;
|
| 12768 |
}
|
| 12769 |
|
|
|
|
| 12822 |
GGML_ASSERT(nb00 == sizeof(float));
|
| 12823 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12824 |
|
| 12825 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 12826 |
if (ith != 0) {
|
| 12827 |
return;
|
| 12828 |
}
|
|
|
|
| 12862 |
return;
|
| 12863 |
}
|
| 12864 |
|
| 12865 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12866 |
return;
|
| 12867 |
}
|
| 12868 |
|
|
|
|
| 12966 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 12967 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 12968 |
|
| 12969 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 12970 |
return;
|
| 12971 |
}
|
| 12972 |
|
| 12973 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 12974 |
return;
|
| 12975 |
}
|
| 12976 |
|
|
|
|
| 13054 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 13055 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 13056 |
|
| 13057 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 13058 |
return;
|
| 13059 |
}
|
| 13060 |
|
| 13061 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13062 |
return;
|
| 13063 |
}
|
| 13064 |
|
|
|
|
| 13140 |
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
| 13141 |
GGML_ASSERT(nb10 == sizeof(float));
|
| 13142 |
|
| 13143 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 13144 |
if (ith != 0) {
|
| 13145 |
return;
|
| 13146 |
}
|
|
|
|
| 13182 |
return;
|
| 13183 |
}
|
| 13184 |
|
| 13185 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13186 |
return;
|
| 13187 |
}
|
| 13188 |
|
|
|
|
| 13234 |
assert(src->type == GGML_TYPE_F32);
|
| 13235 |
assert(params->ith == 0);
|
| 13236 |
|
| 13237 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13238 |
return;
|
| 13239 |
}
|
| 13240 |
|
|
|
|
| 13303 |
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
| 13304 |
GGML_ASSERT(params->ith == 0);
|
| 13305 |
|
| 13306 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13307 |
return;
|
| 13308 |
}
|
| 13309 |
|
|
|
|
| 13376 |
|
| 13377 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13378 |
|
| 13379 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13380 |
return;
|
| 13381 |
}
|
| 13382 |
|
|
|
|
| 13436 |
|
| 13437 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13438 |
|
| 13439 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13440 |
return;
|
| 13441 |
}
|
| 13442 |
|
|
|
|
| 13497 |
|
| 13498 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 13499 |
|
| 13500 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13501 |
return;
|
| 13502 |
}
|
| 13503 |
|
|
|
|
| 13523 |     // C doesn't have a functional sort, so we do a bubble sort instead
| 13524 |     for (int64_t j = 0; j < ne0; j++) {
| 13525 |         for (int64_t k = j + 1; k < ne0; k++) {
| 13526 | +           if ((order == GGML_SORT_ORDER_ASC  && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
| 13527 | +               (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
| 13528 |                 int32_t tmp = dst_data[j];
| 13529 |                 dst_data[j] = dst_data[k];
| 13530 |                 dst_data[k] = tmp;
|
|
|
|
| 13607 |
GGML_ASSERT(nb1 <= nb2);
|
| 13608 |
GGML_ASSERT(nb2 <= nb3);
|
| 13609 |
|
| 13610 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 13611 |
return;
|
| 13612 |
}
|
| 13613 |
|
| 13614 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13615 |
return;
|
| 13616 |
}
|
| 13617 |
|
|
|
|
| 13799 |
GGML_ASSERT(nb1 <= nb2);
|
| 13800 |
GGML_ASSERT(nb2 <= nb3);
|
| 13801 |
|
| 13802 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 13803 |
return;
|
| 13804 |
}
|
| 13805 |
|
| 13806 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 13807 |
return;
|
| 13808 |
}
|
| 13809 |
|
|
|
|
| 14058 |
GGML_ASSERT(nb1 <= nb2);
|
| 14059 |
GGML_ASSERT(nb2 <= nb3);
|
| 14060 |
|
| 14061 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 14062 |
return;
|
| 14063 |
}
|
| 14064 |
|
| 14065 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14066 |
return;
|
| 14067 |
}
|
| 14068 |
|
|
|
|
| 14217 |
GGML_ASSERT(nb1 <= nb2);
|
| 14218 |
GGML_ASSERT(nb2 <= nb3);
|
| 14219 |
|
| 14220 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 14221 |
if (ith == 0) {
|
| 14222 |
memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
|
| 14223 |
}
|
| 14224 |
return;
|
| 14225 |
}
|
| 14226 |
|
| 14227 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14228 |
return;
|
| 14229 |
}
|
| 14230 |
|
|
|
|
| 14540 |
|
| 14541 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14542 |
|
| 14543 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14544 |
return;
|
| 14545 |
}
|
| 14546 |
|
|
|
|
| 14606 |
|
| 14607 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14608 |
|
| 14609 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14610 |
return;
|
| 14611 |
}
|
| 14612 |
|
|
|
|
| 14734 |
|
| 14735 |
const struct ggml_tensor * src0 = dst->src[0];
|
| 14736 |
|
| 14737 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14738 |
return;
|
| 14739 |
}
|
| 14740 |
|
|
|
|
| 14786 |
const struct ggml_tensor * src2 = dst->src[2];
|
| 14787 |
|
| 14788 |
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
|
| 14789 |
+
if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
|
| 14790 |
if (params->ith != 0) {
|
| 14791 |
return;
|
| 14792 |
}
|
| 14793 |
memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
|
| 14794 |
return;
|
| 14795 |
}
|
| 14796 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14797 |
return;
|
| 14798 |
}
|
| 14799 |
|
|
|
|
| 14875 |
|
| 14876 |
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
| 14877 |
|
| 14878 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14879 |
return;
|
| 14880 |
}
|
| 14881 |
|
|
|
|
| 14924 |
assert(params->ith == 0);
|
| 14925 |
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
| 14926 |
|
| 14927 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14928 |
return;
|
| 14929 |
}
|
| 14930 |
|
|
|
|
| 14973 |
|
| 14974 |
assert(params->ith == 0);
|
| 14975 |
|
| 14976 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14977 |
return;
|
| 14978 |
}
|
| 14979 |
|
|
|
|
| 14992 |
|
| 14993 |
assert(params->ith == 0);
|
| 14994 |
|
| 14995 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 14996 |
return;
|
| 14997 |
}
|
| 14998 |
|
|
|
|
| 15012 |
|
| 15013 |
assert(params->ith == 0);
|
| 15014 |
|
| 15015 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15016 |
return;
|
| 15017 |
}
|
| 15018 |
|
|
|
|
| 15027 |
|
| 15028 |
const struct ggml_tensor * a = dst->src[0];
|
| 15029 |
|
| 15030 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15031 |
return;
|
| 15032 |
}
|
| 15033 |
|
|
|
|
| 15045 |
const struct ggml_tensor * a = dst->src[0];
|
| 15046 |
const struct ggml_tensor * b = dst->src[1];
|
| 15047 |
|
| 15048 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15049 |
return;
|
| 15050 |
}
|
| 15051 |
|
|
|
|
| 15064 |
const struct ggml_tensor * b = dst->src[1];
|
| 15065 |
const struct ggml_tensor * c = dst->src[2];
|
| 15066 |
|
| 15067 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15068 |
return;
|
| 15069 |
}
|
| 15070 |
|
|
|
|
| 15098 |
|
| 15099 |
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
|
| 15100 |
|
| 15101 |
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
| 15102 |
if (ith == 0) {
|
| 15103 |
memset(sums, 0, sizeof(float) * (nth + nth * nc));
|
| 15104 |
}
|
| 15105 |
return;
|
| 15106 |
}
|
| 15107 |
|
| 15108 |
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15109 |
if (ith == 0) {
|
| 15110 |
float * dp = (float *) dst->data;
|
| 15111 |
ggml_vec_sum_f32(nth, dp, sums);
|
|
|
|
| 15220 |
const int64_t ith = params->ith;
|
| 15221 |
const int64_t nth = params->nth;
|
| 15222 |
|
| 15223 |
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
| 15224 |
return;
|
| 15225 |
}
|
| 15226 |
|
|
|
|
| 15327 |
if (skip_cpu) {
|
| 15328 |
return;
|
| 15329 |
}
|
| 15330 |
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
| 15331 |
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
| 15332 |
#elif defined(GGML_USE_VULKAN)
|
| 15333 |
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
| 15334 |
#ifdef GGML_VULKAN_CHECK_RESULTS
|
|
|
|
| 15339 |
if (skip_cpu) {
|
| 15340 |
return;
|
| 15341 |
}
|
| 15342 |
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
| 15343 |
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
| 15344 |
#endif // GGML_USE_CUBLAS
|
| 15345 |
|
| 15346 |
#ifdef GGML_USE_SYCL
|
|
|
|
| 16886 |
|
| 16887 |
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
| 16888 |
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
| 16889 |
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
|
| 16890 |
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
| 16891 |
|
| 16892 |
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
|
|
|
| 17433 |     set_numa_thread_affinity(state->ith);
| 17434 |
| 17435 |     int node_n     = -1;
| 17436 | +   int task_phase = GGML_TASK_TYPE_FINALIZE;
| 17437 |
| 17438 |     while (true) {
| 17439 |         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {

| 17445 |             // all other threads are finished and spinning
| 17446 |             // do finalize and init here so we don't have synchronize again
| 17447 |             struct ggml_compute_params params = {
| 17448 | +               /*.type  =*/ GGML_TASK_TYPE_FINALIZE,
| 17449 |                 /*.ith   =*/ 0,
| 17450 |                 /*.nth   =*/ 0,
| 17451 |                 /*.wsize =*/ cplan->work_size,

| 17476 |                 if (n_tasks == 1) {
| 17477 |                     /* INIT */
| 17478 |                     if (GGML_OP_HAS_INIT[node->op]) {
| 17479 | +                       params.type = GGML_TASK_TYPE_INIT;
| 17480 |                         ggml_compute_forward(&params, node);
| 17481 |                     }
| 17482 |
| 17483 |                     // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
| 17484 |                     // they do something more efficient than spinning (?)
| 17485 | +                   params.type = GGML_TASK_TYPE_COMPUTE;
| 17486 |                     ggml_compute_forward(&params, node);
| 17487 |
| 17488 |                     if (GGML_OP_HAS_FINALIZE[node->op]) {
| 17489 | +                       params.type = GGML_TASK_TYPE_FINALIZE;
| 17490 |                         ggml_compute_forward(&params, node);
| 17491 |                     }
| 17492 |

| 17500 |             }
| 17501 |         }
| 17502 |
| 17503 | +       task_phase = GGML_TASK_TYPE_INIT;
| 17504 |         atomic_store(&state->shared->n_active, n_threads);
| 17505 |         atomic_store(&state->shared->node_n,   node_n);
| 17506 |         atomic_store(&state->shared->node_task, task_phase);

| 17517 |         const int n_tasks = ggml_get_n_tasks(node, n_threads);
| 17518 |
| 17519 |         struct ggml_compute_params params = {
| 17520 | +           /*.type  =*/ GGML_TASK_TYPE_INIT,
| 17521 |             /*.ith   =*/ state->ith,
| 17522 |             /*.nth   =*/ n_tasks,
| 17523 |             /*.wsize =*/ cplan->work_size,

| 17531 |         }
| 17532 |
| 17533 |         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
| 17534 | +           task_phase = GGML_TASK_TYPE_COMPUTE;
| 17535 |             atomic_store(&state->shared->n_active, n_threads);
| 17536 |             atomic_store(&state->shared->node_task, task_phase);
| 17537 |         }

| 17546 |         }
| 17547 |
| 17548 |         if (state->ith < n_tasks) {
| 17549 | +           params.type = GGML_TASK_TYPE_COMPUTE;
| 17550 |             ggml_compute_forward(&params, node);
| 17551 |         }
| 17552 |
| 17553 |         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
| 17554 | +           task_phase = GGML_TASK_TYPE_FINALIZE;
| 17555 |             atomic_store(&state->shared->n_active, n_threads);
| 17556 |             atomic_store(&state->shared->node_task, task_phase);
| 17557 |         }

| 17787 |         /*.n_threads           =*/ n_threads,
| 17788 |         /*.n_active            =*/ n_threads,
| 17789 |         /*.node_n              =*/ -1,
| 17790 | +       /*.node_task           =*/ GGML_TASK_TYPE_FINALIZE,
| 17791 |         /*.abort_callback      =*/ NULL,
| 17792 |         /*.abort_callback_data =*/ NULL,
| 17793 |     };

| 17855 | void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
| 17856 |     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
| 17857 |
| 17858 | +   struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
| 17859 |
| 17860 |     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
| 17861 |

| 18663 |     float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
| 18664 |
| 18665 |     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
| 18666 | +   struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
| 18667 |     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
| 18668 |
| 18669 |     bool cancel = false;
| 18675 |             if (callback) {
| 18676 |                 callback(callback_data, accum_step, &sched, &cancel);
| 18677 |                 if (cancel) {
| 18678 | +                   return GGML_OPT_RESULT_CANCEL;
| 18679 |                 }
| 18680 |             }
| 18681 |             // ggml_graph_reset  (gf);

| 18766 |             if (callback) {
| 18767 |                 callback(callback_data, accum_step, &sched, &cancel);
| 18768 |                 if (cancel) {
| 18769 | +                   return GGML_OPT_RESULT_CANCEL;
| 18770 |                 }
| 18771 |             }
| 18772 |             // ggml_graph_reset  (gf);

| 18783 |         if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
| 18784 |             GGML_PRINT_DEBUG("converged\n");
| 18785 |
| 18786 | +           return GGML_OPT_RESULT_OK;
| 18787 |         }
| 18788 |
| 18789 |         // delta-based convergence test

| 18793 |             const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
| 18794 |
| 18795 |             if (fabsf(rate) < params.delta) {
| 18796 | +               return GGML_OPT_RESULT_OK;
| 18797 |             }
| 18798 |         }
| 18799 |

| 18809 |             ++n_no_improvement[0];
| 18810 |
| 18811 |             if (n_no_improvement[0] >= params.max_no_improvement) {
| 18812 | +               return GGML_OPT_RESULT_OK;
| 18813 |             }
| 18814 |         }
| 18815 |     }

| 18827 |         }
| 18828 |     }
| 18829 |
| 18830 | +   return GGML_OPT_RESULT_DID_NOT_CONVERGE;
| 18831 | }
|
| 18832 |
|
| 18833 |
//
|
|
|
|
| 18908 |
float sched = 0;
|
| 18909 |
callback(callback_data, accum_step, &sched, cancel);
|
| 18910 |
if (*cancel) {
|
| 18911 |
+
return GGML_OPT_RESULT_CANCEL;
|
| 18912 |
}
|
| 18913 |
}
|
| 18914 |
// ggml_graph_reset (gf);
|
|
|
|
| 18981 |
if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
|
| 18982 |
params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
|
| 18983 |
if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
|
| 18984 |
+
return GGML_OPT_RESULT_INVALID_WOLFE;
|
| 18985 |
}
|
| 18986 |
}
|
| 18987 |
|
|
|
|
| 19010 |
}
|
| 19011 |
|
| 19012 |
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
| 19013 |
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
| 19014 |
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
| 19015 |
|
| 19016 |
float * x = opt->lbfgs.x->data; // current parameters
|
|
|
|
| 19051 |
float sched = 0;
|
| 19052 |
callback(callback_data, accum_step, &sched, &cancel);
|
| 19053 |
if (cancel) {
|
| 19054 |
+
return GGML_OPT_RESULT_CANCEL;
|
| 19055 |
}
|
| 19056 |
}
|
| 19057 |
// ggml_graph_reset (gf);
|
|
|
|
| 19079 |
|
| 19080 |
// already optimized
|
| 19081 |
if (gnorm/xnorm <= params.lbfgs.eps) {
|
| 19082 |
+
return GGML_OPT_RESULT_OK;
|
| 19083 |
}
|
| 19084 |
|
| 19085 |
if (opt->just_initialized) {
|
|
|
|
| 19124 |
// way to test and don't want to break something with so many changes lined up
|
| 19125 |
ls = linesearch_backtracking(¶ms, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
|
| 19126 |
if (cancel) {
|
| 19127 |
+
return GGML_OPT_RESULT_CANCEL;
|
| 19128 |
}
|
| 19129 |
|
| 19130 |
if (ls < 0) {
|
|
|
|
| 19147 |
}
|
| 19148 |
if (gnorm/xnorm <= params.lbfgs.eps) {
|
| 19149 |
// converged
|
| 19150 |
+
return GGML_OPT_RESULT_OK;
|
| 19151 |
}
|
| 19152 |
|
| 19153 |
// delta-based convergence test
|
|
|
|
| 19157 |
const float rate = (pf[k[0]%params.past] - fx)/fx;
|
| 19158 |
|
| 19159 |
if (fabsf(rate) < params.delta) {
|
| 19160 |
+
return GGML_OPT_RESULT_OK;
|
| 19161 |
}
|
| 19162 |
}
|
| 19163 |
|
|
|
|
| 19173 |
n_no_improvement[0]++;
|
| 19174 |
|
| 19175 |
if (n_no_improvement[0] >= params.max_no_improvement) {
|
| 19176 |
+
return GGML_OPT_RESULT_OK;
|
| 19177 |
}
|
| 19178 |
}
|
| 19179 |
}
|
| 19180 |
|
| 19181 |
if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
|
| 19182 |
// reached the maximum number of iterations
|
| 19183 |
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
| 19184 |
}
|
| 19185 |
|
| 19186 |
// update vectors s and y:
|
|
|
|
@@ -19236,17 +19236,17 @@ static enum ggml_opt_result ggml_opt_lbfgs(
| 19236 |
| 19237 |     GGML_ASSERT(false && "lbfgs failed");
| 19238 |
| 19239 | +   return GGML_OPT_RESULT_DID_NOT_CONVERGE;
| 19240 | }
| 19241 |
| 19242 | struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
| 19243 |     struct ggml_opt_params result;
| 19244 |
| 19245 |     switch (type) {
| 19246 | +       case GGML_OPT_TYPE_ADAM:
| 19247 |             {
| 19248 |                 result = (struct ggml_opt_params) {
| 19249 | +                   .type       = GGML_OPT_TYPE_ADAM,
| 19250 |                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
| 19251 |                     .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
| 19252 |                     .past       = 0,

@@ -19274,10 +19274,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
| 19274 |                     },
| 19275 |                 };
| 19276 |             } break;
| 19277 | +       case GGML_OPT_TYPE_LBFGS:
| 19278 |             {
| 19279 |                 result = (struct ggml_opt_params) {
| 19280 | +                   .type       = GGML_OPT_TYPE_LBFGS,
| 19281 |                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
| 19282 |                     .n_threads  = 1,
| 19283 |                     .past       = 0,

@@ -19322,12 +19322,12 @@ GGML_API void ggml_opt_init(
| 19322 |     opt->just_initialized = true;
| 19323 |     if (opt->ctx == NULL) {
| 19324 |         struct ggml_init_params ctx_opt_params;
| 19325 | +       if (opt->params.type == GGML_OPT_TYPE_ADAM) {
| 19326 |             ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
| 19327 |             if (opt->params.past > 0) {
| 19328 |                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
| 19329 |             }
| 19330 | +       } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
| 19331 |             ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
| 19332 |             if (opt->params.past > 0) {
| 19333 |                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;

@@ -19339,7 +19339,7 @@ GGML_API void ggml_opt_init(
| 19339 |         opt->ctx = ggml_init(ctx_opt_params);
| 19340 |     }
| 19341 |     switch (opt->params.type) {
| 19342 | +       case GGML_OPT_TYPE_ADAM:
| 19343 |             {
| 19344 |                 opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
| 19345 |                 opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);

@@ -19353,7 +19353,7 @@ GGML_API void ggml_opt_init(
| 19353 |                     ggml_set_zero(opt->adam.pf);
| 19354 |                 }
| 19355 |             } break;
| 19356 | +       case GGML_OPT_TYPE_LBFGS:
| 19357 |             {
| 19358 |                 opt->lbfgs.x  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
| 19359 |                 opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);

@@ -19397,13 +19397,13 @@ enum ggml_opt_result ggml_opt(
| 19397 |
| 19398 |         ctx = ggml_init(params_ctx);
| 19399 |         if (ctx == NULL) {
| 19400 | +           return GGML_OPT_RESULT_NO_CONTEXT;
| 19401 |         }
| 19402 |
| 19403 |         free_ctx = true;
| 19404 |     }
| 19405 |
| 19406 | +   enum ggml_opt_result result = GGML_OPT_RESULT_OK;
| 19407 |
| 19408 |     struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
| 19409 |

@@ -19442,14 +19442,14 @@ enum ggml_opt_result ggml_opt_resume_g(
| 19442 |         void * callback_data) {
| 19443 |
| 19444 |     // build forward + backward compute graphs
| 19445 | +   enum ggml_opt_result result = GGML_OPT_RESULT_OK;
| 19446 |
| 19447 |     switch (opt->params.type) {
| 19448 | +       case GGML_OPT_TYPE_ADAM:
| 19449 |             {
| 19450 |                 result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
| 19451 |             } break;
| 19452 | +       case GGML_OPT_TYPE_LBFGS:
| 19453 |             {
| 19454 |                 result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
| 19455 |             } break;
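The ggml.c edits above are mechanical renames, and most of them land on the same spot: the per-phase guard at the top of each CPU op. For orientation only (not part of the commit), this is the shape of that guard after the rename; the op name ggml_compute_forward_example and its body are hypothetical:

    // hypothetical op, shown only to illustrate the renamed task phases
    static void ggml_compute_forward_example(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
        // this op has no INIT/FINALIZE work, so it only runs in the COMPUTE phase
        if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
            return;
        }

        // GGML_TASK_TYPE_COMPUTE: split the rows of dst across params->nth threads
        const int ith = params->ith; // thread index
        const int nth = params->nth; // number of threads
        for (int64_t i = ith; i < ggml_nrows(dst); i += nth) {
            // ... process row i of dst ...
        }
    }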
ggml.h
CHANGED
|
@@ -364,9 +364,9 @@ extern "C" {
|  364 |     };
|  365 |
|  366 |     enum ggml_backend_type {
|  367 | +       GGML_BACKEND_TYPE_CPU = 0,
|  368 | +       GGML_BACKEND_TYPE_GPU = 10,
|  369 | +       GGML_BACKEND_TYPE_GPU_SPLIT = 20,
|  370 |     };
|  371 |
|  372 |     // model file types

@@ -498,9 +498,9 @@ extern "C" {
|  498 |     };
|  499 |
|  500 |     enum ggml_object_type {
|  501 | +       GGML_OBJECT_TYPE_TENSOR,
|  502 | +       GGML_OBJECT_TYPE_GRAPH,
|  503 | +       GGML_OBJECT_TYPE_WORK_BUFFER
|  504 |     };
|  505 |
|  506 |     enum ggml_log_level {

@@ -642,9 +642,9 @@ extern "C" {
|  642 |     // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|  643 |     // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|  644 |     enum ggml_task_type {
|  645 | +       GGML_TASK_TYPE_INIT = 0,
|  646 | +       GGML_TASK_TYPE_COMPUTE,
|  647 | +       GGML_TASK_TYPE_FINALIZE,
|  648 |     };
|  649 |
|  650 |     struct ggml_compute_params {

@@ -1659,8 +1659,8 @@ extern "C" {
| 1659 |
| 1660 |     // sort rows
| 1661 |     enum ggml_sort_order {
| 1662 | +       GGML_SORT_ORDER_ASC,
| 1663 | +       GGML_SORT_ORDER_DESC,
| 1664 |     };
| 1665 |
| 1666 |     GGML_API struct ggml_tensor * ggml_argsort(

@@ -1953,8 +1953,8 @@ extern "C" {
| 1953 |
| 1954 |     // optimization methods
| 1955 |     enum ggml_opt_type {
| 1956 | +       GGML_OPT_TYPE_ADAM,
| 1957 | +       GGML_OPT_TYPE_LBFGS,
| 1958 |     };
| 1959 |
| 1960 |     // linesearch methods

@@ -1968,12 +1968,12 @@ extern "C" {
| 1968 |
| 1969 |     // optimization return values
| 1970 |     enum ggml_opt_result {
| 1971 | +       GGML_OPT_RESULT_OK = 0,
| 1972 | +       GGML_OPT_RESULT_DID_NOT_CONVERGE,
| 1973 | +       GGML_OPT_RESULT_NO_CONTEXT,
| 1974 | +       GGML_OPT_RESULT_INVALID_WOLFE,
| 1975 | +       GGML_OPT_RESULT_FAIL,
| 1976 | +       GGML_OPT_RESULT_CANCEL,
| 1977 |
| 1978 |         GGML_LINESEARCH_FAIL = -128,
| 1979 |         GGML_LINESEARCH_MINIMUM_STEP,
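For callers, the only visible effect of this commit is the spelling of the enum constants. Below is a minimal sketch of how the renamed optimizer enums are used through the public ggml API; the toy objective and the 16 MB buffer size are illustrative choices, not taken from the commit:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024, // illustrative size
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // toy objective: minimize f(x) = sum(x*x) over a single parameter
        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        ggml_set_f32(x, 2.0f);
        ggml_set_param(ctx, x);
        struct ggml_tensor * f = ggml_sum(ctx, ggml_mul(ctx, x, x));

        // GGML_OPT_TYPE_ADAM / GGML_OPT_TYPE_LBFGS select the optimizer
        struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_TYPE_ADAM);

        // GGML_OPT_RESULT_* reports how the run ended
        enum ggml_opt_result res = ggml_opt(ctx, opt_params, f);
        int ok = (res == GGML_OPT_RESULT_OK || res == GGML_OPT_RESULT_DID_NOT_CONVERGE);

        ggml_free(ctx);
        return ok ? 0 : 1;
    }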