Spaces:
Running
Running
Disable iqx on windows as WA (llama/6435)
Browse files* disable iqx on windows as WA
* array instead of global_memory
- ggml-common.h +3 -2
- ggml-sycl.cpp +38 -114
ggml-common.h
CHANGED
|
@@ -447,10 +447,11 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
|
|
| 447 |
|
| 448 |
#define GGML_COMMON_IMPL
|
| 449 |
#elif defined(GGML_COMMON_IMPL_SYCL)
|
|
|
|
| 450 |
#include <cstdint>
|
| 451 |
|
| 452 |
-
#define GGML_TABLE_BEGIN(type, name, size) static
|
| 453 |
-
#define GGML_TABLE_END() }
|
| 454 |
|
| 455 |
#define GGML_COMMON_IMPL
|
| 456 |
#endif
|
|
|
|
| 447 |
|
| 448 |
#define GGML_COMMON_IMPL
|
| 449 |
#elif defined(GGML_COMMON_IMPL_SYCL)
|
| 450 |
+
|
| 451 |
#include <cstdint>
|
| 452 |
|
| 453 |
+
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
| 454 |
+
#define GGML_TABLE_END() };
|
| 455 |
|
| 456 |
#define GGML_COMMON_IMPL
|
| 457 |
#endif
|
ggml-sycl.cpp
CHANGED
|
@@ -8079,7 +8079,7 @@ template <bool need_check> static void
|
|
| 8079 |
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
|
| 8080 |
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
|
| 8081 |
const sycl::nd_item<3> &item_ct1,
|
| 8082 |
-
const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) {
|
| 8083 |
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
|
| 8084 |
item_ct1.get_local_id(1);
|
| 8085 |
|
|
@@ -9956,17 +9956,14 @@ static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
|
|
| 9956 |
dpct::queue_ptr stream) {
|
| 9957 |
const int nb = k / QK_K;
|
| 9958 |
{
|
| 9959 |
-
iq2xxs_grid.init(*stream);
|
| 9960 |
-
ksigns_iq2xs.init(*stream);
|
| 9961 |
-
kmask_iq2xs.init(*stream);
|
| 9962 |
|
| 9963 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 9964 |
{sycl::aspect::fp16});
|
| 9965 |
|
| 9966 |
stream->submit([&](sycl::handler &cgh) {
|
| 9967 |
-
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid
|
| 9968 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 9969 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 9970 |
|
| 9971 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 9972 |
sycl::range<3>(1, 1, 32),
|
|
@@ -9985,17 +9982,14 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int k,
|
|
| 9985 |
dpct::queue_ptr stream) {
|
| 9986 |
const int nb = k / QK_K;
|
| 9987 |
{
|
| 9988 |
-
iq2xs_grid.init(*stream);
|
| 9989 |
-
ksigns_iq2xs.init(*stream);
|
| 9990 |
-
kmask_iq2xs.init(*stream);
|
| 9991 |
|
| 9992 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 9993 |
{sycl::aspect::fp16});
|
| 9994 |
|
| 9995 |
stream->submit([&](sycl::handler &cgh) {
|
| 9996 |
-
auto iq2xs_grid_ptr_ct1 = iq2xs_grid
|
| 9997 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 9998 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 9999 |
|
| 10000 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10001 |
sycl::range<3>(1, 1, 32),
|
|
@@ -10014,17 +10008,14 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k,
|
|
| 10014 |
dpct::queue_ptr stream) {
|
| 10015 |
const int nb = k / QK_K;
|
| 10016 |
{
|
| 10017 |
-
iq3xxs_grid.init(*stream);
|
| 10018 |
-
ksigns_iq2xs.init(*stream);
|
| 10019 |
-
kmask_iq2xs.init(*stream);
|
| 10020 |
|
| 10021 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10022 |
{sycl::aspect::fp16});
|
| 10023 |
|
| 10024 |
stream->submit([&](sycl::handler &cgh) {
|
| 10025 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid
|
| 10026 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 10027 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 10028 |
|
| 10029 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10030 |
sycl::range<3>(1, 1, 32),
|
|
@@ -10043,17 +10034,14 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k,
|
|
| 10043 |
dpct::queue_ptr stream) {
|
| 10044 |
const int nb = k / QK_K;
|
| 10045 |
{
|
| 10046 |
-
iq3s_grid.init(*stream);
|
| 10047 |
-
ksigns_iq2xs.init(*stream);
|
| 10048 |
-
kmask_iq2xs.init(*stream);
|
| 10049 |
|
| 10050 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10051 |
{sycl::aspect::fp16});
|
| 10052 |
|
| 10053 |
stream->submit([&](sycl::handler &cgh) {
|
| 10054 |
-
auto iq3s_grid_ptr_ct1 = iq3s_grid
|
| 10055 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 10056 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 10057 |
|
| 10058 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10059 |
sycl::range<3>(1, 1, 32),
|
|
@@ -10072,17 +10060,14 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
|
| 10072 |
dpct::queue_ptr stream) {
|
| 10073 |
const int nb = k / QK_K;
|
| 10074 |
{
|
| 10075 |
-
iq1s_grid_gpu.init(*stream);
|
| 10076 |
-
ksigns_iq2xs.init(*stream);
|
| 10077 |
-
kmask_iq2xs.init(*stream);
|
| 10078 |
|
| 10079 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10080 |
{sycl::aspect::fp16});
|
| 10081 |
|
| 10082 |
stream->submit([&](sycl::handler &cgh) {
|
| 10083 |
-
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu
|
| 10084 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 10085 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 10086 |
|
| 10087 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10088 |
sycl::range<3>(1, 1, 32),
|
|
@@ -10415,12 +10400,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10415 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10416 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10417 |
{
|
| 10418 |
-
iq3xxs_grid.init(*stream);
|
| 10419 |
-
ksigns64.init(*stream);
|
| 10420 |
|
| 10421 |
stream->submit([&](sycl::handler &cgh) {
|
| 10422 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10423 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10424 |
|
| 10425 |
cgh.parallel_for(
|
| 10426 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10428,8 +10409,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10428 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10429 |
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
|
| 10430 |
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
|
| 10431 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10432 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10433 |
});
|
| 10434 |
});
|
| 10435 |
}
|
|
@@ -10444,12 +10424,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10444 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10445 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10446 |
{
|
| 10447 |
-
iq3xxs_grid.init(*stream);
|
| 10448 |
-
ksigns64.init(*stream);
|
| 10449 |
|
| 10450 |
stream->submit([&](sycl::handler &cgh) {
|
| 10451 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10452 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10453 |
|
| 10454 |
cgh.parallel_for(
|
| 10455 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10457,8 +10433,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10457 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10458 |
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
|
| 10459 |
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
|
| 10460 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10461 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10462 |
});
|
| 10463 |
});
|
| 10464 |
}
|
|
@@ -10473,12 +10448,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10473 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10474 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10475 |
{
|
| 10476 |
-
iq3xxs_grid.init(*stream);
|
| 10477 |
-
ksigns64.init(*stream);
|
| 10478 |
|
| 10479 |
stream->submit([&](sycl::handler &cgh) {
|
| 10480 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10481 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10482 |
|
| 10483 |
cgh.parallel_for(
|
| 10484 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10486,8 +10457,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10486 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10487 |
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
|
| 10488 |
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
|
| 10489 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10490 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10491 |
});
|
| 10492 |
});
|
| 10493 |
}
|
|
@@ -10502,12 +10472,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10502 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10503 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10504 |
{
|
| 10505 |
-
iq3xxs_grid.init(*stream);
|
| 10506 |
-
ksigns64.init(*stream);
|
| 10507 |
|
| 10508 |
stream->submit([&](sycl::handler &cgh) {
|
| 10509 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10510 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10511 |
|
| 10512 |
cgh.parallel_for(
|
| 10513 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10515,8 +10481,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10515 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10516 |
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
|
| 10517 |
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
|
| 10518 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10519 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10520 |
});
|
| 10521 |
});
|
| 10522 |
}
|
|
@@ -10531,12 +10496,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10531 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10532 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10533 |
{
|
| 10534 |
-
iq3xxs_grid.init(*stream);
|
| 10535 |
-
ksigns64.init(*stream);
|
| 10536 |
|
| 10537 |
stream->submit([&](sycl::handler &cgh) {
|
| 10538 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10539 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10540 |
|
| 10541 |
cgh.parallel_for(
|
| 10542 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10544,8 +10505,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10544 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10545 |
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
|
| 10546 |
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
|
| 10547 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10548 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10549 |
});
|
| 10550 |
});
|
| 10551 |
}
|
|
@@ -10560,12 +10520,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10560 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10561 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10562 |
{
|
| 10563 |
-
iq3xxs_grid.init(*stream);
|
| 10564 |
-
ksigns64.init(*stream);
|
| 10565 |
|
| 10566 |
stream->submit([&](sycl::handler &cgh) {
|
| 10567 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10568 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10569 |
|
| 10570 |
cgh.parallel_for(
|
| 10571 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10573,8 +10529,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10573 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10574 |
mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
|
| 10575 |
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
|
| 10576 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10577 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10578 |
});
|
| 10579 |
});
|
| 10580 |
}
|
|
@@ -10589,12 +10544,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10589 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10590 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10591 |
{
|
| 10592 |
-
iq3xxs_grid.init(*stream);
|
| 10593 |
-
ksigns64.init(*stream);
|
| 10594 |
|
| 10595 |
stream->submit([&](sycl::handler &cgh) {
|
| 10596 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10597 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10598 |
|
| 10599 |
cgh.parallel_for(
|
| 10600 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10602,8 +10553,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10602 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10603 |
mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
|
| 10604 |
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
|
| 10605 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10606 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10607 |
});
|
| 10608 |
});
|
| 10609 |
}
|
|
@@ -10618,12 +10568,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10618 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10619 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10620 |
{
|
| 10621 |
-
iq3xxs_grid.init(*stream);
|
| 10622 |
-
ksigns64.init(*stream);
|
| 10623 |
|
| 10624 |
stream->submit([&](sycl::handler &cgh) {
|
| 10625 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10626 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10627 |
|
| 10628 |
cgh.parallel_for(
|
| 10629 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10631,8 +10577,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10631 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10632 |
mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
|
| 10633 |
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
|
| 10634 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10635 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10636 |
});
|
| 10637 |
});
|
| 10638 |
}
|
|
@@ -10647,12 +10592,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10647 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10648 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10649 |
{
|
| 10650 |
-
iq3xxs_grid.init(*stream);
|
| 10651 |
-
ksigns64.init(*stream);
|
| 10652 |
|
| 10653 |
stream->submit([&](sycl::handler &cgh) {
|
| 10654 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10655 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10656 |
|
| 10657 |
cgh.parallel_for(
|
| 10658 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10660,8 +10601,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10660 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10661 |
mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
|
| 10662 |
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
|
| 10663 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10664 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10665 |
});
|
| 10666 |
});
|
| 10667 |
}
|
|
@@ -10676,12 +10616,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10676 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10677 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10678 |
{
|
| 10679 |
-
iq3xxs_grid.init(*stream);
|
| 10680 |
-
ksigns64.init(*stream);
|
| 10681 |
|
| 10682 |
stream->submit([&](sycl::handler &cgh) {
|
| 10683 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
| 10684 |
-
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
| 10685 |
|
| 10686 |
cgh.parallel_for(
|
| 10687 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10689,13 +10625,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10689 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10690 |
mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
|
| 10691 |
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
|
| 10692 |
-
vx, vy, dst, ncols, nrows, item_ct1
|
| 10693 |
-
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
|
| 10694 |
});
|
| 10695 |
});
|
| 10696 |
}
|
| 10697 |
}
|
| 10698 |
|
|
|
|
| 10699 |
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
| 10700 |
float *dst, const int ncols,
|
| 10701 |
const int nrows,
|
|
@@ -10705,15 +10641,11 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10705 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10706 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10707 |
{
|
| 10708 |
-
iq2xxs_grid.init(*stream);
|
| 10709 |
-
ksigns_iq2xs.init(*stream);
|
| 10710 |
-
kmask_iq2xs.init(*stream);
|
| 10711 |
-
|
| 10712 |
|
| 10713 |
stream->submit([&](sycl::handler &cgh) {
|
| 10714 |
-
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid
|
| 10715 |
-
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs
|
| 10716 |
-
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs
|
| 10717 |
|
| 10718 |
cgh.parallel_for(
|
| 10719 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10736,12 +10668,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10736 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10737 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10738 |
{
|
| 10739 |
-
iq2xs_grid.init(*stream);
|
| 10740 |
-
ksigns64.init(*stream);
|
| 10741 |
|
| 10742 |
stream->submit([&](sycl::handler &cgh) {
|
| 10743 |
-
auto iq2xs_grid_ptr_ct1 = iq2xs_grid
|
| 10744 |
-
auto ksigns64_ptr_ct1 = ksigns64
|
| 10745 |
|
| 10746 |
cgh.parallel_for(
|
| 10747 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10764,12 +10694,10 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10764 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10765 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10766 |
{
|
| 10767 |
-
iq3xxs_grid.init(*stream);
|
| 10768 |
-
ksigns64.init(*stream);
|
| 10769 |
|
| 10770 |
stream->submit([&](sycl::handler &cgh) {
|
| 10771 |
-
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid
|
| 10772 |
-
auto ksigns64_ptr_ct1 = ksigns64
|
| 10773 |
|
| 10774 |
cgh.parallel_for(
|
| 10775 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10792,12 +10720,10 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10792 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10793 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10794 |
{
|
| 10795 |
-
iq3s_grid.init(*stream);
|
| 10796 |
-
ksigns64.init(*stream);
|
| 10797 |
|
| 10798 |
stream->submit([&](sycl::handler &cgh) {
|
| 10799 |
-
auto iq3s_grid_ptr_ct1 = iq3s_grid
|
| 10800 |
-
auto ksigns64_ptr_ct1 = ksigns64
|
| 10801 |
|
| 10802 |
cgh.parallel_for(
|
| 10803 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
@@ -10820,12 +10746,10 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
|
|
| 10820 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10821 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10822 |
{
|
| 10823 |
-
iq1s_grid_gpu.init(*stream);
|
| 10824 |
-
ksigns64.init(*stream);
|
| 10825 |
|
| 10826 |
stream->submit([&](sycl::handler &cgh) {
|
| 10827 |
-
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu
|
| 10828 |
-
auto ksigns64_ptr_ct1 = ksigns64
|
| 10829 |
|
| 10830 |
cgh.parallel_for(
|
| 10831 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 8079 |
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
|
| 8080 |
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
|
| 8081 |
const sycl::nd_item<3> &item_ct1,
|
| 8082 |
+
const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) {
|
| 8083 |
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
|
| 8084 |
item_ct1.get_local_id(1);
|
| 8085 |
|
|
|
|
| 9956 |
dpct::queue_ptr stream) {
|
| 9957 |
const int nb = k / QK_K;
|
| 9958 |
{
|
|
|
|
|
|
|
|
|
|
| 9959 |
|
| 9960 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 9961 |
{sycl::aspect::fp16});
|
| 9962 |
|
| 9963 |
stream->submit([&](sycl::handler &cgh) {
|
| 9964 |
+
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
| 9965 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 9966 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 9967 |
|
| 9968 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 9969 |
sycl::range<3>(1, 1, 32),
|
|
|
|
| 9982 |
dpct::queue_ptr stream) {
|
| 9983 |
const int nb = k / QK_K;
|
| 9984 |
{
|
|
|
|
|
|
|
|
|
|
| 9985 |
|
| 9986 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 9987 |
{sycl::aspect::fp16});
|
| 9988 |
|
| 9989 |
stream->submit([&](sycl::handler &cgh) {
|
| 9990 |
+
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
| 9991 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 9992 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 9993 |
|
| 9994 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 9995 |
sycl::range<3>(1, 1, 32),
|
|
|
|
| 10008 |
dpct::queue_ptr stream) {
|
| 10009 |
const int nb = k / QK_K;
|
| 10010 |
{
|
|
|
|
|
|
|
|
|
|
| 10011 |
|
| 10012 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10013 |
{sycl::aspect::fp16});
|
| 10014 |
|
| 10015 |
stream->submit([&](sycl::handler &cgh) {
|
| 10016 |
+
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
| 10017 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 10018 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 10019 |
|
| 10020 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10021 |
sycl::range<3>(1, 1, 32),
|
|
|
|
| 10034 |
dpct::queue_ptr stream) {
|
| 10035 |
const int nb = k / QK_K;
|
| 10036 |
{
|
|
|
|
|
|
|
|
|
|
| 10037 |
|
| 10038 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10039 |
{sycl::aspect::fp16});
|
| 10040 |
|
| 10041 |
stream->submit([&](sycl::handler &cgh) {
|
| 10042 |
+
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
| 10043 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 10044 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 10045 |
|
| 10046 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10047 |
sycl::range<3>(1, 1, 32),
|
|
|
|
| 10060 |
dpct::queue_ptr stream) {
|
| 10061 |
const int nb = k / QK_K;
|
| 10062 |
{
|
|
|
|
|
|
|
|
|
|
| 10063 |
|
| 10064 |
dpct::has_capability_or_fail(stream->get_device(),
|
| 10065 |
{sycl::aspect::fp16});
|
| 10066 |
|
| 10067 |
stream->submit([&](sycl::handler &cgh) {
|
| 10068 |
+
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
| 10069 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 10070 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 10071 |
|
| 10072 |
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
| 10073 |
sycl::range<3>(1, 1, 32),
|
|
|
|
| 10400 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10401 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10402 |
{
|
|
|
|
|
|
|
| 10403 |
|
| 10404 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10405 |
|
| 10406 |
cgh.parallel_for(
|
| 10407 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10409 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10410 |
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
|
| 10411 |
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
|
| 10412 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10413 |
});
|
| 10414 |
});
|
| 10415 |
}
|
|
|
|
| 10424 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10425 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10426 |
{
|
|
|
|
|
|
|
| 10427 |
|
| 10428 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10429 |
|
| 10430 |
cgh.parallel_for(
|
| 10431 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10433 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10434 |
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
|
| 10435 |
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
|
| 10436 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10437 |
});
|
| 10438 |
});
|
| 10439 |
}
|
|
|
|
| 10448 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10449 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10450 |
{
|
|
|
|
|
|
|
| 10451 |
|
| 10452 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10453 |
|
| 10454 |
cgh.parallel_for(
|
| 10455 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10457 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10458 |
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
|
| 10459 |
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
|
| 10460 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10461 |
});
|
| 10462 |
});
|
| 10463 |
}
|
|
|
|
| 10472 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10473 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10474 |
{
|
|
|
|
|
|
|
| 10475 |
|
| 10476 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10477 |
|
| 10478 |
cgh.parallel_for(
|
| 10479 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10481 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10482 |
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
|
| 10483 |
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
|
| 10484 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10485 |
});
|
| 10486 |
});
|
| 10487 |
}
|
|
|
|
| 10496 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10497 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10498 |
{
|
|
|
|
|
|
|
| 10499 |
|
| 10500 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10501 |
|
| 10502 |
cgh.parallel_for(
|
| 10503 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10505 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10506 |
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
|
| 10507 |
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
|
| 10508 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10509 |
});
|
| 10510 |
});
|
| 10511 |
}
|
|
|
|
| 10520 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10521 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10522 |
{
|
|
|
|
|
|
|
| 10523 |
|
| 10524 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10525 |
|
| 10526 |
cgh.parallel_for(
|
| 10527 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10529 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10530 |
mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
|
| 10531 |
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
|
| 10532 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10533 |
});
|
| 10534 |
});
|
| 10535 |
}
|
|
|
|
| 10544 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10545 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10546 |
{
|
|
|
|
|
|
|
| 10547 |
|
| 10548 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10549 |
|
| 10550 |
cgh.parallel_for(
|
| 10551 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10553 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10554 |
mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
|
| 10555 |
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
|
| 10556 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10557 |
});
|
| 10558 |
});
|
| 10559 |
}
|
|
|
|
| 10568 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10569 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10570 |
{
|
|
|
|
|
|
|
| 10571 |
|
| 10572 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10573 |
|
| 10574 |
cgh.parallel_for(
|
| 10575 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10577 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10578 |
mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
|
| 10579 |
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
|
| 10580 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10581 |
});
|
| 10582 |
});
|
| 10583 |
}
|
|
|
|
| 10592 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10593 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10594 |
{
|
|
|
|
|
|
|
| 10595 |
|
| 10596 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10597 |
|
| 10598 |
cgh.parallel_for(
|
| 10599 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10601 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10602 |
mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
|
| 10603 |
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
|
| 10604 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10605 |
});
|
| 10606 |
});
|
| 10607 |
}
|
|
|
|
| 10616 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10617 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10618 |
{
|
|
|
|
|
|
|
| 10619 |
|
| 10620 |
stream->submit([&](sycl::handler &cgh) {
|
|
|
|
|
|
|
| 10621 |
|
| 10622 |
cgh.parallel_for(
|
| 10623 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10625 |
[[intel::reqd_sub_group_size(32)]] {
|
| 10626 |
mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
|
| 10627 |
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
|
| 10628 |
+
vx, vy, dst, ncols, nrows, item_ct1);
|
|
|
|
| 10629 |
});
|
| 10630 |
});
|
| 10631 |
}
|
| 10632 |
}
|
| 10633 |
|
| 10634 |
+
|
| 10635 |
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
| 10636 |
float *dst, const int ncols,
|
| 10637 |
const int nrows,
|
|
|
|
| 10641 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10642 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10643 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10644 |
|
| 10645 |
stream->submit([&](sycl::handler &cgh) {
|
| 10646 |
+
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
| 10647 |
+
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
| 10648 |
+
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
| 10649 |
|
| 10650 |
cgh.parallel_for(
|
| 10651 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10668 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10669 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10670 |
{
|
|
|
|
|
|
|
| 10671 |
|
| 10672 |
stream->submit([&](sycl::handler &cgh) {
|
| 10673 |
+
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
| 10674 |
+
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
| 10675 |
|
| 10676 |
cgh.parallel_for(
|
| 10677 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10694 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10695 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10696 |
{
|
|
|
|
|
|
|
| 10697 |
|
| 10698 |
stream->submit([&](sycl::handler &cgh) {
|
| 10699 |
+
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
| 10700 |
+
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
| 10701 |
|
| 10702 |
cgh.parallel_for(
|
| 10703 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10720 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10721 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10722 |
{
|
|
|
|
|
|
|
| 10723 |
|
| 10724 |
stream->submit([&](sycl::handler &cgh) {
|
| 10725 |
+
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
| 10726 |
+
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
| 10727 |
|
| 10728 |
cgh.parallel_for(
|
| 10729 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
|
|
|
| 10746 |
const sycl::range<3> block_nums(1, 1, block_num_y);
|
| 10747 |
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
| 10748 |
{
|
|
|
|
|
|
|
| 10749 |
|
| 10750 |
stream->submit([&](sycl::handler &cgh) {
|
| 10751 |
+
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
| 10752 |
+
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
| 10753 |
|
| 10754 |
cgh.parallel_for(
|
| 10755 |
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|