Spaces:
Running
Running
AidanBeltonS
commited on
Commit
·
6dbe297
1
Parent(s):
1554348
Use multi_ptr to clean up deprecated warnings (llama/8256)
Browse files- ggml/src/ggml-sycl/common.hpp +6 -0
- ggml/src/ggml-sycl/convert.cpp +1 -1
- ggml/src/ggml-sycl/mmq.cpp +92 -92
- ggml/src/ggml-sycl/norm.cpp +3 -3
- ggml/src/ggml-sycl/softmax.cpp +1 -1
ggml/src/ggml-sycl/common.hpp
CHANGED
|
@@ -346,4 +346,10 @@ inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
|
|
| 346 |
return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);
|
| 347 |
}
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
#endif // GGML_SYCL_COMMON_HPP
|
|
|
|
| 346 |
return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);
|
| 347 |
}
|
| 348 |
|
| 349 |
+
// Helper for accessing pointers with no warnings
|
| 350 |
+
template <typename Tp, int dim>
|
| 351 |
+
static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
|
| 352 |
+
return acc.template get_multi_ptr<sycl::access::decorated::no>().get();
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
#endif // GGML_SYCL_COMMON_HPP
|
ggml/src/ggml-sycl/convert.cpp
CHANGED
|
@@ -158,7 +158,7 @@ static void dequantize_row_q4_K_sycl(const void *vx, dst_t *y, const int k,
|
|
| 158 |
sycl::range<3>(1, 1, 32),
|
| 159 |
sycl::range<3>(1, 1, 32)),
|
| 160 |
[=](sycl::nd_item<3> item_ct1) {
|
| 161 |
-
dequantize_block_q4_K(vx, y,
|
| 162 |
});
|
| 163 |
});
|
| 164 |
}
|
|
|
|
| 158 |
sycl::range<3>(1, 1, 32),
|
| 159 |
sycl::range<3>(1, 1, 32)),
|
| 160 |
[=](sycl::nd_item<3> item_ct1) {
|
| 161 |
+
dequantize_block_q4_K(vx, y, get_pointer(scale_local_acc), item_ct1);
|
| 162 |
});
|
| 163 |
});
|
| 164 |
}
|
ggml/src/ggml-sycl/mmq.cpp
CHANGED
|
@@ -1835,10 +1835,10 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 1835 |
mul_mat_q4_0<need_check>(
|
| 1836 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1837 |
nrows_dst, item_ct1,
|
| 1838 |
-
|
| 1839 |
-
|
| 1840 |
-
|
| 1841 |
-
|
| 1842 |
});
|
| 1843 |
});
|
| 1844 |
}
|
|
@@ -1870,10 +1870,10 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 1870 |
mul_mat_q4_0<need_check>(
|
| 1871 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1872 |
nrows_dst, item_ct1,
|
| 1873 |
-
|
| 1874 |
-
|
| 1875 |
-
|
| 1876 |
-
|
| 1877 |
});
|
| 1878 |
});
|
| 1879 |
}
|
|
@@ -1950,10 +1950,10 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 1950 |
mul_mat_q4_1<need_check>(
|
| 1951 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1952 |
nrows_dst, item_ct1,
|
| 1953 |
-
|
| 1954 |
-
|
| 1955 |
-
|
| 1956 |
-
|
| 1957 |
});
|
| 1958 |
});
|
| 1959 |
}
|
|
@@ -1985,10 +1985,10 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 1985 |
mul_mat_q4_1<need_check>(
|
| 1986 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1987 |
nrows_dst, item_ct1,
|
| 1988 |
-
|
| 1989 |
-
|
| 1990 |
-
|
| 1991 |
-
|
| 1992 |
});
|
| 1993 |
});
|
| 1994 |
}
|
|
@@ -2065,10 +2065,10 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2065 |
mul_mat_q5_0<need_check>(
|
| 2066 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2067 |
nrows_dst, item_ct1,
|
| 2068 |
-
|
| 2069 |
-
|
| 2070 |
-
|
| 2071 |
-
|
| 2072 |
});
|
| 2073 |
});
|
| 2074 |
}
|
|
@@ -2100,10 +2100,10 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2100 |
mul_mat_q5_0<need_check>(
|
| 2101 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2102 |
nrows_dst, item_ct1,
|
| 2103 |
-
|
| 2104 |
-
|
| 2105 |
-
|
| 2106 |
-
|
| 2107 |
});
|
| 2108 |
});
|
| 2109 |
}
|
|
@@ -2180,10 +2180,10 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2180 |
mul_mat_q5_1<need_check>(
|
| 2181 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2182 |
nrows_dst, item_ct1,
|
| 2183 |
-
|
| 2184 |
-
|
| 2185 |
-
|
| 2186 |
-
|
| 2187 |
});
|
| 2188 |
});
|
| 2189 |
}
|
|
@@ -2215,10 +2215,10 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2215 |
mul_mat_q5_1<need_check>(
|
| 2216 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2217 |
nrows_dst, item_ct1,
|
| 2218 |
-
|
| 2219 |
-
|
| 2220 |
-
|
| 2221 |
-
|
| 2222 |
});
|
| 2223 |
});
|
| 2224 |
}
|
|
@@ -2295,10 +2295,10 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2295 |
mul_mat_q8_0<need_check>(
|
| 2296 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2297 |
nrows_dst, item_ct1,
|
| 2298 |
-
|
| 2299 |
-
|
| 2300 |
-
|
| 2301 |
-
|
| 2302 |
});
|
| 2303 |
});
|
| 2304 |
}
|
|
@@ -2330,10 +2330,10 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2330 |
mul_mat_q8_0<need_check>(
|
| 2331 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2332 |
nrows_dst, item_ct1,
|
| 2333 |
-
|
| 2334 |
-
|
| 2335 |
-
|
| 2336 |
-
|
| 2337 |
});
|
| 2338 |
});
|
| 2339 |
}
|
|
@@ -2412,11 +2412,11 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2412 |
mul_mat_q2_K<need_check>(
|
| 2413 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2414 |
nrows_dst, item_ct1,
|
| 2415 |
-
|
| 2416 |
-
|
| 2417 |
-
|
| 2418 |
-
|
| 2419 |
-
|
| 2420 |
});
|
| 2421 |
});
|
| 2422 |
}
|
|
@@ -2450,11 +2450,11 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2450 |
mul_mat_q2_K<need_check>(
|
| 2451 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2452 |
nrows_dst, item_ct1,
|
| 2453 |
-
|
| 2454 |
-
|
| 2455 |
-
|
| 2456 |
-
|
| 2457 |
-
|
| 2458 |
});
|
| 2459 |
});
|
| 2460 |
}
|
|
@@ -2537,12 +2537,12 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2537 |
mul_mat_q3_K<need_check>(
|
| 2538 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2539 |
nrows_dst, item_ct1,
|
| 2540 |
-
|
| 2541 |
-
|
| 2542 |
-
|
| 2543 |
-
|
| 2544 |
-
|
| 2545 |
-
|
| 2546 |
});
|
| 2547 |
});
|
| 2548 |
}
|
|
@@ -2578,12 +2578,12 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2578 |
mul_mat_q3_K<need_check>(
|
| 2579 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2580 |
nrows_dst, item_ct1,
|
| 2581 |
-
|
| 2582 |
-
|
| 2583 |
-
|
| 2584 |
-
|
| 2585 |
-
|
| 2586 |
-
|
| 2587 |
});
|
| 2588 |
});
|
| 2589 |
}
|
|
@@ -2663,11 +2663,11 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2663 |
mul_mat_q4_K<need_check>(
|
| 2664 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2665 |
nrows_dst, item_ct1,
|
| 2666 |
-
|
| 2667 |
-
|
| 2668 |
-
|
| 2669 |
-
|
| 2670 |
-
|
| 2671 |
});
|
| 2672 |
});
|
| 2673 |
}
|
|
@@ -2701,11 +2701,11 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2701 |
mul_mat_q4_K<need_check>(
|
| 2702 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2703 |
nrows_dst, item_ct1,
|
| 2704 |
-
|
| 2705 |
-
|
| 2706 |
-
|
| 2707 |
-
|
| 2708 |
-
|
| 2709 |
});
|
| 2710 |
});
|
| 2711 |
}
|
|
@@ -2784,11 +2784,11 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2784 |
mul_mat_q5_K<need_check>(
|
| 2785 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2786 |
nrows_dst, item_ct1,
|
| 2787 |
-
|
| 2788 |
-
|
| 2789 |
-
|
| 2790 |
-
|
| 2791 |
-
|
| 2792 |
});
|
| 2793 |
});
|
| 2794 |
}
|
|
@@ -2822,11 +2822,11 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2822 |
mul_mat_q5_K<need_check>(
|
| 2823 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2824 |
nrows_dst, item_ct1,
|
| 2825 |
-
|
| 2826 |
-
|
| 2827 |
-
|
| 2828 |
-
|
| 2829 |
-
|
| 2830 |
});
|
| 2831 |
});
|
| 2832 |
}
|
|
@@ -2905,11 +2905,11 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2905 |
mul_mat_q6_K<need_check>(
|
| 2906 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2907 |
nrows_dst, item_ct1,
|
| 2908 |
-
|
| 2909 |
-
|
| 2910 |
-
|
| 2911 |
-
|
| 2912 |
-
|
| 2913 |
});
|
| 2914 |
});
|
| 2915 |
}
|
|
@@ -2943,11 +2943,11 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
|
| 2943 |
mul_mat_q6_K<need_check>(
|
| 2944 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2945 |
nrows_dst, item_ct1,
|
| 2946 |
-
|
| 2947 |
-
|
| 2948 |
-
|
| 2949 |
-
|
| 2950 |
-
|
| 2951 |
});
|
| 2952 |
});
|
| 2953 |
}
|
|
|
|
| 1835 |
mul_mat_q4_0<need_check>(
|
| 1836 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1837 |
nrows_dst, item_ct1,
|
| 1838 |
+
get_pointer(tile_x_qs_q4_0_acc_ct1),
|
| 1839 |
+
get_pointer(tile_x_d_q4_0_acc_ct1),
|
| 1840 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 1841 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 1842 |
});
|
| 1843 |
});
|
| 1844 |
}
|
|
|
|
| 1870 |
mul_mat_q4_0<need_check>(
|
| 1871 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1872 |
nrows_dst, item_ct1,
|
| 1873 |
+
get_pointer(tile_x_qs_q4_0_acc_ct1),
|
| 1874 |
+
get_pointer(tile_x_d_q4_0_acc_ct1),
|
| 1875 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 1876 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 1877 |
});
|
| 1878 |
});
|
| 1879 |
}
|
|
|
|
| 1950 |
mul_mat_q4_1<need_check>(
|
| 1951 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1952 |
nrows_dst, item_ct1,
|
| 1953 |
+
get_pointer(tile_x_qs_q4_1_acc_ct1),
|
| 1954 |
+
get_pointer(tile_x_dm_q4_1_acc_ct1),
|
| 1955 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 1956 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 1957 |
});
|
| 1958 |
});
|
| 1959 |
}
|
|
|
|
| 1985 |
mul_mat_q4_1<need_check>(
|
| 1986 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 1987 |
nrows_dst, item_ct1,
|
| 1988 |
+
get_pointer(tile_x_qs_q4_1_acc_ct1),
|
| 1989 |
+
get_pointer(tile_x_dm_q4_1_acc_ct1),
|
| 1990 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 1991 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 1992 |
});
|
| 1993 |
});
|
| 1994 |
}
|
|
|
|
| 2065 |
mul_mat_q5_0<need_check>(
|
| 2066 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2067 |
nrows_dst, item_ct1,
|
| 2068 |
+
get_pointer(tile_x_ql_q5_0_acc_ct1),
|
| 2069 |
+
get_pointer(tile_x_d_q5_0_acc_ct1),
|
| 2070 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2071 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2072 |
});
|
| 2073 |
});
|
| 2074 |
}
|
|
|
|
| 2100 |
mul_mat_q5_0<need_check>(
|
| 2101 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2102 |
nrows_dst, item_ct1,
|
| 2103 |
+
get_pointer(tile_x_ql_q5_0_acc_ct1),
|
| 2104 |
+
get_pointer(tile_x_d_q5_0_acc_ct1),
|
| 2105 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2106 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2107 |
});
|
| 2108 |
});
|
| 2109 |
}
|
|
|
|
| 2180 |
mul_mat_q5_1<need_check>(
|
| 2181 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2182 |
nrows_dst, item_ct1,
|
| 2183 |
+
get_pointer(tile_x_ql_q5_1_acc_ct1),
|
| 2184 |
+
get_pointer(tile_x_dm_q5_1_acc_ct1),
|
| 2185 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2186 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2187 |
});
|
| 2188 |
});
|
| 2189 |
}
|
|
|
|
| 2215 |
mul_mat_q5_1<need_check>(
|
| 2216 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2217 |
nrows_dst, item_ct1,
|
| 2218 |
+
get_pointer(tile_x_ql_q5_1_acc_ct1),
|
| 2219 |
+
get_pointer(tile_x_dm_q5_1_acc_ct1),
|
| 2220 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2221 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2222 |
});
|
| 2223 |
});
|
| 2224 |
}
|
|
|
|
| 2295 |
mul_mat_q8_0<need_check>(
|
| 2296 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2297 |
nrows_dst, item_ct1,
|
| 2298 |
+
get_pointer(tile_x_qs_q8_0_acc_ct1),
|
| 2299 |
+
get_pointer(tile_x_d_q8_0_acc_ct1),
|
| 2300 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2301 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2302 |
});
|
| 2303 |
});
|
| 2304 |
}
|
|
|
|
| 2330 |
mul_mat_q8_0<need_check>(
|
| 2331 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2332 |
nrows_dst, item_ct1,
|
| 2333 |
+
get_pointer(tile_x_qs_q8_0_acc_ct1),
|
| 2334 |
+
get_pointer(tile_x_d_q8_0_acc_ct1),
|
| 2335 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2336 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2337 |
});
|
| 2338 |
});
|
| 2339 |
}
|
|
|
|
| 2412 |
mul_mat_q2_K<need_check>(
|
| 2413 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2414 |
nrows_dst, item_ct1,
|
| 2415 |
+
get_pointer(tile_x_ql_q2_K_acc_ct1),
|
| 2416 |
+
get_pointer(tile_x_dm_q2_K_acc_ct1),
|
| 2417 |
+
get_pointer(tile_x_sc_q2_K_acc_ct1),
|
| 2418 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2419 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2420 |
});
|
| 2421 |
});
|
| 2422 |
}
|
|
|
|
| 2450 |
mul_mat_q2_K<need_check>(
|
| 2451 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2452 |
nrows_dst, item_ct1,
|
| 2453 |
+
get_pointer(tile_x_ql_q2_K_acc_ct1),
|
| 2454 |
+
get_pointer(tile_x_dm_q2_K_acc_ct1),
|
| 2455 |
+
get_pointer(tile_x_sc_q2_K_acc_ct1),
|
| 2456 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2457 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2458 |
});
|
| 2459 |
});
|
| 2460 |
}
|
|
|
|
| 2537 |
mul_mat_q3_K<need_check>(
|
| 2538 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2539 |
nrows_dst, item_ct1,
|
| 2540 |
+
get_pointer(tile_x_ql_q3_K_acc_ct1),
|
| 2541 |
+
get_pointer(tile_x_dm_q3_K_acc_ct1),
|
| 2542 |
+
get_pointer(tile_x_qh_q3_K_acc_ct1),
|
| 2543 |
+
get_pointer(tile_x_sc_q3_K_acc_ct1),
|
| 2544 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2545 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2546 |
});
|
| 2547 |
});
|
| 2548 |
}
|
|
|
|
| 2578 |
mul_mat_q3_K<need_check>(
|
| 2579 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2580 |
nrows_dst, item_ct1,
|
| 2581 |
+
get_pointer(tile_x_ql_q3_K_acc_ct1),
|
| 2582 |
+
get_pointer(tile_x_dm_q3_K_acc_ct1),
|
| 2583 |
+
get_pointer(tile_x_qh_q3_K_acc_ct1),
|
| 2584 |
+
get_pointer(tile_x_sc_q3_K_acc_ct1),
|
| 2585 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2586 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2587 |
});
|
| 2588 |
});
|
| 2589 |
}
|
|
|
|
| 2663 |
mul_mat_q4_K<need_check>(
|
| 2664 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2665 |
nrows_dst, item_ct1,
|
| 2666 |
+
get_pointer(tile_x_ql_q4_K_acc_ct1),
|
| 2667 |
+
get_pointer(tile_x_dm_q4_K_acc_ct1),
|
| 2668 |
+
get_pointer(tile_x_sc_q4_K_acc_ct1),
|
| 2669 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2670 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2671 |
});
|
| 2672 |
});
|
| 2673 |
}
|
|
|
|
| 2701 |
mul_mat_q4_K<need_check>(
|
| 2702 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2703 |
nrows_dst, item_ct1,
|
| 2704 |
+
get_pointer(tile_x_ql_q4_K_acc_ct1),
|
| 2705 |
+
get_pointer(tile_x_dm_q4_K_acc_ct1),
|
| 2706 |
+
get_pointer(tile_x_sc_q4_K_acc_ct1),
|
| 2707 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2708 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2709 |
});
|
| 2710 |
});
|
| 2711 |
}
|
|
|
|
| 2784 |
mul_mat_q5_K<need_check>(
|
| 2785 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2786 |
nrows_dst, item_ct1,
|
| 2787 |
+
get_pointer(tile_x_ql_q5_K_acc_ct1),
|
| 2788 |
+
get_pointer(tile_x_dm_q5_K_acc_ct1),
|
| 2789 |
+
get_pointer(tile_x_sc_q5_K_acc_ct1),
|
| 2790 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2791 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2792 |
});
|
| 2793 |
});
|
| 2794 |
}
|
|
|
|
| 2822 |
mul_mat_q5_K<need_check>(
|
| 2823 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2824 |
nrows_dst, item_ct1,
|
| 2825 |
+
get_pointer(tile_x_ql_q5_K_acc_ct1),
|
| 2826 |
+
get_pointer(tile_x_dm_q5_K_acc_ct1),
|
| 2827 |
+
get_pointer(tile_x_sc_q5_K_acc_ct1),
|
| 2828 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2829 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2830 |
});
|
| 2831 |
});
|
| 2832 |
}
|
|
|
|
| 2905 |
mul_mat_q6_K<need_check>(
|
| 2906 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2907 |
nrows_dst, item_ct1,
|
| 2908 |
+
get_pointer(tile_x_ql_acc_ct1),
|
| 2909 |
+
get_pointer(tile_x_dm_acc_ct1),
|
| 2910 |
+
get_pointer(tile_x_sc_acc_ct1),
|
| 2911 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2912 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2913 |
});
|
| 2914 |
});
|
| 2915 |
}
|
|
|
|
| 2943 |
mul_mat_q6_K<need_check>(
|
| 2944 |
vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y,
|
| 2945 |
nrows_dst, item_ct1,
|
| 2946 |
+
get_pointer(tile_x_ql_acc_ct1),
|
| 2947 |
+
get_pointer(tile_x_dm_acc_ct1),
|
| 2948 |
+
get_pointer(tile_x_sc_acc_ct1),
|
| 2949 |
+
get_pointer(tile_y_qs_acc_ct1),
|
| 2950 |
+
get_pointer(tile_y_ds_acc_ct1));
|
| 2951 |
});
|
| 2952 |
});
|
| 2953 |
}
|
ggml/src/ggml-sycl/norm.cpp
CHANGED
|
@@ -218,7 +218,7 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols,
|
|
| 218 |
[=](sycl::nd_item<3> item_ct1)
|
| 219 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 220 |
norm_f32(x, dst, ncols, eps, item_ct1,
|
| 221 |
-
|
| 222 |
});
|
| 223 |
});
|
| 224 |
}
|
|
@@ -265,7 +265,7 @@ static void group_norm_f32_sycl(const float* x, float* dst,
|
|
| 265 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 266 |
group_norm_f32(x, dst, group_size, ne_elements,
|
| 267 |
eps_ct4, item_ct1,
|
| 268 |
-
|
| 269 |
});
|
| 270 |
});
|
| 271 |
}
|
|
@@ -306,7 +306,7 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols,
|
|
| 306 |
[=](sycl::nd_item<3> item_ct1)
|
| 307 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 308 |
rms_norm_f32(x, dst, ncols, eps, item_ct1,
|
| 309 |
-
|
| 310 |
});
|
| 311 |
});
|
| 312 |
}
|
|
|
|
| 218 |
[=](sycl::nd_item<3> item_ct1)
|
| 219 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 220 |
norm_f32(x, dst, ncols, eps, item_ct1,
|
| 221 |
+
get_pointer(s_sum_acc_ct1), work_group_size);
|
| 222 |
});
|
| 223 |
});
|
| 224 |
}
|
|
|
|
| 265 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 266 |
group_norm_f32(x, dst, group_size, ne_elements,
|
| 267 |
eps_ct4, item_ct1,
|
| 268 |
+
get_pointer(s_sum_acc_ct1), work_group_size);
|
| 269 |
});
|
| 270 |
});
|
| 271 |
}
|
|
|
|
| 306 |
[=](sycl::nd_item<3> item_ct1)
|
| 307 |
[[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
| 308 |
rms_norm_f32(x, dst, ncols, eps, item_ct1,
|
| 309 |
+
get_pointer(s_sum_acc_ct1), work_group_size);
|
| 310 |
});
|
| 311 |
});
|
| 312 |
}
|
ggml/src/ggml-sycl/softmax.cpp
CHANGED
|
@@ -136,7 +136,7 @@ static void soft_max_f32_submitter(const float * x, const float * mask, float *
|
|
| 136 |
soft_max_f32<vals_smem, ncols_template, block_size_template>(x, mask, dst, ncols_par,
|
| 137 |
nrows_y, scale, max_bias, m0,
|
| 138 |
m1, n_head_log2, item_ct1,
|
| 139 |
-
|
| 140 |
});
|
| 141 |
});
|
| 142 |
}
|
|
|
|
| 136 |
soft_max_f32<vals_smem, ncols_template, block_size_template>(x, mask, dst, ncols_par,
|
| 137 |
nrows_y, scale, max_bias, m0,
|
| 138 |
m1, n_head_log2, item_ct1,
|
| 139 |
+
get_pointer(local_buf_acc));
|
| 140 |
});
|
| 141 |
});
|
| 142 |
}
|