mgroeber9110 (Marcus Groeber) committed
Commit 49e3343 · 1 parent: a027c1d
ggml : portability fixes for VS 2017 (llama/12150)

* Add include files for std::min/max and std::toupper/tolower
* win32: move _USE_MATH_DEFINES before includes to ensure M_PI is defined
* Use GGML_RESTRICT instead of "restrict" keyword everywhere, and use "__restrict" in MSVC plain C mode
* win32: only use __restrict in MSVC if C11/C17 support is not enabled

---------

Co-authored-by: Marcus Groeber <[email protected]>
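
For context on the M_PI bullet: MSVC's math headers only define the M_PI family of constants when _USE_MATH_DEFINES is visible before the header is first included, which is why the define had to move ahead of the includes. A minimal sketch of the required ordering (illustrative code, not taken from this commit):

// _USE_MATH_DEFINES must precede the first inclusion of the math header on
// MSVC; if any earlier include already pulled in <cmath>, M_PI stays
// undefined and this file fails to compile.
#define _USE_MATH_DEFINES
#include <cmath>
#include <cstdio>

int main() {
    std::printf("M_PI = %.17g\n", M_PI);
    return 0;
}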
- ggml/include/ggml.h +5 -1
- ggml/src/ggml-backend-reg.cpp +1 -0
- ggml/src/ggml-backend.cpp +1 -0
- ggml/src/ggml-cpu/ggml-cpu-quants.c +396 -396
- ggml/src/ggml-cpu/ggml-cpu.c +13 -13
- ggml/src/ggml-quants.c +114 -114
- ggml/src/ggml.c +3 -3
ggml/include/ggml.h
CHANGED
@@ -2140,7 +2140,11 @@ extern "C" {
 #    define GGML_RESTRICT
 #  endif
 #else
-#    define GGML_RESTRICT restrict
+#  if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
+#    define GGML_RESTRICT __restrict
+#  else
+#    define GGML_RESTRICT restrict
+#  endif
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
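
To show how the macro is consumed downstream, here is a hypothetical function (not one from this diff) written the way the quantization kernels below are: GGML_RESTRICT appears wherever plain C99 would say restrict, and the header picks the spelling the compiler accepts — empty in C++, __restrict in MSVC's pre-C11 C mode, restrict otherwise.

#include "ggml.h"

// Hypothetical kernel: the no-aliasing promise is identical under every
// expansion of GGML_RESTRICT that the header can select.
static void vec_scale(int64_t n, float * GGML_RESTRICT dst,
                      const float * GGML_RESTRICT src, float s) {
    for (int64_t i = 0; i < n; ++i) {
        dst[i] = s * src[i]; // dst and src never alias, so this can vectorize
    }
}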
ggml/src/ggml-backend-reg.cpp
CHANGED
@@ -8,6 +8,7 @@
 #include <string>
 #include <type_traits>
 #include <vector>
+#include <cctype>
 
 #ifdef _WIN32
 #  define WIN32_LEAN_AND_MEAN
ggml/src/ggml-backend.cpp
CHANGED
@@ -21,6 +21,7 @@
 #include <string.h>
 #include <string>
 #include <vector>
+#include <algorithm>
 
 #ifdef __APPLE__
 #include <sys/types.h>
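
Both added includes fix the same class of failure: VS 2017's standard headers do not reliably include one another, so std::min/std::max need <algorithm> and std::toupper/std::tolower need <cctype> spelled out explicitly. A sketch of code that compiles only by accident without them (illustrative, not from this commit):

#include <algorithm>
#include <cctype>
#include <string>

// Uppercase a copy of s, clamped to at most max_len characters; both library
// calls break on VS 2017 unless <algorithm> and <cctype> are included directly.
static std::string upper_clamped(const std::string & s, size_t max_len) {
    std::string out = s.substr(0, std::min(max_len, s.size())); // needs <algorithm>
    std::transform(out.begin(), out.end(), out.begin(),
                   [](unsigned char c) { return (char) std::toupper(c); }); // needs <cctype>
    return out;
}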
ggml/src/ggml-cpu/ggml-cpu-quants.c
CHANGED
Every change in this file is the same mechanical substitution described in the
commit message: the C99 "restrict" qualifier is replaced by the GGML_RESTRICT
macro, both in function signatures and in the local pointer declarations
inside the SIMD kernels (396 lines removed, 396 added). A representative hunk:

@@ -719,28 +719,28 @@ static inline __m128i packNibbles( __m256i bytes ) {
 }
 #endif //__loongarch_asx
 
-void quantize_row_q4_0(const float * restrict x, void * restrict y, int64_t k) {
+void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
     quantize_row_q4_0_ref(x, y, k);
 }

Local block pointers in the dot-product kernels change the same way, e.g.:

-        const block_q4_0 * restrict x0 = &x[ib + 0];
+        const block_q4_0 * GGML_RESTRICT x0 = &x[ib + 0];

The substitution is applied, with no functional change, to:

* the quantize_row_* wrappers: q4_0, q4_1, q5_0, q5_1, q8_0, q8_1, q2_K,
  q3_K, q4_K, q5_K, q6_K, tq1_0, tq2_0, and q8_K;
* the static helpers make_qx_quants, make_q3_quants, make_qkx1_quants,
  make_qkx2_quants, and get_scale_min_k4;
* the ggml_vec_dot_* kernels and all of their per-architecture branches
  (NEON, SVE, WASM SIMD, AVX2/SSE, POWER9 VSX, LoongArch LSX/LASX, RISC-V V):
  q4_0_q8_0, q4_1_q8_1, q5_0_q8_0, q5_1_q8_1, q8_0_q8_0, tq1_0_q8_K,
  tq2_0_q8_K, q2_K_q8_K, q3_K_q8_K, q4_K_q8_K, q5_K_q8_K, q6_K_q8_K,
  iq2_xxs_q8_K, iq2_xs_q8_K, iq2_s_q8_K, iq3_xxs_q8_K, iq3_s_q8_K,
  iq1_s_q8_K, and iq1_m_q8_K.

(The captured page breaks off inside the iq1_m_q8_K hunk; the diffs for
ggml/src/ggml-cpu/ggml-cpu.c, ggml/src/ggml-quants.c, and ggml/src/ggml.c
listed in the file summary above are not shown.)
|
|
| 11630 |
UNUSED(by);
|
| 11631 |
UNUSED(bs);
|
| 11632 |
|
| 11633 |
-
const block_iq1_m *
|
| 11634 |
-
const block_q8_K *
|
| 11635 |
|
| 11636 |
const int nb = n / QK_K;
|
| 11637 |
|
|
@@ -11912,7 +11912,7 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
|
|
| 11912 |
#endif
|
| 11913 |
}
|
| 11914 |
|
| 11915 |
-
void ggml_vec_dot_iq4_nl_q8_0(int n, float *
|
| 11916 |
assert(nrc == 1);
|
| 11917 |
UNUSED(nrc);
|
| 11918 |
UNUSED(bx);
|
|
@@ -11921,8 +11921,8 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|
| 11921 |
assert(n % QK4_NL == 0);
|
| 11922 |
static_assert(QK4_NL == QK8_0, "QK4_NL and QK8_0 must be the same");
|
| 11923 |
|
| 11924 |
-
const block_iq4_nl *
|
| 11925 |
-
const block_q8_0 *
|
| 11926 |
|
| 11927 |
const int nb = n / QK4_NL;
|
| 11928 |
|
|
@@ -12097,8 +12097,8 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|
| 12097 |
const uint8x16_t v_m = vec_splat_u8(0x0F);
|
| 12098 |
|
| 12099 |
for (; ib < nb; ++ib) {
|
| 12100 |
-
const block_iq4_nl *
|
| 12101 |
-
const block_q8_0 *
|
| 12102 |
|
| 12103 |
const uint8x16_t v_x = vec_xl(0, x0->qs);
|
| 12104 |
int8x16_t v_xl = (int8x16_t)vec_and(v_x, v_m);
|
|
@@ -12126,7 +12126,7 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|
| 12126 |
*s = sumf;
|
| 12127 |
}
|
| 12128 |
|
| 12129 |
-
void ggml_vec_dot_iq4_xs_q8_K(int n, float *
|
| 12130 |
assert(nrc == 1);
|
| 12131 |
UNUSED(nrc);
|
| 12132 |
UNUSED(bx);
|
|
@@ -12134,8 +12134,8 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
| 12134 |
UNUSED(bs);
|
| 12135 |
assert(n % QK_K == 0);
|
| 12136 |
|
| 12137 |
-
const block_iq4_xs *
|
| 12138 |
-
const block_q8_K *
|
| 12139 |
|
| 12140 |
const int nb = n / QK_K;
|
| 12141 |
|
|
@@ -12292,9 +12292,9 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
| 12292 |
|
| 12293 |
uint16_t h = x[ibl].scales_h;
|
| 12294 |
|
| 12295 |
-
const uint8_t *
|
| 12296 |
-
const uint8_t *
|
| 12297 |
-
const int8_t *
|
| 12298 |
|
| 12299 |
for (int ib = 0; ib < QK_K/64; ib ++ ) {
|
| 12300 |
__builtin_prefetch(q4, 0, 1);
|
|
@@ -12398,8 +12398,8 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
| 12398 |
float sumf = 0;
|
| 12399 |
|
| 12400 |
for (int ibl = 0; ibl < nb; ++ibl) {
|
| 12401 |
-
const uint8_t *
|
| 12402 |
-
const int8_t *
|
| 12403 |
|
| 12404 |
uint16_t h = x[ibl].scales_h;
|
| 12405 |
|
|
@@ -12479,12 +12479,12 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
| 12479 |
|
| 12480 |
// ============================ 4-bit non-linear quants
|
| 12481 |
|
| 12482 |
-
void quantize_row_iq4_nl(const float *
|
| 12483 |
assert(k % QK4_NL == 0);
|
| 12484 |
quantize_row_iq4_nl_ref(x, y, k);
|
| 12485 |
}
|
| 12486 |
|
| 12487 |
-
void quantize_row_iq4_xs(const float *
|
| 12488 |
assert(k % QK_K == 0);
|
| 12489 |
quantize_iq4_xs(x, y, 1, k, NULL);
|
| 12490 |
}
|
|
@@ -719,28 +719,28 @@
 }
 #endif //__loongarch_asx

+void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
     quantize_row_q4_0_ref(x, y, k);
 }

+void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
     quantize_row_q4_1_ref(x, y, k);
 }

+void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
     quantize_row_q5_0_ref(x, y, k);
 }

+void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
     quantize_row_q5_1_ref(x, y, k);
 }

+void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(QK8_0 == 32);
     assert(k % QK8_0 == 0);
     const int nb = k / QK8_0;

+    block_q8_0 * GGML_RESTRICT y = vy;

 #if defined(__ARM_NEON)
     for (int i = 0; i < nb; i++) {

@@ -1050,11 +1050,11 @@
 #endif
 }

+void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK8_1 == 0);
     const int nb = k / QK8_1;

+    block_q8_1 * GGML_RESTRICT y = vy;

 #if defined(__ARM_NEON)
     for (int i = 0; i < nb; i++) {

@@ -1428,8 +1428,8 @@
     return (i & 0x007fffff) - 0x00400000;
 }

+static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
+        const float * GGML_RESTRICT qw) {
     float max = 0;
     float amax = 0;
     for (int i = 0; i < n; ++i) {

@@ -1497,7 +1497,7 @@
     return scale;
 }

+static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
     float max = 0;
     float amax = 0;
     for (int i = 0; i < n; ++i) {

@@ -1556,7 +1556,7 @@
     return 1/iscale;
 }

+static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
         int ntry, float alpha) {
     float min = x[0];
     float max = x[0];

@@ -1599,8 +1599,8 @@
     return scale;
 }

+static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
+        uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
         float rmin, float rdelta, int nstep, bool use_mad) {
     float min = x[0];
     float max = x[0];

@@ -1680,7 +1680,7 @@
     return scale;
 }

+static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
     if (j < 4) {
         *d = q[j] & 63; *m = q[j + 4] & 63;
     } else {

@@ -1691,51 +1691,51 @@

 //========================- 2-bit (de)-quantization

+void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     quantize_row_q2_K_ref(x, vy, k);
 }

 //========================= 3-bit (de)-quantization

+void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     quantize_row_q3_K_ref(x, vy, k);
 }

 // ====================== 4-bit (de)-quantization

+void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK_K == 0);
+    block_q4_K * GGML_RESTRICT y = vy;
     quantize_row_q4_K_ref(x, y, k);
 }

 // ====================== 5-bit (de)-quantization

+void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK_K == 0);
+    block_q5_K * GGML_RESTRICT y = vy;
     quantize_row_q5_K_ref(x, y, k);
 }

 // ====================== 6-bit (de)-quantization

+void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK_K == 0);
+    block_q6_K * GGML_RESTRICT y = vy;
     quantize_row_q6_K_ref(x, y, k);
 }

 // ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)

+void quantize_row_tq1_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK_K == 0);
+    block_tq1_0 * GGML_RESTRICT y = vy;
     quantize_row_tq1_0_ref(x, y, k);
 }

+void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(k % QK_K == 0);
+    block_tq2_0 * GGML_RESTRICT y = vy;
     quantize_row_tq2_0_ref(x, y, k);
 }

@@ -1743,11 +1743,11 @@

 //===================================== Q8_K ==============================================

+void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
 #ifdef __wasm_simd128__
     assert(k % QK_K == 0);
     const int64_t nb = k / QK_K;
+    block_q8_K * GGML_RESTRICT yc = y; // Cast to proper type

     for (int i = 0; i < nb; i++) {
         const float * x_block = x + i * QK_K;

@@ -1909,7 +1909,7 @@
 }
 #endif

+void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;

@@ -1924,23 +1924,23 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q4_0 * GGML_RESTRICT x = vx;
+    const block_q8_0 * GGML_RESTRICT y = vy;

 #if defined(__ARM_FEATURE_MATMUL_INT8)
     if (nrc == 2) {
+        const block_q4_0 * GGML_RESTRICT vx0 = vx;
+        const block_q4_0 * GGML_RESTRICT vx1 = (const block_q4_0 *) ((const uint8_t*)vx + bx);
+        const block_q8_0 * GGML_RESTRICT vy0 = vy;
+        const block_q8_0 * GGML_RESTRICT vy1 = (const block_q8_0 *) ((const uint8_t*)vy + by);

         float32x4_t sumv0 = vdupq_n_f32(0.0f);

         for (int i = 0; i < nb; i++) {
+            const block_q4_0 * GGML_RESTRICT b_x0 = &vx0[i];
+            const block_q4_0 * GGML_RESTRICT b_x1 = &vx1[i];
+            const block_q8_0 * GGML_RESTRICT b_y0 = &vy0[i];
+            const block_q8_0 * GGML_RESTRICT b_y1 = &vy1[i];

             const uint8x16_t m4b = vdupq_n_u8(0x0F);
             const int8x16_t  s8b = vdupq_n_s8(0x8);

@@ -2017,10 +2017,10 @@
         const svbool_t ph4 = svptrue_pat_b32(SV_VL4);

         for (; ib + 1 < nb; ib += 2) {
+            const block_q4_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q4_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             // load x
             const svuint8_t qx0r = svld1rq_u8(svptrue_b8(), x0->qs);

@@ -2063,10 +2063,10 @@
         const svbool_t pl16 = svnot_b_z(svptrue_b8(), ph16);

         for (; ib + 1 < nb; ib += 2) {
+            const block_q4_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q4_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             // load x
             const svuint8_t qx0r = svld1rq_u8(svptrue_b8(), x0->qs);

@@ -2104,10 +2104,10 @@
         const svbool_t pl16 = svnot_b_z(ph32, ph16);

         for (; ib + 1 < nb; ib += 2) {
+            const block_q4_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q4_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             // load x
             const svuint8_t qx0r = svld1rq_u8(ph32, x0->qs);

@@ -2144,10 +2144,10 @@
     float32x4_t sumv1 = vdupq_n_f32(0.0f);

     for (; ib + 1 < nb; ib += 2) {
+        const block_q4_0 * GGML_RESTRICT x0 = &x[ib + 0];
+        const block_q4_0 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+        const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

         const uint8x16_t m4b = vdupq_n_u8(0x0F);
         const int8x16_t  s8b = vdupq_n_s8(0x8);

@@ -2189,10 +2189,10 @@
     const v128_t s8b = wasm_i8x16_splat(0x8);

     for (; ib + 1 < nb; ib += 2) {
+        const block_q4_0 * GGML_RESTRICT x0 = &x[ib];
+        const block_q4_0 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib];
+        const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

         // Load and process x0
         v128_t v0_0 = wasm_v128_load(x0->qs);

@@ -2609,7 +2609,7 @@
     *s = sumf;
 }

+void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     const int qk = QK8_1;
     const int nb = n / qk;

@@ -2624,24 +2624,24 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q4_1 * GGML_RESTRICT x = vx;
+    const block_q8_1 * GGML_RESTRICT y = vy;

 #if defined(__ARM_FEATURE_MATMUL_INT8)
     if (nrc == 2) {
+        const block_q4_1 * GGML_RESTRICT vx0 = vx;
+        const block_q4_1 * GGML_RESTRICT vx1 = (const block_q4_1 *) ((const uint8_t*)vx + bx);
+        const block_q8_1 * GGML_RESTRICT vy0 = vy;
+        const block_q8_1 * GGML_RESTRICT vy1 = (const block_q8_1 *) ((const uint8_t*)vy + by);

         float32x4_t sumv0 = vdupq_n_f32(0.0f);
         float32x4_t summs0 = vdupq_n_f32(0.0f);

         for (int i = 0; i < nb; i++) {
+            const block_q4_1 * GGML_RESTRICT b_x0 = &vx0[i];
+            const block_q4_1 * GGML_RESTRICT b_x1 = &vx1[i];
+            const block_q8_1 * GGML_RESTRICT b_y0 = &vy0[i];
+            const block_q8_1 * GGML_RESTRICT b_y1 = &vy1[i];

             float32_t summs_t[4] = {
                 GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),

@@ -2715,10 +2715,10 @@
     float summs = 0;

     for (; ib + 1 < nb; ib += 2) {
+        const block_q4_1 * GGML_RESTRICT x0 = &x[ib + 0];
+        const block_q4_1 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_1 * GGML_RESTRICT y0 = &y[ib + 0];
+        const block_q8_1 * GGML_RESTRICT y1 = &y[ib + 1];

         summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);

@@ -2931,7 +2931,7 @@
     *s = sumf;
 }

+void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;

@@ -2946,8 +2946,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q5_0 * GGML_RESTRICT x = vx;
+    const block_q8_0 * GGML_RESTRICT y = vy;

 #if defined(__ARM_NEON)
     float32x4_t sumv0 = vdupq_n_f32(0.0f);

@@ -2960,10 +2960,10 @@
     uint64_t tmp1[4];

     for (; ib + 1 < nb; ib += 2) {
+        const block_q5_0 * GGML_RESTRICT x0 = &x[ib];
+        const block_q5_0 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib];
+        const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

         const uint8x16_t m4b = vdupq_n_u8(0x0F);

@@ -3024,8 +3024,8 @@

     // TODO: check if unrolling this is better
     for (; ib < nb; ++ib) {
+        const block_q5_0 * GGML_RESTRICT x0 = &x[ib];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib];

         const v128_t m4b = wasm_i8x16_splat(0x0F);

@@ -3286,7 +3286,7 @@
     *s = sumf;
 }

+void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     const int qk = QK8_1;
     const int nb = n / qk;

@@ -3301,8 +3301,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q5_1 * GGML_RESTRICT x = vx;
+    const block_q8_1 * GGML_RESTRICT y = vy;

 #if defined(__ARM_NEON)
     float32x4_t sumv0 = vdupq_n_f32(0.0f);

@@ -3318,10 +3318,10 @@
     uint64_t tmp1[4];

     for (; ib + 1 < nb; ib += 2) {
+        const block_q5_1 * GGML_RESTRICT x0 = &x[ib];
+        const block_q5_1 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_1 * GGML_RESTRICT y0 = &y[ib];
+        const block_q8_1 * GGML_RESTRICT y1 = &y[ib + 1];

         const uint8x16_t m4b = vdupq_n_u8(0x0F);

@@ -3387,8 +3387,8 @@

     // TODO: check if unrolling this is better
     for (; ib < nb; ++ib) {
+        const block_q5_1 * GGML_RESTRICT x0 = &x[ib];
+        const block_q8_1 * GGML_RESTRICT y0 = &y[ib];

         summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);

@@ -3660,7 +3660,7 @@
     *s = sumf;
 }

+void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     const int qk = QK8_0;
     const int nb = n / qk;

@@ -3675,24 +3675,24 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q8_0 * GGML_RESTRICT x = vx;
+    const block_q8_0 * GGML_RESTRICT y = vy;

 #if defined(__ARM_FEATURE_MATMUL_INT8)
     if (nrc == 2) {
+        const block_q8_0 * GGML_RESTRICT vx0 = vx;
+        const block_q8_0 * GGML_RESTRICT vx1 = (const block_q8_0 *) ((const uint8_t*)vx + bx);
+        const block_q8_0 * GGML_RESTRICT vy0 = vy;
+        const block_q8_0 * GGML_RESTRICT vy1 = (const block_q8_0 *) ((const uint8_t*)vy + by);

         float32x4_t sumv0 = vdupq_n_f32(0.0f);

         for (int i = 0; i < nb; i++) {
+            const block_q8_0 * GGML_RESTRICT b_x0 = &vx0[i];
+            const block_q8_0 * GGML_RESTRICT b_y0 = &vy0[i];

+            const block_q8_0 * GGML_RESTRICT b_x1 = &vx1[i];
+            const block_q8_0 * GGML_RESTRICT b_y1 = &vy1[i];

             const int8x16_t x0_l = vld1q_s8(b_x0->qs);
             const int8x16_t x0_h = vld1q_s8(b_x0->qs + 16);

@@ -3757,10 +3757,10 @@
         const svbool_t pl16 = svptrue_pat_b32(SV_VL4);

         for (; ib + 1 < nb; ib += 2) {
+            const block_q8_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q8_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             // load x
             const svint8_t qx0_0 = svld1_s8(ph16, x0->qs);

@@ -3788,10 +3788,10 @@
     {
         //printf("sve256");
         for (; ib + 1 < nb; ib += 2) {
+            const block_q8_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q8_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             // load x
             const svint8_t qx0 = svld1_s8(svptrue_b8(), x0->qs);

@@ -3824,10 +3824,10 @@
         svfloat32_t sumv00 = svdup_n_f32(0.0f);

         for (; ib + 1 < nb; ib += 2) {
+            const block_q8_0 * GGML_RESTRICT x0 = &x[ib + 0];
+            const block_q8_0 * GGML_RESTRICT x1 = &x[ib + 1];
+            const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+            const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

             //load 32 int8_t in first half of vector and put another 32 int8_t in second vector lower bits
             // and add them to make one 64 element vector

@@ -3867,10 +3867,10 @@
     float32x4_t sumv1 = vdupq_n_f32(0.0f);

     for (; ib + 1 < nb; ib += 2) {
+        const block_q8_0 * GGML_RESTRICT x0 = &x[ib + 0];
+        const block_q8_0 * GGML_RESTRICT x1 = &x[ib + 1];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
+        const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];

         const int8x16_t x0_0 = vld1q_s8(x0->qs);
         const int8x16_t x0_1 = vld1q_s8(x0->qs + 16);

@@ -3897,8 +3897,8 @@
     v128_t sumv = wasm_f32x4_splat(0.0f);

     for (; ib < nb; ++ib) {
+        const block_q8_0 * GGML_RESTRICT x0 = &x[ib];
+        const block_q8_0 * GGML_RESTRICT y0 = &y[ib];

         const v128_t x0_0 = wasm_v128_load(x0->qs);
         const v128_t x0_1 = wasm_v128_load(x0->qs + 16);

@@ -4080,15 +4080,15 @@
     *s = sumf;
 }

+void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);
     UNUSED(by);
     UNUSED(bs);

+    const block_tq1_0 * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -4403,15 +4403,15 @@
 #endif
 }

+void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);
     UNUSED(by);
     UNUSED(bs);

+    const block_tq2_0 * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -4575,15 +4575,15 @@
 #endif
 }

+void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);
     UNUSED(by);
     UNUSED(bs);

+    const block_q2_K * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -4603,9 +4603,9 @@
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
         svfloat32_t dmin_broad = svdup_n_f32((float32_t)dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8_sv = y[i].qs;
+        const uint8_t * GGML_RESTRICT sc = x[i].scales;

         svuint32_t mins_and_scales_sve = svld1ub_u32(svptrue_b32(), sc);
         const svint32_t mins_sv_1 = svreinterpret_s32_u32(svlsr_n_u32_x(svptrue_b32(), mins_and_scales_sve, 4));

@@ -4748,9 +4748,9 @@
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
         svfloat32_t dmin_broad = svdup_n_f32((float32_t)dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8_sv = y[i].qs;
+        const uint8_t * GGML_RESTRICT sc = x[i].scales;

         const svuint32_t mins_and_scales_sve = svld1ub_u32(svptrue_pat_b32(SV_VL8), sc); sc += 8;
         const svint32_t scales_sv = svreinterpret_s32_u32(svand_u32_m(svptrue_pat_b32(SV_VL8), mins_and_scales_sve, m4s));

@@ -4847,9 +4847,9 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
+        const uint8_t * GGML_RESTRICT sc = x[i].scales;

         const uint8x16_t mins_and_scales = vld1q_u8(sc);
         const uint8x16_t scales = vandq_u8(mins_and_scales, m4);

@@ -4912,8 +4912,8 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m128i mins_and_scales = _mm_loadu_si128((const __m128i*)x[i].scales);
         const __m128i scales8 = _mm_and_si128(mins_and_scales, m4);

@@ -4979,8 +4979,8 @@
         const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // load mins and scales from block_q2_K.scales[QK_K/16]
         const __m128i mins_and_scales = _mm_loadu_si128((const __m128i*)x[i].scales);

@@ -5306,8 +5306,8 @@
         vector signed int vsumi6 = v0;
         vector signed int vsumi7 = v0;

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/128; ++j) {
             __builtin_prefetch(q2, 0, 1);

@@ -5398,8 +5398,8 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m128i mins_and_scales128 = __lsx_vld((const __m128i*)x[i].scales, 0);
         const __m128i scales128 = __lsx_vandi_b(mins_and_scales128, 0xf);

@@ -5492,7 +5492,7 @@
 #endif
 }

+void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -5503,8 +5503,8 @@
     const uint32_t kmask1 = 0x03030303;
     const uint32_t kmask2 = 0x0f0f0f0f;

+    const block_q3_K * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -5529,9 +5529,9 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q3_sv = x[i].qs;
+        const uint8_t * GGML_RESTRICT qh_sv = x[i].hmask;
+        const int8_t  * GGML_RESTRICT q8_sv = y[i].qs;

         // Set up scales
         memcpy(aux, x[i].scales, 12);

@@ -5705,9 +5705,9 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT qh = x[i].hmask;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         ggml_uint8x16x2_t qhbits = ggml_vld1q_u8_x2(qh);

@@ -5791,8 +5791,8 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // Set up scales
         memcpy(aux, x[i].scales, 12);

@@ -5896,8 +5896,8 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // Set up scales
         aux = (const uint32_t *)x[i].scales;

@@ -6030,9 +6030,9 @@

     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT hm = x[i].hmask;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // Process blocks with SIMD
         int8_t * a = aux8;

@@ -6119,9 +6119,9 @@
     float sumf = 0;
     for (int i = 0; i < nb; ++i) {

+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT qh = x[i].hmask;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         memcpy(aux, x[i].scales, 12);
         utmp[3] = ((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4);

@@ -6261,8 +6261,8 @@
         vector signed int vsumi6 = v0;
         vector signed int vsumi7 = v0;

+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/128; ++j) {
             __builtin_prefetch(q3, 0, 1);

@@ -6375,8 +6375,8 @@
     for (int i = 0; i < nb; ++i) {

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         // Set up scales
         memcpy(aux, x[i].scales, 12);
         __m128i scales128 = lsx_set_w(

@@ -6461,11 +6461,11 @@

     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT hm = x[i].hmask;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         memset(aux32, 0, 8*sizeof(int32_t));
+        int8_t * GGML_RESTRICT a = aux8;
         uint8_t m = 1;
         for (int j = 0; j < QK_K; j += 128) {
             for (int l = 0; l < 32; ++l) a[l] = q3[l] & 3;

@@ -6508,7 +6508,7 @@

 }

+void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -6516,8 +6516,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q4_K * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -6552,8 +6552,8 @@

     const uint8_t * scales = (const uint8_t *)utmp;

+    const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+    const int8_t  * GGML_RESTRICT q8 = y[i].qs;

     const int vector_length = ggml_cpu_get_sve_cnt()*8;
     const svuint8_t m4b = svdup_n_u8(0xf);

@@ -6640,8 +6640,8 @@

         const uint8_t * scales = (const uint8_t *)utmp;

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         int32_t sumi1 = 0;
         int32_t sumi2 = 0;

@@ -6679,8 +6679,8 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); // Corrected sign

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // Process scales and mins
         memcpy(utmp, x[i].scales, 12);

@@ -6692,7 +6692,7 @@

         // Sum mins * q8sums
         int32_t sumi = 0;
+        const int16_t * GGML_RESTRICT q8sums = y[i].bsums;
         const uint8_t * m = (const uint8_t *)&utmp[2];
         for (int j = 0; j < 16; j += 2) {
             sumi += (q8sums[j] + q8sums[j+1]) * m[j/2];

@@ -6791,8 +6791,8 @@
         utmp[2] = uaux;
         utmp[0] &= kmask1;

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m256i mins_and_scales = _mm256_cvtepu8_epi16(_mm_set_epi32(utmp[3], utmp[2], utmp[1], utmp[0]));

@@ -6850,8 +6850,8 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         memcpy(utmp, x[i].scales, 12);
         utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);

@@ -6951,8 +6951,8 @@
         vint32m1_t sumi = __riscv_vredsum_vs_i32m1_i32m1(prod, __riscv_vmv_v_x_i32m1(0, 1), vl);
         sumf -= dmin * __riscv_vmv_x_s_i32m1_i32(sumi);

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         vl = 32;

@@ -7053,8 +7053,8 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/64; j+=2) {
             __builtin_prefetch(q4, 0, 1);

@@ -7145,8 +7145,8 @@
         utmp[2] = uaux;
         utmp[0] &= kmask1;

+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m128i mins_and_scales128 = lsx_set_w(utmp[3], utmp[2], utmp[1], utmp[0]);
         const __m128i mins128 = __lsx_vexth_h_b(mins_and_scales128);

@@ -7228,8 +7228,8 @@
         sumf -= dmin * (v_mins[0] + v_mins[1] + v_mins[2] + v_mins[3]);

         const uint8_t * scales = (const uint8_t *)utmp;
+        const uint8_t * GGML_RESTRICT x0 = x[i].qs;
+        const int8_t  * GGML_RESTRICT y0 = y[i].qs;

         int32_t sumi1 = 0;
         int32_t sumi2 = 0;

@@ -7277,10 +7277,10 @@

     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         memset(aux32, 0, 8*sizeof(int32_t));
+        int8_t * GGML_RESTRICT a = aux8;
         for (int j = 0; j < QK_K/64; ++j) {
             for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] & 0xF);
             a += 32;

@@ -7323,7 +7323,7 @@
 #endif
 }

+void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -7331,8 +7331,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q5_K * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -7374,9 +7374,9 @@

         const uint8_t * scales = (const uint8_t *)utmp;

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         ggml_uint8x16x2_t qhbits = ggml_vld1q_u8_x2(qh);

@@ -7421,8 +7421,8 @@
     float summs = 0.f;

     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

@@ -7505,8 +7505,8 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         memcpy(utmp, x[i].scales, 12);
         utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);

@@ -7597,9 +7597,9 @@
         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); // Fixed sign

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // Process scales and mins
         memcpy(utmp, x[i].scales, 12);

@@ -7611,7 +7611,7 @@

         // Sum mins * q8sums
         int32_t sumi_mins = 0;
+        const int16_t * GGML_RESTRICT q8sums = y[i].bsums;
         const uint8_t * m = (const uint8_t *)&utmp[2];
         for (int j = 0; j < 16; j += 2) {
             sumi_mins += (q8sums[j] + q8sums[j+1]) * m[j/2];

@@ -7715,9 +7715,9 @@

         vl = 8;

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const uint8_t * GGML_RESTRICT hm = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
         const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;

@@ -7856,8 +7856,8 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/64; ++j) {
             __builtin_prefetch(q5, 0, 1);

@@ -7929,8 +7929,8 @@

     for (int i = 0; i < nb; ++i) {

+        const uint8_t * GGML_RESTRICT q5 = x[i].qs;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
         const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);

@@ -8039,9 +8039,9 @@
         const int32_t mins = v_mins[0] + v_mins[1] + v_mins[2] + v_mins[3];

         const uint8_t * scales = (const uint8_t *)utmp;
+        const uint8_t * GGML_RESTRICT x0l = x[i].qs;
+        const uint8_t * GGML_RESTRICT x0h = x[i].qh;
+        const int8_t  * GGML_RESTRICT y0 = y[i].qs;

         v_xh[0] = vec_xl(0 , x0h);
         v_xh[1] = vec_xl(16, x0h);

@@ -8094,11 +8094,11 @@

     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q4 = x[i].qs;
+        const uint8_t * GGML_RESTRICT hm = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         memset(aux32, 0, 8*sizeof(int32_t));
+        int8_t * GGML_RESTRICT a = aux8;
         uint8_t m = 1;
         for (int j = 0; j < QK_K/64; ++j) {
             for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] & 0xF);

@@ -8145,7 +8145,7 @@
 #endif
 }

+void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -8153,8 +8153,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_q6_K * GGML_RESTRICT x = vx;
+    const block_q8_K * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -8174,11 +8174,11 @@

         const float d_all = GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q6 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

+        const int8_t * GGML_RESTRICT scale = x[i].scales;

         const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums);
         const int8x16_t scales = vld1q_s8(scale);

@@ -8265,9 +8265,9 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q4 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m128i scales = _mm_loadu_si128((const __m128i*)x[i].scales);

@@ -8343,9 +8343,9 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q4 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         // handle the q6_k -32 offset separately using bsums
         const __m128i q8sums_0 = _mm_loadu_si128((const __m128i*)y[i].bsums);

@@ -8444,8 +8444,8 @@

     for (int i = 0; i < nb; ++i) {
         // Unpack 6-bit quantized data into aux8 (unchanged)
+        const uint8_t * GGML_RESTRICT q4 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
         int8_t * a = aux8;
         for (int j = 0; j < QK_K; j += 128) {
             for (int l = 0; l < 32; ++l) {

@@ -8459,8 +8459,8 @@
             qh += 32;
         }

+        const int8_t * GGML_RESTRICT a_ptr = aux8;
+        const int8_t * GGML_RESTRICT q8 = y[i].qs;
         v128_t acc0 = wasm_i32x4_splat(0);
         v128_t acc1 = wasm_i32x4_splat(0);

@@ -8523,11 +8523,11 @@

         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;

+        const uint8_t * GGML_RESTRICT q6 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

+        const int8_t * GGML_RESTRICT scale = x[i].scales;

         size_t vl;

@@ -8629,10 +8629,10 @@
         vector signed int vsumi6 = v0;
         vector signed int vsumi7 = v0;

+        const uint8_t * GGML_RESTRICT q6 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT qs = x[i].scales;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/128; ++j) {
             __builtin_prefetch(q6, 0, 0);

@@ -8748,9 +8748,9 @@

         const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT q4 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;

         const __m128i scales128 = __lsx_vld((const __m128i*)x[i].scales, 0);
         const v16i8 shuffle_mask = {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15};

@@ -8816,11 +8816,11 @@
     for (int i = 0; i < nb; ++i) {
         const float d_all = GGML_FP16_TO_FP32(x[i].d);

+        const uint8_t * GGML_RESTRICT x0l = x[i].ql;
+        const uint8_t * GGML_RESTRICT x0h = x[i].qh;
+        const int8_t  * GGML_RESTRICT y0 = y[i].qs;

+        const int8_t * GGML_RESTRICT scale = x[i].scales;

         const int16x8_t v_ysumsl = vec_xl(0 , y[i].bsums);
         const int16x8_t v_ysumsh = vec_xl(16, y[i].bsums);

@@ -8931,11 +8931,11 @@

     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
+        const uint8_t * GGML_RESTRICT q4 = x[i].ql;
+        const uint8_t * GGML_RESTRICT qh = x[i].qh;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         memset(aux32, 0, 8*sizeof(int32_t));
+        int8_t * GGML_RESTRICT a = aux8;
         for (int j = 0; j < QK_K; j += 128) {
             for (int l = 0; l < 32; ++l) {
                 a[l + 0] = (int8_t)((q4[l + 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32;

@@ -9003,7 +9003,7 @@
 };
 #endif

+void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -9011,8 +9011,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_iq2_xxs * GGML_RESTRICT x = vx;
+    const block_q8_K    * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -9030,8 +9030,8 @@
     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         float sumf1 = 0, sumf2 = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
             q8b = ggml_vld1q_s8_x4(q8); q8 += 64;

@@ -9067,8 +9067,8 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         __m256i sumi1 = _mm256_setzero_si256();
         __m256i sumi2 = _mm256_setzero_si256();
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {

@@ -9108,8 +9108,8 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         __m128i sumi1_0 = _mm_setzero_si128();
         __m128i sumi1_1 = _mm_setzero_si128();
         __m128i sumi2_0 = _mm_setzero_si128();

@@ -9173,8 +9173,8 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/32; j += 2) {
             __builtin_prefetch(q2, 0, 1);

@@ -9250,8 +9250,8 @@
     __m256 accumf = (__m256)__lasx_xvldi(0);
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         __m256i sumi1 = __lasx_xvldi(0);
         __m256i sumi2 = __lasx_xvldi(0);
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {

@@ -9291,8 +9291,8 @@
     //
     // for (int i = 0; i < nb; ++i) {
     //     const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+    //     const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+    //     const int8_t   * GGML_RESTRICT q8 = y[i].qs;
     //
     //     float sumf1 = 0, sumf2 = 0;
     //

@@ -9340,8 +9340,8 @@
     float sumf = 0.f;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         int32_t bsum = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
             memcpy(aux32, q2, 2*sizeof(uint32_t));

@@ -9364,7 +9364,7 @@
 #endif
 }

+void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -9372,8 +9372,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_iq2_xs * GGML_RESTRICT x = vx;
+    const block_q8_K   * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -9390,8 +9390,8 @@
     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         const uint8x8_t scales8 = vld1_u8(x[i].scales);
         const uint8x8_t scales_l = vand_u8(scales8, vdup_n_u8(0xf));
         const uint8x8_t scales_h = vshr_n_u8(scales8, 4);

@@ -9468,8 +9468,8 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(&aux64, x[i].scales, 8);
         __m128i stmp = _mm_set1_epi64x(aux64);

@@ -9589,8 +9589,8 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(&aux64, x[i].scales, 8);
         __m128i stmp = _mm_set1_epi64x(aux64);

@@ -9744,8 +9744,8 @@
     __m256 accumf = (__m256)__lasx_xvldi(0);
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(&aux64, x[i].scales, 8);
         __m128i stmp = __lsx_vreplgr2vr_d(aux64);

@@ -9842,9 +9842,9 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const uint8_t  * GGML_RESTRICT sc = x[i].scales;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/64; ++j) {
             __builtin_prefetch(q2, 0, 1);

@@ -9914,9 +9914,9 @@
     float sumf = 0.f;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint16_t * GGML_RESTRICT q2 = x[i].qs;
+        const uint8_t  * GGML_RESTRICT sc = x[i].scales;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         int32_t bsum = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
             const uint16_t ls1 = 2*(sc[ib32] & 0xf) + 1;

@@ -9949,7 +9949,7 @@
 #endif
 }

+void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -9957,8 +9957,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_iq2_s * GGML_RESTRICT x = vx;
+    const block_q8_K  * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -9984,10 +9984,10 @@

         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;

+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8);
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         int sumi1 = 0, sumi2 = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {

@@ -10058,10 +10058,10 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8);
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(&aux64, x[i].scales, 8);
         const __m128i scales8 = _mm_add_epi8(_mm_slli_epi16(_mm_and_si128(_mm_set_epi64x(aux64 >> 4, aux64), m4), 1), m1);

@@ -10131,10 +10131,10 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8);
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(&aux64, x[i].scales, 8);
         const __m128i scales8 = _mm_add_epi8(_mm_slli_epi16(_mm_and_si128(_mm_set_epi64x(aux64 >> 4, aux64), m4), 1), m1);

@@ -10229,11 +10229,11 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint8_t  * GGML_RESTRICT q2 = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8);
+        const uint8_t  * GGML_RESTRICT sc = x[i].scales;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         for (int j = 0; j < QK_K/32; j += 2) {
             __builtin_prefetch(q2, 0, 1);

@@ -10330,10 +10330,10 @@
     __m256 accumf = (__m256)__lasx_xvldi(0);
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8);
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         __m128i tmp1;
         memcpy(&aux64, x[i].scales, 8);

@@ -10427,7 +10427,7 @@

 }

+void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -10435,8 +10435,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_iq3_xxs * GGML_RESTRICT x = vx;
+    const block_q8_K    * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -10452,9 +10452,9 @@
     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         float sumf1 = 0, sumf2 = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
             q8b = ggml_vld1q_s8_x4(q8); q8 += 64;

@@ -10490,9 +10490,9 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         __m256i sumi1 = _mm256_setzero_si256();
         __m256i sumi2 = _mm256_setzero_si256();
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {

@@ -10535,9 +10535,9 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         __m128i sumi1_0 = _mm_setzero_si128();
         __m128i sumi1_1 = _mm_setzero_si128();
         __m128i sumi2_0 = _mm_setzero_si128();

@@ -10604,9 +10604,9 @@
         vector signed int vsumi2 = v0;
         vector signed int vsumi3 = v0;

+        const uint8_t  * GGML_RESTRICT q3 = x[i].qs;
+        const uint32_t * GGML_RESTRICT signs = (const uint32_t *)(x[i].qs + QK_K/4);
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

 #pragma GCC unroll 1
         for (int j = 0; j < QK_K/32; j += 2) {

@@ -10678,9 +10678,9 @@
     __m256 accumf = (__m256)__lasx_xvldi(0);
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         __m256i sumi1 = __lasx_xvldi(0);
         __m256i sumi2 = __lasx_xvldi(0);
         for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {

@@ -10723,9 +10723,9 @@
     float sumf = 0.f;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t * GGML_RESTRICT q3 = x[i].qs;
+        const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4;
+        const int8_t  * GGML_RESTRICT q8 = y[i].qs;
         int32_t bsum = 0;
         for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
             memcpy(&aux32, gas, sizeof(uint32_t)); gas += sizeof(uint32_t);

@@ -10750,7 +10750,7 @@
 #endif
 }

+void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
     assert(n % QK_K == 0);
     assert(nrc == 1);
     UNUSED(nrc);

@@ -10758,8 +10758,8 @@
     UNUSED(by);
     UNUSED(bs);

+    const block_iq3_s * GGML_RESTRICT x = vx;
+    const block_q8_K  * GGML_RESTRICT y = vy;

     const int nb = n / QK_K;

@@ -10796,10 +10796,10 @@
     float sumf = 0;
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;

         memcpy(scales32, x[i].scales, 4);
         scales32[1] = (((scales32[0] >> 4) & 0x0f0f0f0f) << 1) | 0x01010101;

@@ -10878,10 +10878,10 @@
     __m256 accumf = _mm256_setzero_ps();
     for (int i = 0; i < nb; ++i) {
         const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+        const uint8_t  * GGML_RESTRICT qs = x[i].qs;
+        const uint8_t  * GGML_RESTRICT qh = x[i].qh;
+        const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs;
+        const int8_t   * GGML_RESTRICT q8 = y[i].qs;
         __m256i sumi1 = _mm256_setzero_si256();
|
| 10886 |
__m256i sumi2 = _mm256_setzero_si256();
|
| 10887 |
for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
|
|
|
|
| 10963 |
__m256 accumf = _mm256_setzero_ps();
|
| 10964 |
for (int i = 0; i < nb; ++i) {
|
| 10965 |
const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
|
| 10966 |
+
const uint8_t * GGML_RESTRICT qs = x[i].qs;
|
| 10967 |
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
| 10968 |
+
const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs;
|
| 10969 |
+
const int8_t * GGML_RESTRICT q8 = y[i].qs;
|
| 10970 |
__m128i sumi1_0 = _mm_setzero_si128();
|
| 10971 |
__m128i sumi1_1 = _mm_setzero_si128();
|
| 10972 |
__m128i sumi2_0 = _mm_setzero_si128();
|
|
|
|
| 11064 |
vector float vyd = vec_splats(y[i].d);
|
| 11065 |
vector float vd = vec_mul(vxd, vyd);
|
| 11066 |
|
| 11067 |
+
const uint8_t * GGML_RESTRICT q3 = x[i].qs;
|
| 11068 |
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
| 11069 |
+
const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].signs);
|
| 11070 |
+
const uint8_t * GGML_RESTRICT sc = x[i].scales;
|
| 11071 |
+
const int8_t * GGML_RESTRICT q8 = y[i].qs;
|
| 11072 |
|
| 11073 |
vector signed int vsumi0 = v0;
|
| 11074 |
vector signed int vsumi1 = v0;
|
|
|
|
| 11175 |
__m256 accumf = (__m256)__lasx_xvldi(0);
|
| 11176 |
for (int i = 0; i < nb; ++i) {
|
| 11177 |
const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
|
| 11178 |
+
const uint8_t * GGML_RESTRICT qs = x[i].qs;
|
| 11179 |
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
| 11180 |
+
const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs;
|
| 11181 |
+
const int8_t * GGML_RESTRICT q8 = y[i].qs;
|
| 11182 |
__m256i sumi1 = __lasx_xvldi(0);
|
| 11183 |
__m256i sumi2 = __lasx_xvldi(0);
|
| 11184 |
for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
|
|
|
|
| 11236 |
float sumf = 0.f;
|
| 11237 |
for (int i = 0; i < nb; ++i) {
|
| 11238 |
const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
|
| 11239 |
+
const uint8_t * GGML_RESTRICT qs = x[i].qs;
|
| 11240 |
+
const uint8_t * GGML_RESTRICT qh = x[i].qh;
|
| 11241 |
+
const uint8_t * GGML_RESTRICT signs = x[i].signs;
|
| 11242 |
+
const int8_t * GGML_RESTRICT q8 = y[i].qs;
|
| 11243 |
int32_t bsum = 0;
|
| 11244 |
for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
|
| 11245 |
const uint32_t ls1 = 2*(x[i].scales[ib32/2] & 0xf) + 1;
|
|
|
|
| 11291 |
}
|
| 11292 |
#endif
|
| 11293 |
|
| 11294 |
+
void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
| 11295 |
assert(n % QK_K == 0);
|
| 11296 |
assert(nrc == 1);
|
| 11297 |
UNUSED(nrc);
|
|
|
|
| 11299 |
UNUSED(by);
|
| 11300 |
UNUSED(bs);
|
| 11301 |
|
| 11302 |
+
const block_iq1_s * GGML_RESTRICT x = vx;
|
| 11303 |
+
const block_q8_K * GGML_RESTRICT y = vy;
|
| 11304 |
|
| 11305 |
const int nb = n / QK_K;
|
| 11306 |
|
|
|
|
| 11458 |
vector signed int vsumi3 = vec_splats((int32_t)0);
|
| 11459 |
vector signed int vsumi8 = vec_splats((int32_t)0);
|
| 11460 |
|
| 11461 |
+
const uint8_t * GGML_RESTRICT q1 = x[i].qs;
|
| 11462 |
+
const uint16_t * GGML_RESTRICT qh = x[i].qh;
|
| 11463 |
+
const int8_t * GGML_RESTRICT q8 = y[i].qs;
|
| 11464 |
+
const int16_t * GGML_RESTRICT qs = y[i].bsums;
|
| 11465 |
|
| 11466 |
for (int j = 0; j < QK_K/32; j += 2) {
|
| 11467 |
__builtin_prefetch(q1, 0, 1);
|
|
|
|
| 11622 |
#endif
|
| 11623 |
}
|
| 11624 |
|
| 11625 |
+
void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
| 11626 |
assert(n % QK_K == 0);
|
| 11627 |
assert(nrc == 1);
|
| 11628 |
UNUSED(nrc);
|
|
|
|
| 11630 |
UNUSED(by);
|
| 11631 |
UNUSED(bs);
|
| 11632 |
|
| 11633 |
+
const block_iq1_m * GGML_RESTRICT x = vx;
|
| 11634 |
+
const block_q8_K * GGML_RESTRICT y = vy;
|
| 11635 |
|
| 11636 |
const int nb = n / QK_K;
|
| 11637 |
|
|
|
|
| 11912 |
#endif
|
| 11913 |
}
|
| 11914 |
|
| 11915 |
+
void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
| 11916 |
assert(nrc == 1);
|
| 11917 |
UNUSED(nrc);
|
| 11918 |
UNUSED(bx);
|
|
|
|
| 11921 |
assert(n % QK4_NL == 0);
|
| 11922 |
static_assert(QK4_NL == QK8_0, "QK4_NL and QK8_0 must be the same");
|
| 11923 |
|
| 11924 |
+
const block_iq4_nl * GGML_RESTRICT x = vx;
|
| 11925 |
+
const block_q8_0 * GGML_RESTRICT y = vy;
|
| 11926 |
|
| 11927 |
const int nb = n / QK4_NL;
|
| 11928 |
|
|
|
|
| 12097 |
const uint8x16_t v_m = vec_splat_u8(0x0F);
|
| 12098 |
|
| 12099 |
for (; ib < nb; ++ib) {
|
| 12100 |
+
const block_iq4_nl * GGML_RESTRICT x0 = &x[ib];
|
| 12101 |
+
const block_q8_0 * GGML_RESTRICT y0 = &y[ib];
|
| 12102 |
|
| 12103 |
const uint8x16_t v_x = vec_xl(0, x0->qs);
|
| 12104 |
int8x16_t v_xl = (int8x16_t)vec_and(v_x, v_m);
|
|
|
|
| 12126 |
*s = sumf;
|
| 12127 |
}
|
| 12128 |
|
| 12129 |
+
void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
| 12130 |
assert(nrc == 1);
|
| 12131 |
UNUSED(nrc);
|
| 12132 |
UNUSED(bx);
|
|
|
|
| 12134 |
UNUSED(bs);
|
| 12135 |
assert(n % QK_K == 0);
|
| 12136 |
|
| 12137 |
+
const block_iq4_xs * GGML_RESTRICT x = vx;
|
| 12138 |
+
const block_q8_K * GGML_RESTRICT y = vy;
|
| 12139 |
|
| 12140 |
const int nb = n / QK_K;
|
| 12141 |
|
|
|
|
| 12292 |
|
| 12293 |
uint16_t h = x[ibl].scales_h;
|
| 12294 |
|
| 12295 |
+
const uint8_t * GGML_RESTRICT q4 = x[ibl].qs;
|
| 12296 |
+
const uint8_t * GGML_RESTRICT sc = x[ibl].scales_l;
|
| 12297 |
+
const int8_t * GGML_RESTRICT q8 = y[ibl].qs;
|
| 12298 |
|
| 12299 |
for (int ib = 0; ib < QK_K/64; ib ++ ) {
|
| 12300 |
__builtin_prefetch(q4, 0, 1);
|
|
|
|
| 12398 |
float sumf = 0;
|
| 12399 |
|
| 12400 |
for (int ibl = 0; ibl < nb; ++ibl) {
|
| 12401 |
+
const uint8_t * GGML_RESTRICT q4 = x[ibl].qs;
|
| 12402 |
+
const int8_t * GGML_RESTRICT q8 = y[ibl].qs;
|
| 12403 |
|
| 12404 |
uint16_t h = x[ibl].scales_h;
|
| 12405 |
|
|
|
|
| 12479 |
|
| 12480 |
// ============================ 4-bit non-linear quants
|
| 12481 |
|
| 12482 |
+
void quantize_row_iq4_nl(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
| 12483 |
assert(k % QK4_NL == 0);
|
| 12484 |
quantize_row_iq4_nl_ref(x, y, k);
|
| 12485 |
}
|
| 12486 |
|
| 12487 |
+
void quantize_row_iq4_xs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
| 12488 |
assert(k % QK_K == 0);
|
| 12489 |
quantize_iq4_xs(x, y, 1, k, NULL);
|
| 12490 |
}
|
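All of the hunks above make the same mechanical substitution: the C99 `restrict` qualifier on the kernel-local block pointers becomes the portable `GGML_RESTRICT` spelling, so the quantized dot products still declare their no-alias guarantees when built by MSVC in plain C mode. For readers unfamiliar with the qualifier, here is a minimal, self-contained C sketch of what it promises the optimizer; `vec_scale` is an illustrative name, not part of the patch:

#include <stddef.h>

/* With restrict-qualified pointers the compiler may assume x and y never
   overlap, so it can keep values in registers and vectorize the loop instead
   of reloading x[i] after every store to y[i]. */
static void vec_scale(size_t n, float * restrict y, const float * restrict x, float v) {
    for (size_t i = 0; i < n; ++i) {
        y[i] = x[i] * v;   /* undefined behavior if x and y alias */
    }
}

Calling this as `vec_scale(n, buf, buf, 2.0f)` would break the no-alias contract; the kernels above uphold it because each block pointer addresses a distinct field of the input and output blocks.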
ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
@@ -247,9 +247,9 @@ typedef pthread_t ggml_thread_t;
  static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);


- static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
- static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
- static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);

  static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
  [GGML_TYPE_F32] = {

@@ -1451,7 +1451,7 @@ inline static void ggml_vec_div_f16 (const int n, ggml_fp16_t * z, const ggml_fp
  }
  }

- static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);

@@ -1494,7 +1494,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
  *s = sumf;
  }

- static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);

@@ -1562,7 +1562,7 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
  *s = sumf;
  }

- static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);

@@ -1606,10 +1606,10 @@ static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t *

  // compute GGML_VEC_DOT_UNROLL dot products at once
  // xs - x row stride in bytes
- inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * restrict s, void * restrict xv, ggml_fp16_t * restrict y) {
  ggml_float sumf[GGML_VEC_DOT_UNROLL] = { 0.0 };

- ggml_fp16_t * restrict x[GGML_VEC_DOT_UNROLL];

  for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) {
  x[i] = (ggml_fp16_t *) ((char *) xv + i*xs);

@@ -1659,7 +1659,7 @@ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * re
  }
  }

- inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
  #if defined(GGML_SIMD)
  const int np = (n & ~(GGML_F32_STEP - 1));

@@ -1690,7 +1690,7 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
  #endif
  }

- inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const ggml_fp16_t * restrict x, const float v) {
  #if defined(GGML_SIMD)
  const int np = (n & ~(GGML_F16_STEP - 1));

@@ -1722,10 +1722,10 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const
  }

  // xs and vs are byte strides of x and v
- inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * restrict y, const float * restrict xv, const float * restrict vv) {

- const float * restrict x[GGML_VEC_MAD_UNROLL];
- const float * restrict v[GGML_VEC_MAD_UNROLL];

  for (int i = 0; i < GGML_VEC_MAD_UNROLL; ++i) {
  x[i] = (const float *) ((const char *) xv + i*xs);
  static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);


+ static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
+ static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
+ static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);

  static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
  [GGML_TYPE_F32] = {
...
  }
  }

+ static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);
...
  *s = sumf;
  }

+ static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);
...
  *s = sumf;
  }

+ static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc) {
  assert(nrc == 1);
  UNUSED(nrc);
  UNUSED(bx);
...

  // compute GGML_VEC_DOT_UNROLL dot products at once
  // xs - x row stride in bytes
+ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * GGML_RESTRICT s, void * GGML_RESTRICT xv, ggml_fp16_t * GGML_RESTRICT y) {
  ggml_float sumf[GGML_VEC_DOT_UNROLL] = { 0.0 };

+ ggml_fp16_t * GGML_RESTRICT x[GGML_VEC_DOT_UNROLL];

  for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) {
  x[i] = (ggml_fp16_t *) ((char *) xv + i*xs);
...
  }
  }

+ inline static void ggml_vec_mad_f32(const int n, float * GGML_RESTRICT y, const float * GGML_RESTRICT x, const float v) {
  #if defined(GGML_SIMD)
  const int np = (n & ~(GGML_F32_STEP - 1));
...
  #endif
  }

+ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * GGML_RESTRICT y, const ggml_fp16_t * GGML_RESTRICT x, const float v) {
  #if defined(GGML_SIMD)
  const int np = (n & ~(GGML_F16_STEP - 1));
...
  }

  // xs and vs are byte strides of x and v
+ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * GGML_RESTRICT y, const float * GGML_RESTRICT xv, const float * GGML_RESTRICT vv) {

+ const float * GGML_RESTRICT x[GGML_VEC_MAD_UNROLL];
+ const float * GGML_RESTRICT v[GGML_VEC_MAD_UNROLL];

  for (int i = 0; i < GGML_VEC_MAD_UNROLL; ++i) {
  x[i] = (const float *) ((const char *) xv + i*xs);
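The `*_unroll` helpers changed above keep their rows as arrays of restrict-qualified pointers stepped in byte strides, which is exactly the construct that needs the `__restrict` spelling under older MSVC C modes. A compilable sketch of that access pattern, assuming a C99 compiler (`MAD_UNROLL` and `mad_unroll` are illustrative stand-ins; ggml itself spells the qualifier `GGML_RESTRICT`):

#include <stddef.h>

#define MAD_UNROLL 2   /* stand-in for GGML_VEC_MAD_UNROLL */

/* y[j] += x[k][j] * v[k][0] for each unrolled slot k; xs and vs are byte
   strides, so the rows may sit in a packed buffer with arbitrary spacing. */
static void mad_unroll(int n, size_t xs, size_t vs,
                       float * restrict y,
                       const float * restrict xv,
                       const float * restrict vv) {
    const float * restrict x[MAD_UNROLL];
    const float * restrict v[MAD_UNROLL];

    for (int k = 0; k < MAD_UNROLL; ++k) {
        x[k] = (const float *) ((const char *) xv + k*xs);  /* byte stepping */
        v[k] = (const float *) ((const char *) vv + k*vs);
    }

    for (int k = 0; k < MAD_UNROLL; ++k) {
        for (int j = 0; j < n; ++j) {
            y[j] += x[k][j] * v[k][0];
        }
    }
}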
ggml/src/ggml-quants.c
CHANGED
@@ -28,7 +28,7 @@
  #define UNUSED GGML_UNUSED

  // reference implementation for deterministic creation of model files
- void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, int64_t k) {
  static const int qk = QK4_0;

  assert(k % qk == 0);

@@ -65,7 +65,7 @@ void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, in
  }
  }

- void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, int64_t k) {
  const int qk = QK4_1;

  assert(k % qk == 0);

@@ -102,7 +102,7 @@ void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, in
  }
  }

- void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, int64_t k) {
  static const int qk = QK5_0;

  assert(k % qk == 0);

@@ -146,7 +146,7 @@ void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, in
  }
  }

- void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, int64_t k) {
  const int qk = QK5_1;

  assert(k % qk == 0);

@@ -191,7 +191,7 @@ void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, in
  }

  // reference implementation for deterministic creation of model files
- void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, int64_t k) {
  assert(k % QK8_0 == 0);
  const int nb = k / QK8_0;

@@ -217,7 +217,7 @@ void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, in
  }

  // reference implementation for deterministic creation of model files
- void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, int64_t k) {
  assert(QK8_1 == 32);
  assert(k % QK8_1 == 0);
  const int nb = k / QK8_1;

@@ -252,7 +252,7 @@ void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, in
  }
  }

- void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int64_t k) {
  static const int qk = QK4_0;

  assert(k % qk == 0);

@@ -272,7 +272,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int6
  }
  }

- void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int64_t k) {
  static const int qk = QK4_1;

  assert(k % qk == 0);

@@ -293,7 +293,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int6
  }
  }

- void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int64_t k) {
  static const int qk = QK5_0;

  assert(k % qk == 0);

@@ -319,7 +319,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int6
  }
  }

- void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int64_t k) {
  static const int qk = QK5_1;

  assert(k % qk == 0);

@@ -346,7 +346,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int6
  }
  }

- void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int64_t k) {
  static const int qk = QK8_0;

  assert(k % qk == 0);

@@ -376,8 +376,8 @@ static inline int nearest_int(float fval) {
  return (i & 0x007fffff) - 0x00400000;
  }

- static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, int rmse_type,
- const float * restrict qw) {
  float max = 0;
  float amax = 0;
  for (int i = 0; i < n; ++i) {

@@ -445,7 +445,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
  return scale;
  }

- static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, bool do_rmse) {
  float max = 0;
  float amax = 0;
  for (int i = 0; i < n; ++i) {

@@ -504,7 +504,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
  return 1/iscale;
  }

- static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, float * restrict the_min,
  int ntry, float alpha) {
  float min = x[0];
  float max = x[0];

@@ -547,8 +547,8 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
  return scale;
  }

- static float make_qkx2_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
- uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
  float rmin, float rdelta, int nstep, bool use_mad) {
  float min = x[0];
  float max = x[0];

@@ -628,7 +628,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
  return scale;
  }

- static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t * restrict d, uint8_t * restrict m) {
  if (j < 4) {
  *d = q[j] & 63; *m = q[j + 4] & 63;
  } else {

@@ -639,7 +639,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *

  //========================- 2-bit (de)-quantization

- void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -709,7 +709,7 @@ void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, in
  }
  }

- void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -741,8 +741,8 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int6
  }
  }

- static float make_qkx3_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
- uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
  float rmin, float rdelta, int nstep, bool use_mad) {
  float min = x[0];
  float max = x[0];

@@ -824,7 +824,7 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
  return scale;
  }

- static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, const float * quant_weights) {
  float max = 0;
  for (int i = 0; i < n; ++i) {
  max = MAX(max, x[i]);

@@ -897,7 +897,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
  return sumlx/suml2;
  }

- static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) {
  GGML_ASSERT(quant_weights);
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -917,7 +917,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
  for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
  float sigma2 = sumx2/QK_K;
  for (int j = 0; j < QK_K/16; ++j) {
- const float * restrict qw = quant_weights + QK_K * i + 16*j;
  for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
  for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
  scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);

@@ -959,7 +959,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
  }
  }

- size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
  if (!quant_weights) {
  quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -977,7 +977,7 @@ size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nr

  //========================= 3-bit (de)-quantization

- void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -1053,7 +1053,7 @@ void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, in
  }
  }

- void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -1067,8 +1067,8 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6

  const float d_all = GGML_FP16_TO_FP32(x[i].d);

- const uint8_t * restrict q = x[i].qs;
- const uint8_t * restrict hm = x[i].hmask;
  uint8_t m = 1;

  memcpy(aux, x[i].scales, 12);

@@ -1103,7 +1103,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
  }
  }

- static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restrict y, int64_t n_per_row, const float * restrict quant_weights) {
  assert(n_per_row % QK_K == 0);
  const int nb = n_per_row / QK_K;

@@ -1187,7 +1187,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
  }
  }

- size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
  if (!quant_weights) {
  quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -1205,7 +1205,7 @@ size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nr

  // ====================== 4-bit (de)-quantization

- void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -1277,7 +1277,7 @@ void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, in
  }
  }

- void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int nb = k / QK_K;

@@ -1301,7 +1301,7 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int6
  }
  }

- static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restrict y, int64_t n_per_row, const float * quant_weights) {
  assert(n_per_row % QK_K == 0);
  const int64_t nb = n_per_row / QK_K;

@@ -1374,7 +1374,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
  }
  }

- size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
  if (!quant_weights) {
  quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -1392,7 +1392,7 @@ size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nr

  // ====================== 5-bit (de)-quantization

- void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -1454,8 +1454,8 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
  }
  }

- uint8_t * restrict qh = y[i].qh;
- uint8_t * restrict ql = y[i].qs;
  memset(qh, 0, QK_K/8);

  uint8_t m1 = 1, m2 = 2;

@@ -1479,7 +1479,7 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
  }
  }

- void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -1506,7 +1506,7 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int6
  }
  }

- static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restrict y, int64_t n_per_row, const float * quant_weights) {
  assert(n_per_row % QK_K == 0);
  const int64_t nb = n_per_row / QK_K;

@@ -1573,8 +1573,8 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
  }
  }

- uint8_t * restrict qh = y[i].qh;
- uint8_t * restrict ql = y[i].qs;
  memset(qh, 0, QK_K/8);

  uint8_t m1 = 1, m2 = 2;

@@ -1599,7 +1599,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
  }
  }

- size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
  if (!quant_weights) {
  quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -1617,7 +1617,7 @@ size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nr

  // ====================== 6-bit (de)-quantization

- void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -1667,8 +1667,8 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
  }
  }

- uint8_t * restrict ql = y[i].ql;
- uint8_t * restrict qh = y[i].qh;
  for (int j = 0; j < QK_K; j += 128) {
  for (int l = 0; l < 32; ++l) {
  const uint8_t q1 = L[j + l + 0] & 0xF;

@@ -1687,16 +1687,16 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
  }
  }

- void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

  for (int i = 0; i < nb; i++) {
  const float d = GGML_FP16_TO_FP32(x[i].d);

- const uint8_t * restrict ql = x[i].ql;
- const uint8_t * restrict qh = x[i].qh;
- const int8_t * restrict sc = x[i].scales;

  for (int n = 0; n < QK_K; n += 128) {
  for (int l = 0; l < 32; ++l) {

@@ -1718,7 +1718,7 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int6
  }
  }

- static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restrict y, int64_t n_per_row, const float * quant_weights) {
  assert(n_per_row % QK_K == 0);
  const int64_t nb = n_per_row / QK_K;

@@ -1781,8 +1781,8 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
  }
  }

- uint8_t * restrict ql = y[i].ql;
- uint8_t * restrict qh = y[i].qh;
  for (int j = 0; j < QK_K; j += 128) {
  for (int l = 0; l < 32; ++l) {
  const uint8_t q1 = L[j + l + 0] & 0xF;

@@ -1802,7 +1802,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
  }
  }

- size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
  if (!quant_weights) {
  quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -1818,7 +1818,7 @@ size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nr
  return nrow * row_size;
  }

- static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
  static_assert(QK4_0 == 32, "QK4_0 must be 32");

  if (!quant_weights) {

@@ -1846,7 +1846,7 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
  }
  }

- size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  if (!quant_weights) {
  quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);

@@ -1861,7 +1861,7 @@ size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nr
  return nrow * row_size;
  }

- static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
  static_assert(QK4_1 == 32, "QK4_1 must be 32");

  if (!quant_weights) {

@@ -1891,7 +1891,7 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
  }
  }

- size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  if (!quant_weights) {
  quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);

@@ -1906,7 +1906,7 @@ size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nr
  return nrow * row_size;
  }

- static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
  static_assert(QK5_0 == 32, "QK5_0 must be 32");

  if (!quant_weights) {

@@ -1945,7 +1945,7 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
  }
  }

- size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  if (!quant_weights) {
  quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);

@@ -1960,7 +1960,7 @@ size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nr
  return nrow * row_size;
  }

- static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
  static_assert(QK5_1 == 32, "QK5_1 must be 32");

  if (!quant_weights) {

@@ -1998,7 +1998,7 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
  }
  }

- size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  if (!quant_weights) {
  quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);

@@ -2013,7 +2013,7 @@ size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nr
  return nrow * row_size;
  }

- size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  (void)quant_weights; // not used
  const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
  quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);

@@ -2022,7 +2022,7 @@ size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nr

  // ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)

- void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2088,7 +2088,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
  }
  }

- void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2120,21 +2120,21 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
  }
  }

- size_t quantize_tq1_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  (void)quant_weights; // not used
  const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
  quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * row_size;
  }

- size_t quantize_tq2_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  (void)quant_weights; // not used
  const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
  quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
  return nrow * row_size;
  }

- void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2173,7 +2173,7 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in
  }
  }

- void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2194,7 +2194,7 @@ void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, in

  // ====================== "True" 2-bit (de)-quantization

- void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2222,7 +2222,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y

  // ====================== 2.3125 bpw (de)-quantization

- void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2249,7 +2249,7 @@ void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,

  // ====================== 2.5625 bpw (de)-quantization

- void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2281,7 +2281,7 @@ void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, in

  // ====================== 3.0625 bpw (de)-quantization

- void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2313,7 +2313,7 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y

  // ====================== 3.3125 bpw (de)-quantization

- void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2356,7 +2356,7 @@ void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, in

  // ====================== 1.5625 bpw (de)-quantization

- void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2381,7 +2381,7 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
  }
  }

- void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2433,7 +2433,7 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in

  static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};

- void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {
  assert(k % QK4_NL == 0);
  const int64_t nb = k / QK4_NL;

@@ -2451,7 +2451,7 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
  }
  }

- void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2476,7 +2476,7 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,

  //===================================== Q8_K ==============================================

- void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2515,7 +2515,7 @@ void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, in
  }
  }

- void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  const int64_t nb = k / QK_K;

@@ -2927,8 +2927,8 @@ void iq2xs_free_impl(enum ggml_type type) {
  }
  }

- static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
- const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L) {
  int num_neighbors = neighbours[0];
  GGML_ASSERT(num_neighbors > 0);
  float best_d2 = FLT_MAX;

@@ -2951,7 +2951,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u
  return grid_index;
  }

- static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {

  const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);

@@ -3124,7 +3124,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
  }
  }

- static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {

  const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);

@@ -3304,7 +3304,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
  }
  }

- size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  char * qrow = (char *)dst;

@@ -3316,7 +3316,7 @@ size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t
  return nrow * nblock * sizeof(block_iq2_xxs);
  }

- size_t quantize_iq2_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  char * qrow = (char *)dst;

@@ -3521,8 +3521,8 @@ void iq3xs_free_impl(int grid_size) {
  }
  }

- static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const uint32_t * restrict grid,
- const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L) {
  int num_neighbors = neighbours[0];
  GGML_ASSERT(num_neighbors > 0);
  float best_d2 = FLT_MAX;

@@ -3545,8 +3545,8 @@ static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const u
  return grid_index;
  }

- static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, void * restrict vy, int64_t n,
- const float * restrict quant_weights) {

  const int gindex = iq3_data_index(grid_size);

@@ -3758,7 +3758,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
  }
  }

- size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  char * qrow = (char *)dst;

@@ -3770,13 +3770,13 @@ size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t
  return nrow * nblock * sizeof(block_iq3_xxs);
  }

- void quantize_row_iq3_xxs_ref(const float * restrict x, block_iq3_xxs * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
  }

- static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, void * restrict vy, int64_t n,
- const float * restrict quant_weights,
  float * scales,
  float * weight,
  float * xval,

@@ -3958,7 +3958,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
  }

  #define IQ3S_BLOCK_SIZE 32
- size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  float scales[QK_K/IQ3S_BLOCK_SIZE];

@@ -3980,7 +3980,7 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t n
  return nrow * nblock * sizeof(block_iq3_s);
  }

- void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  quantize_iq3_s(x, y, 1, k, NULL);
  }

@@ -3988,8 +3988,8 @@ void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y,

  // =================================== 1.5 bpw ===================================================

- static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
- const float * restrict xval, const float * restrict weight, float * scale, int8_t * restrict L, int ngrid) {
  int num_neighbors = neighbours[0];
  GGML_ASSERT(num_neighbors > 0);
  float best_score = -FLT_MAX;

@@ -4048,8 +4048,8 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
  return grid_index;
  }

- static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
- const float * restrict xval, const float * restrict weight, float scale, const float * restrict xg, int8_t * restrict L, int ngrid) {
  int num_neighbors = neighbours[0];
  GGML_ASSERT(num_neighbors > 0);
  float best_score = FLT_MAX;

@@ -4113,7 +4113,7 @@ static int iq1_sort_helper(const void * left, const void * right) {

  #define IQ1S_BLOCK_SIZE 32
  #define IQ1M_BLOCK_SIZE 16
- static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
  float * scales,
  float * weight,
  float * sumx,

@@ -4271,7 +4271,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
  }
  }

- size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  float scales[QK_K/IQ1S_BLOCK_SIZE];
  float weight[IQ1S_BLOCK_SIZE];

@@ -4291,7 +4291,7 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
  return nrow * nblock * sizeof(block_iq1_s);
  }

- static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
  float * scales,
  float * weight,
  float * pairs,

@@ -4539,7 +4539,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
  }
  }

- size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  float scales[QK_K/IQ1M_BLOCK_SIZE];
  float weight[IQ1M_BLOCK_SIZE];

@@ -4570,7 +4570,7 @@ static inline int best_index_int8(int n, const int8_t * val, float x) {
  return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
  }

- static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
  ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
  float * scales, float * weight, uint8_t * L,
  const int8_t * values,

@@ -4681,7 +4681,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
  }
  }

- size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK4_NL == 0);
  int64_t nblock = n_per_row/QK4_NL;
  char * qrow = (char *)dst;

@@ -4703,8 +4703,8 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t
  return nrow * nblock * sizeof(block_iq4_nl);
  }

- //void quantize_row_iq4_nl_ref(const float * restrict x, void * restrict vy, int64_t k) {
- void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y, int64_t k) {
  GGML_ASSERT(k%QK4_NL == 0);
  int64_t nblock = k/QK4_NL;
  uint8_t L[QK4_NL];

@@ -4719,7 +4719,7 @@ void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y
  }
  }

- size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  char * qrow = (char *)dst;

@@ -4739,14 +4739,14 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t
  return nrow * nblock * sizeof(block_iq4_xs);
  }

- void quantize_row_iq4_xs_ref(const float * restrict x, block_iq4_xs * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  quantize_iq4_xs(x, y, 1, k, NULL);
  }

  // =============================== 2.5625 bpw

- static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {

  const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);

@@ -4914,7 +4914,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
  }
  }

- size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  GGML_ASSERT(n_per_row%QK_K == 0);
  int64_t nblock = n_per_row/QK_K;
  char * qrow = (char *)dst;

@@ -4926,7 +4926,7 @@ size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t n
  return nrow * nblock * sizeof(block_iq2_s);
  }

- void quantize_row_iq2_s_ref(const float * restrict x, block_iq2_s * restrict y, int64_t k) {
  assert(k % QK_K == 0);
  quantize_iq2_s(x, y, 1, k, NULL);
  }
|
| 28 |
#define UNUSED GGML_UNUSED
|
| 29 |
|
| 30 |
// reference implementation for deterministic creation of model files
|
| 31 |
+
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
|
| 32 |
static const int qk = QK4_0;
|
| 33 |
|
| 34 |
assert(k % qk == 0);
|
|
|
|
| 65 |
}
|
| 66 |
}
|
| 67 |
|
| 68 |
+
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
|
| 69 |
const int qk = QK4_1;
|
| 70 |
|
| 71 |
assert(k % qk == 0);
|
|
|
|
| 102 |
}
|
| 103 |
}
|
| 104 |
|
| 105 |
+
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
|
| 106 |
static const int qk = QK5_0;
|
| 107 |
|
| 108 |
assert(k % qk == 0);
|
|
|
|
| 146 |
}
|
| 147 |
}
|
| 148 |
|
| 149 |
+
void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
|
| 150 |
const int qk = QK5_1;
|
| 151 |
|
| 152 |
assert(k % qk == 0);
|
|
|
|
| 191 |
}
|
| 192 |
|
| 193 |
// reference implementation for deterministic creation of model files
|
| 194 |
+
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
|
| 195 |
assert(k % QK8_0 == 0);
|
| 196 |
const int nb = k / QK8_0;
|
| 197 |
|
|
|
|
| 217 |
}
|
| 218 |
|
| 219 |
// reference implementation for deterministic creation of model files
|
| 220 |
+
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
|
| 221 |
assert(QK8_1 == 32);
|
| 222 |
assert(k % QK8_1 == 0);
|
| 223 |
const int nb = k / QK8_1;
|
|
|
|
| 252 |
}
|
| 253 |
}
|
| 254 |
|
| 255 |
+
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 256 |
static const int qk = QK4_0;
|
| 257 |
|
| 258 |
assert(k % qk == 0);
|
|
|
|
| 272 |
}
|
| 273 |
}
|
| 274 |
|
| 275 |
+
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 276 |
static const int qk = QK4_1;
|
| 277 |
|
| 278 |
assert(k % qk == 0);
|
|
|
|
| 293 |
}
|
| 294 |
}
|
| 295 |
|
| 296 |
+
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 297 |
static const int qk = QK5_0;
|
| 298 |
|
| 299 |
assert(k % qk == 0);
|
|
|
|
| 319 |
}
|
| 320 |
}
|
| 321 |
|
| 322 |
+
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 323 |
static const int qk = QK5_1;
|
| 324 |
|
| 325 |
assert(k % qk == 0);
|
|
|
|
| 346 |
}
|
| 347 |
}
|
| 348 |
|
| 349 |
+
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 350 |
static const int qk = QK8_0;
|
| 351 |
|
| 352 |
assert(k % qk == 0);
|
|
|
|
| 376 |
return (i & 0x007fffff) - 0x00400000;
|
| 377 |
}
|
| 378 |
|
| 379 |
+
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
|
| 380 |
+
const float * GGML_RESTRICT qw) {
|
| 381 |
float max = 0;
|
| 382 |
float amax = 0;
|
| 383 |
for (int i = 0; i < n; ++i) {
|
|
|
|
| 445 |
return scale;
|
| 446 |
}
|
| 447 |
|
| 448 |
+
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
|
| 449 |
float max = 0;
|
| 450 |
float amax = 0;
|
| 451 |
for (int i = 0; i < n; ++i) {
|
|
|
|
| 504 |
return 1/iscale;
|
| 505 |
}
|
| 506 |
|
| 507 |
+
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
|
| 508 |
int ntry, float alpha) {
|
| 509 |
float min = x[0];
|
| 510 |
float max = x[0];
|
|
|
|
| 547 |
return scale;
|
| 548 |
}
|
| 549 |
|
| 550 |
+
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
| 551 |
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
| 552 |
float rmin, float rdelta, int nstep, bool use_mad) {
|
| 553 |
float min = x[0];
|
| 554 |
float max = x[0];
|
|
|
|
| 628 |
return scale;
|
| 629 |
}
|
| 630 |
|
| 631 |
+
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
|
| 632 |
if (j < 4) {
|
| 633 |
*d = q[j] & 63; *m = q[j + 4] & 63;
|
| 634 |
} else {
|
|
|
|
| 639 |
|
| 640 |
//========================- 2-bit (de)-quantization
|
| 641 |
|
| 642 |
+
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
|
| 643 |
assert(k % QK_K == 0);
|
| 644 |
const int nb = k / QK_K;
|
| 645 |
|
|
|
|
| 709 |
}
|
| 710 |
}
|
| 711 |
|
| 712 |
+
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
|
| 713 |
assert(k % QK_K == 0);
|
| 714 |
const int nb = k / QK_K;
|
| 715 |
|
|
|
|
| 741 |
}
|
| 742 |
}
|
| 743 |
|
| 744 |
+
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
|
| 745 |
+
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
|
| 746 |
float rmin, float rdelta, int nstep, bool use_mad) {
|
| 747 |
float min = x[0];
|
| 748 |
float max = x[0];
|
|
|
|
| 824 |
return scale;
|
| 825 |
}
|
| 826 |
|
| 827 |
+
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
|
| 828 |
float max = 0;
|
| 829 |
for (int i = 0; i < n; ++i) {
|
| 830 |
max = MAX(max, x[i]);
|
|
|
|
| 897 |
return sumlx/suml2;
|
| 898 |
}
|
| 899 |
|
| 900 |
+
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
|
| 901 |
GGML_ASSERT(quant_weights);
|
| 902 |
assert(k % QK_K == 0);
|
| 903 |
const int nb = k / QK_K;
|
|
|
|
| 917 |
for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
|
| 918 |
float sigma2 = sumx2/QK_K;
|
| 919 |
for (int j = 0; j < QK_K/16; ++j) {
|
| 920 |
+
const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
|
| 921 |
for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
|
| 922 |
for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
|
| 923 |
scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
|
|
|
|
| 959 |
}
|
| 960 |
}
|
| 961 |
|
| 962 |
+
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
|
| 963 |
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
|
| 964 |
if (!quant_weights) {
|
| 965 |
quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
| 977 |
| 978 |   //========================= 3-bit (de)-quantization
| 979 |
| 980 | + void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
| 981 |       assert(k % QK_K == 0);
| 982 |       const int nb = k / QK_K;
| 983 |

| 1053 |      }
| 1054 |  }
| 1055 |
| 1056 | + void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 1057 |      assert(k % QK_K == 0);
| 1058 |      const int nb = k / QK_K;
| 1059 |

| 1067 |
| 1068 |          const float d_all = GGML_FP16_TO_FP32(x[i].d);
| 1069 |
| 1070 | +        const uint8_t * GGML_RESTRICT q = x[i].qs;
| 1071 | +        const uint8_t * GGML_RESTRICT hm = x[i].hmask;
| 1072 |          uint8_t m = 1;
| 1073 |
| 1074 |          memcpy(aux, x[i].scales, 12);

| 1103 |      }
| 1104 |  }
| 1105 |
| 1106 | + static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
| 1107 |      assert(n_per_row % QK_K == 0);
| 1108 |      const int nb = n_per_row / QK_K;
| 1109 |

| 1187 |      }
| 1188 |  }
| 1189 |
| 1190 | + size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1191 |      size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
| 1192 |      if (!quant_weights) {
| 1193 |          quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);

| 1205 |
| 1206 |  // ====================== 4-bit (de)-quantization
| 1207 |
| 1208 | + void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
| 1209 |      assert(k % QK_K == 0);
| 1210 |      const int nb = k / QK_K;
| 1211 |

| 1277 |      }
| 1278 |  }
| 1279 |
| 1280 | + void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 1281 |      assert(k % QK_K == 0);
| 1282 |      const int nb = k / QK_K;
| 1283 |

| 1301 |      }
| 1302 |  }
| 1303 |
| 1304 | + static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1305 |      assert(n_per_row % QK_K == 0);
| 1306 |      const int64_t nb = n_per_row / QK_K;
| 1307 |

| 1374 |      }
| 1375 |  }
| 1376 |
| 1377 | + size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1378 |      size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
| 1379 |      if (!quant_weights) {
| 1380 |          quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);

| 1392 |
| 1393 |  // ====================== 5-bit (de)-quantization
| 1394 |
| 1395 | + void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
| 1396 |      assert(k % QK_K == 0);
| 1397 |      const int64_t nb = k / QK_K;
| 1398 |

| 1454 |            }
| 1455 |        }
| 1456 |
| 1457 | +        uint8_t * GGML_RESTRICT qh = y[i].qh;
| 1458 | +        uint8_t * GGML_RESTRICT ql = y[i].qs;
| 1459 |          memset(qh, 0, QK_K/8);
| 1460 |
| 1461 |          uint8_t m1 = 1, m2 = 2;

| 1479 |      }
| 1480 |  }
| 1481 |
| 1482 | + void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 1483 |      assert(k % QK_K == 0);
| 1484 |      const int64_t nb = k / QK_K;
| 1485 |

| 1506 |      }
| 1507 |  }
| 1508 |
| 1509 | + static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1510 |      assert(n_per_row % QK_K == 0);
| 1511 |      const int64_t nb = n_per_row / QK_K;
| 1512 |

| 1573 |            }
| 1574 |        }
| 1575 |
| 1576 | +        uint8_t * GGML_RESTRICT qh = y[i].qh;
| 1577 | +        uint8_t * GGML_RESTRICT ql = y[i].qs;
| 1578 |          memset(qh, 0, QK_K/8);
| 1579 |
| 1580 |          uint8_t m1 = 1, m2 = 2;

| 1599 |      }
| 1600 |  }
| 1601 |
| 1602 | + size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1603 |      size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
| 1604 |      if (!quant_weights) {
| 1605 |          quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);

| 1617 |
| 1618 |  // ====================== 6-bit (de)-quantization
| 1619 |
| 1620 | + void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
| 1621 |      assert(k % QK_K == 0);
| 1622 |      const int64_t nb = k / QK_K;
| 1623 |

| 1667 |            }
| 1668 |        }
| 1669 |
| 1670 | +        uint8_t * GGML_RESTRICT ql = y[i].ql;
| 1671 | +        uint8_t * GGML_RESTRICT qh = y[i].qh;
| 1672 |          for (int j = 0; j < QK_K; j += 128) {
| 1673 |              for (int l = 0; l < 32; ++l) {
| 1674 |                  const uint8_t q1 = L[j + l + 0] & 0xF;
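Note: the loop starting at line 1672 above packs the 6-bit values in L into the ql/qh arrays declared just before it: the low four bits of four values land in two ql bytes, and their top two bits share one qh byte. Schematically (a reconstruction for readability, not a quote of the elided lines):

    // Pack four 6-bit values a, b, c, d (each 0..63) into 3 bytes:
    // two ql bytes hold the low nibbles, one qh byte holds the high bits.
    ql[l +  0] = (a & 0xF) | ((c & 0xF) << 4);
    ql[l + 32] = (b & 0xF) | ((d & 0xF) << 4);
    qh[l]      = (a >> 4) | ((b >> 4) << 2) | ((c >> 4) << 4) | ((d >> 4) << 6);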
| 1687 |      }
| 1688 |  }
| 1689 |
| 1690 | + void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 1691 |      assert(k % QK_K == 0);
| 1692 |      const int64_t nb = k / QK_K;
| 1693 |
| 1694 |      for (int i = 0; i < nb; i++) {
| 1695 |          const float d = GGML_FP16_TO_FP32(x[i].d);
| 1696 |
| 1697 | +        const uint8_t * GGML_RESTRICT ql = x[i].ql;
| 1698 | +        const uint8_t * GGML_RESTRICT qh = x[i].qh;
| 1699 | +        const int8_t  * GGML_RESTRICT sc = x[i].scales;
| 1700 |
| 1701 |          for (int n = 0; n < QK_K; n += 128) {
| 1702 |              for (int l = 0; l < 32; ++l) {

| 1718 |      }
| 1719 |  }
| 1720 |
| 1721 | + static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1722 |      assert(n_per_row % QK_K == 0);
| 1723 |      const int64_t nb = n_per_row / QK_K;
| 1724 |

| 1781 |            }
| 1782 |        }
| 1783 |
| 1784 | +        uint8_t * GGML_RESTRICT ql = y[i].ql;
| 1785 | +        uint8_t * GGML_RESTRICT qh = y[i].qh;
| 1786 |          for (int j = 0; j < QK_K; j += 128) {
| 1787 |              for (int l = 0; l < 32; ++l) {
| 1788 |                  const uint8_t q1 = L[j + l + 0] & 0xF;

| 1802 |      }
| 1803 |  }
| 1804 |
| 1805 | + size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1806 |      size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
| 1807 |      if (!quant_weights) {
| 1808 |          quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);

| 1818 |      return nrow * row_size;
| 1819 |  }
| 1820 |
| 1821 | + static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1822 |      static_assert(QK4_0 == 32, "QK4_0 must be 32");
| 1823 |
| 1824 |      if (!quant_weights) {

| 1846 |      }
| 1847 |  }
| 1848 |
| 1849 | + size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1850 |      if (!quant_weights) {
| 1851 |          quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
| 1852 |          return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);

| 1861 |      return nrow * row_size;
| 1862 |  }
| 1863 |
| 1864 | + static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1865 |      static_assert(QK4_1 == 32, "QK4_1 must be 32");
| 1866 |
| 1867 |      if (!quant_weights) {

| 1891 |      }
| 1892 |  }
| 1893 |
| 1894 | + size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1895 |      if (!quant_weights) {
| 1896 |          quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
| 1897 |          return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);

| 1906 |      return nrow * row_size;
| 1907 |  }
| 1908 |
| 1909 | + static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1910 |      static_assert(QK5_0 == 32, "QK5_0 must be 32");
| 1911 |
| 1912 |      if (!quant_weights) {

| 1945 |      }
| 1946 |  }
| 1947 |
| 1948 | + size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 1949 |      if (!quant_weights) {
| 1950 |          quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
| 1951 |          return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);

| 1960 |      return nrow * row_size;
| 1961 |  }
| 1962 |
| 1963 | + static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
| 1964 |      static_assert(QK5_1 == 32, "QK5_1 must be 32");
| 1965 |
| 1966 |      if (!quant_weights) {

| 1998 |      }
| 1999 |  }
| 2000 |
| 2001 | + size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 2002 |      if (!quant_weights) {
| 2003 |          quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
| 2004 |          return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);

| 2013 |      return nrow * row_size;
| 2014 |  }
| 2015 |
| 2016 | + size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 2017 |      (void)quant_weights; // not used
| 2018 |      const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
| 2019 |      quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);

| 2022 |
| 2023 |  // ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
| 2024 |
| 2025 | + void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
| 2026 |      assert(k % QK_K == 0);
| 2027 |      const int64_t nb = k / QK_K;
| 2028 |

| 2088 |      }
| 2089 |  }
| 2090 |
| 2091 | + void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
| 2092 |      assert(k % QK_K == 0);
| 2093 |      const int64_t nb = k / QK_K;
| 2094 |

| 2120 |      }
| 2121 |  }
| 2122 |
| 2123 | + size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 2124 |      (void)quant_weights; // not used
| 2125 |      const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
| 2126 |      quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
| 2127 |      return nrow * row_size;
| 2128 |  }
| 2129 |
| 2130 | + size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 2131 |      (void)quant_weights; // not used
| 2132 |      const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
| 2133 |      quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
| 2134 |      return nrow * row_size;
| 2135 |  }
| 2136 |
| 2137 | + void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2138 |      assert(k % QK_K == 0);
| 2139 |      const int64_t nb = k / QK_K;
| 2140 |

| 2173 |      }
| 2174 |  }
| 2175 |
| 2176 | + void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2177 |      assert(k % QK_K == 0);
| 2178 |      const int64_t nb = k / QK_K;
| 2179 |

| 2194 |
| 2195 |  // ====================== "True" 2-bit (de)-quantization
| 2196 |
| 2197 | + void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2198 |      assert(k % QK_K == 0);
| 2199 |      const int64_t nb = k / QK_K;
| 2200 |

| 2222 |
| 2223 |  // ====================== 2.3125 bpw (de)-quantization
| 2224 |
| 2225 | + void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2226 |      assert(k % QK_K == 0);
| 2227 |      const int64_t nb = k / QK_K;
| 2228 |

| 2249 |
| 2250 |  // ====================== 2.5625 bpw (de)-quantization
| 2251 |
| 2252 | + void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2253 |      assert(k % QK_K == 0);
| 2254 |      const int64_t nb = k / QK_K;
| 2255 |

| 2281 |
| 2282 |  // ====================== 3.0625 bpw (de)-quantization
| 2283 |
| 2284 | + void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2285 |      assert(k % QK_K == 0);
| 2286 |      const int64_t nb = k / QK_K;
| 2287 |

| 2313 |
| 2314 |  // ====================== 3.3125 bpw (de)-quantization
| 2315 |
| 2316 | + void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2317 |      assert(k % QK_K == 0);
| 2318 |      const int64_t nb = k / QK_K;
| 2319 |

| 2356 |
| 2357 |  // ====================== 1.5625 bpw (de)-quantization
| 2358 |
| 2359 | + void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2360 |      assert(k % QK_K == 0);
| 2361 |      const int64_t nb = k / QK_K;
| 2362 |

| 2381 |      }
| 2382 |  }
| 2383 |
| 2384 | + void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2385 |      assert(k % QK_K == 0);
| 2386 |      const int64_t nb = k / QK_K;
| 2387 |

| 2433 |
| 2434 |  static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
| 2435 |
| 2436 | + void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2437 |      assert(k % QK4_NL == 0);
| 2438 |      const int64_t nb = k / QK4_NL;
| 2439 |
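Note: the kvalues_iq4nl table above is the core of IQ4_NL: the sixteen 4-bit codes index a non-uniform codebook rather than scaling linearly. A schematic of the per-block decode in dequantize_row_iq4_nl (the block field names d and qs and the exact packing order are assumptions here):

    // Decode one block: d is the per-block fp16 scale, qs packs two
    // 4-bit codes per byte, each mapped through the non-linear table.
    const float d = GGML_FP16_TO_FP32(x[i].d);
    for (int j = 0; j < QK4_NL/2; ++j) {
        y[j           ] = d * kvalues_iq4nl[x[i].qs[j] & 0xF];
        y[j + QK4_NL/2] = d * kvalues_iq4nl[x[i].qs[j] >>  4];
    }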
| 2451 |      }
| 2452 |  }
| 2453 |
| 2454 | + void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2455 |      assert(k % QK_K == 0);
| 2456 |      const int64_t nb = k / QK_K;
| 2457 |

| 2476 |
| 2477 |  //===================================== Q8_K ==============================================
| 2478 |
| 2479 | + void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
| 2480 |      assert(k % QK_K == 0);
| 2481 |      const int64_t nb = k / QK_K;
| 2482 |

| 2515 |      }
| 2516 |  }
| 2517 |
| 2518 | + void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
| 2519 |      assert(k % QK_K == 0);
| 2520 |      const int64_t nb = k / QK_K;
| 2521 |

| 2927 |      }
| 2928 |  }
| 2929 |
| 2930 | + static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
| 2931 | +         const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
| 2932 |      int num_neighbors = neighbours[0];
| 2933 |      GGML_ASSERT(num_neighbors > 0);
| 2934 |      float best_d2 = FLT_MAX;
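Note: iq2_find_best_neighbour (and the iq3/iq1 variants below) follow one pattern: neighbours[0] is a candidate count, the entries after it index a packed codebook grid, and the function returns the grid index with the smallest weighted squared error against xval. A sketch of that search (the 8-component unpacking of each uint64_t grid entry is an assumption):

    int grid_index = -1;
    for (int j = 1; j <= num_neighbors; ++j) {
        // each grid entry packs 8 signed byte components
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 8; ++i) {
            const float diff = scale*pg[i] - xval[i];
            d2 += weight[i]*diff*diff;
        }
        if (d2 < best_d2) {
            best_d2 = d2;
            grid_index = neighbours[j];
        }
    }
    return grid_index;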
| 2951 |      return grid_index;
| 2952 |  }
| 2953 |
| 2954 | + static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
| 2955 |
| 2956 |      const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);
| 2957 |

| 3124 |      }
| 3125 |  }
| 3126 |
| 3127 | + static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
| 3128 |
| 3129 |      const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
| 3130 |

| 3304 |      }
| 3305 |  }
| 3306 |
| 3307 | + size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 3308 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 3309 |      int64_t nblock = n_per_row/QK_K;
| 3310 |      char * qrow = (char *)dst;

| 3316 |      return nrow * nblock * sizeof(block_iq2_xxs);
| 3317 |  }
| 3318 |
| 3319 | + size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 3320 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 3321 |      int64_t nblock = n_per_row/QK_K;
| 3322 |      char * qrow = (char *)dst;

| 3521 |      }
| 3522 |  }
| 3523 |
| 3524 | + static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
| 3525 | +         const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
| 3526 |      int num_neighbors = neighbours[0];
| 3527 |      GGML_ASSERT(num_neighbors > 0);
| 3528 |      float best_d2 = FLT_MAX;

| 3545 |      return grid_index;
| 3546 |  }
| 3547 |
| 3548 | + static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
| 3549 | +         const float * GGML_RESTRICT quant_weights) {
| 3550 |
| 3551 |      const int gindex = iq3_data_index(grid_size);
| 3552 |

| 3758 |      }
| 3759 |  }
| 3760 |
| 3761 | + size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 3762 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 3763 |      int64_t nblock = n_per_row/QK_K;
| 3764 |      char * qrow = (char *)dst;

| 3770 |      return nrow * nblock * sizeof(block_iq3_xxs);
| 3771 |  }
| 3772 |
| 3773 | + void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
| 3774 |      assert(k % QK_K == 0);
| 3775 |      quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
| 3776 |  }
| 3777 |
| 3778 | + static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int n,
| 3779 | +         const float * GGML_RESTRICT quant_weights,
| 3780 |          float * scales,
| 3781 |          float * weight,
| 3782 |          float * xval,

| 3958 |  }
| 3959 |
| 3960 |  #define IQ3S_BLOCK_SIZE 32
| 3961 | + size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 3962 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 3963 |      int64_t nblock = n_per_row/QK_K;
| 3964 |      float scales[QK_K/IQ3S_BLOCK_SIZE];

| 3980 |      return nrow * nblock * sizeof(block_iq3_s);
| 3981 |  }
| 3982 |
| 3983 | + void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
| 3984 |      assert(k % QK_K == 0);
| 3985 |      quantize_iq3_s(x, y, 1, k, NULL);
| 3986 |  }

| 3988 |
| 3989 |  // =================================== 1.5 bpw ===================================================
| 3990 |
| 3991 | + static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
| 3992 | +         const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
| 3993 |      int num_neighbors = neighbours[0];
| 3994 |      GGML_ASSERT(num_neighbors > 0);
| 3995 |      float best_score = -FLT_MAX;

| 4048 |      return grid_index;
| 4049 |  }
| 4050 |
| 4051 | + static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
| 4052 | +         const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
| 4053 |      int num_neighbors = neighbours[0];
| 4054 |      GGML_ASSERT(num_neighbors > 0);
| 4055 |      float best_score = FLT_MAX;

| 4113 |
| 4114 |  #define IQ1S_BLOCK_SIZE 32
| 4115 |  #define IQ1M_BLOCK_SIZE 16
| 4116 | + static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
| 4117 |          float * scales,
| 4118 |          float * weight,
| 4119 |          float * sumx,

| 4271 |      }
| 4272 |  }
| 4273 |
| 4274 | + size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 4275 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 4276 |      float scales[QK_K/IQ1S_BLOCK_SIZE];
| 4277 |      float weight[IQ1S_BLOCK_SIZE];

| 4291 |      return nrow * nblock * sizeof(block_iq1_s);
| 4292 |  }
| 4293 |
| 4294 | + static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
| 4295 |          float * scales,
| 4296 |          float * weight,
| 4297 |          float * pairs,

| 4539 |      }
| 4540 |  }
| 4541 |
| 4542 | + size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 4543 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 4544 |      float scales[QK_K/IQ1M_BLOCK_SIZE];
| 4545 |      float weight[IQ1M_BLOCK_SIZE];

| 4570 |      return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
| 4571 |  }
| 4572 |
| 4573 | + static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
| 4574 |          ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
| 4575 |          float * scales, float * weight, uint8_t * L,
| 4576 |          const int8_t * values,

| 4681 |      }
| 4682 |  }
| 4683 |
| 4684 | + size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 4685 |      GGML_ASSERT(n_per_row%QK4_NL == 0);
| 4686 |      int64_t nblock = n_per_row/QK4_NL;
| 4687 |      char * qrow = (char *)dst;

| 4703 |      return nrow * nblock * sizeof(block_iq4_nl);
| 4704 |  }
| 4705 |
| 4706 | + //void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
| 4707 | + void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
| 4708 |      GGML_ASSERT(k%QK4_NL == 0);
| 4709 |      int64_t nblock = k/QK4_NL;
| 4710 |      uint8_t L[QK4_NL];

| 4719 |      }
| 4720 |  }
| 4721 |
| 4722 | + size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 4723 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 4724 |      int64_t nblock = n_per_row/QK_K;
| 4725 |      char * qrow = (char *)dst;

| 4739 |      return nrow * nblock * sizeof(block_iq4_xs);
| 4740 |  }
| 4741 |
| 4742 | + void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
| 4743 |      assert(k % QK_K == 0);
| 4744 |      quantize_iq4_xs(x, y, 1, k, NULL);
| 4745 |  }
| 4746 |
| 4747 |  // =============================== 2.5625 bpw
| 4748 |
| 4749 | + static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
| 4750 |
| 4751 |      const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
| 4752 |

| 4914 |      }
| 4915 |  }
| 4916 |
| 4917 | + size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
| 4918 |      GGML_ASSERT(n_per_row%QK_K == 0);
| 4919 |      int64_t nblock = n_per_row/QK_K;
| 4920 |      char * qrow = (char *)dst;

| 4926 |      return nrow * nblock * sizeof(block_iq2_s);
| 4927 |  }
| 4928 |
| 4929 | + void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
| 4930 |      assert(k % QK_K == 0);
| 4931 |      quantize_iq2_s(x, y, 1, k, NULL);
| 4932 |  }
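Note: the _ref wrappers in this file (quantize_row_iq2_s_ref just above, and the iq3_s/iq4_xs analogues earlier) all delegate to the multi-row entry point with nrow = 1 and no importance weights, which makes single-row round trips straightforward. A sketch (buffer handling assumed; k must be a multiple of QK_K):

    #include <stdlib.h>

    // Illustration: quantize one row of k floats to IQ2_S and decode it back.
    static void roundtrip_iq2_s(const float * row, float * decoded, int64_t k) {
        void * q = malloc(ggml_row_size(GGML_TYPE_IQ2_S, k));
        if (!q) return;
        quantize_iq2_s(row, q, /*nrow=*/1, /*n_per_row=*/k, /*quant_weights=*/NULL);
        dequantize_row_iq2_s((const block_iq2_s *)q, decoded, k);
        free(q);
    }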
ggml/src/ggml.c
CHANGED
@@ -565,9 +565,9 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
| 565 |  #endif
| 566 |
| 567 |  }
| 568 | - static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
| 569 | - static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
| 570 | - static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
| 571 |
| 572 |  static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
| 573 |      [GGML_TYPE_I8] = {

| 565 |  #endif
| 566 |
| 567 |  }
| 568 | + static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
| 569 | + static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
| 570 | + static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);
| 571 |
| 572 |  static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
| 573 |      [GGML_TYPE_I8] = {
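Note: these prototypes show the shape of the vector kernels the qualifier matters most for. A simplified sketch of such a kernel written against the same macro (not the actual ggml implementation; the real functions also honour the bs/bx/by strides and nrc):

    static void vec_dot_f32_sketch(int n, float * GGML_RESTRICT s,
                                   const float * GGML_RESTRICT x,
                                   const float * GGML_RESTRICT y) {
        float sum = 0.0f;
        for (int i = 0; i < n; ++i) {
            sum += x[i]*y[i]; // x and y are promised not to overlap s
        }
        *s = sum;
    }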