Spaces:
Running
Running
ggml : try fix 32-bit arm compat (#1938)
Browse files

* ggml : try fix 32-bit arm compat
* ggml : fix cont
- ggml-quants.c +8 -7
ggml-quants.c
CHANGED
|
@@ -9758,8 +9758,8 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
| 9758 |
|
| 9759 |
static const uint8_t k_mask2[16] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,};
|
| 9760 |
|
| 9761 |
-
const
|
| 9762 |
-
const uint8x16_t
|
| 9763 |
const uint8x16_t m1 = vdupq_n_u8(1);
|
| 9764 |
const int32x4_t vzero = vdupq_n_s32(0);
|
| 9765 |
|
|
@@ -10103,11 +10103,12 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|
| 10103 |
|
| 10104 |
static const int16_t k_shift[8] = {8, 7, 6, 5, 4, 3, 2, 1};
|
| 10105 |
|
| 10106 |
-
const
|
| 10107 |
-
const uint8x16_t
|
| 10108 |
-
|
| 10109 |
-
const
|
| 10110 |
-
const
|
|
|
|
| 10111 |
|
| 10112 |
uint8x16x2_t vs;
|
| 10113 |
ggml_int8x16x4_t q3s;
|
|
|
|
| 9758 |
|
| 9759 |
static const uint8_t k_mask2[16] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,};
|
| 9760 |
|
| 9761 |
+
const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1);
|
| 9762 |
+
const uint8x16_t mask2 = vld1q_u8(k_mask2);
|
| 9763 |
const uint8x16_t m1 = vdupq_n_u8(1);
|
| 9764 |
const int32x4_t vzero = vdupq_n_s32(0);
|
| 9765 |
|
|
|
|
| 10103 |
|
| 10104 |
static const int16_t k_shift[8] = {8, 7, 6, 5, 4, 3, 2, 1};
|
| 10105 |
|
| 10106 |
+
const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1);
|
| 10107 |
+
const uint8x16_t mask2 = vld1q_u8(k_mask2);
|
| 10108 |
+
|
| 10109 |
+
const int16x8_t hshift = vld1q_s16(k_shift);
|
| 10110 |
+
const uint16x8_t m256 = vdupq_n_u16(256);
|
| 10111 |
+
const uint8x16_t m1 = vdupq_n_u8(1);
|
| 10112 |
|
| 10113 |
uint8x16x2_t vs;
|
| 10114 |
ggml_int8x16x4_t q3s;
|