Spaces:
Running
Running
ggml-cpu: reduce asm calls for hsum (llama/14037)
Browse files
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
ggml/src/ggml-cpu/simd-mappings.h
CHANGED
|
@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
|
|
| 944 |
for (int i = 0; i < offset; ++i) { \
|
| 945 |
x[i] = vec_add(x[i], x[offset + i]); \
|
| 946 |
} \
|
| 947 |
-
res = vec_extract(x[0], 0) + \
|
| 948 |
-
vec_extract(x[0], 1) + \
|
| 949 |
-
vec_extract(x[0], 2) + \
|
| 950 |
-
vec_extract(x[0], 3); \
|
| 951 |
}
|
| 952 |
|
| 953 |
#define GGML_F32_VEC GGML_F32x4
|
|
|
|
| 944 |
for (int i = 0; i < offset; ++i) { \
|
| 945 |
x[i] = vec_add(x[i], x[offset + i]); \
|
| 946 |
} \
|
| 947 |
+
float32x4_t tmp = x[0] + vec_reve(x[0]); \
|
| 948 |
+
res = tmp[0] + tmp[1]; \
|
|
|
|
|
|
|
| 949 |
}
|
| 950 |
|
| 951 |
#define GGML_F32_VEC GGML_F32x4
|