taronaeo committed on
Commit
17c0dfa
·
1 Parent(s): c005248

ggml-cpu: reduce asm calls for hsum (llama/14037)

Browse files

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

Files changed (1)
  1. ggml/src/ggml-cpu/simd-mappings.h +2 -4
ggml/src/ggml-cpu/simd-mappings.h CHANGED
@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
944
  for (int i = 0; i < offset; ++i) { \
945
  x[i] = vec_add(x[i], x[offset + i]); \
946
  } \
947
- res = vec_extract(x[0], 0) + \
948
- vec_extract(x[0], 1) + \
949
- vec_extract(x[0], 2) + \
950
- vec_extract(x[0], 3); \
951
  }
952
 
953
  #define GGML_F32_VEC GGML_F32x4
 
944
  for (int i = 0; i < offset; ++i) { \
945
  x[i] = vec_add(x[i], x[offset + i]); \
946
  } \
947
+ float32x4_t tmp = x[0] + vec_reve(x[0]); \
948
+ res = tmp[0] + tmp[1]; \
 
 
949
  }
950
 
951
  #define GGML_F32_VEC GGML_F32x4