ggerganov committed on
Commit
84b7b50
·
unverified ·
1 Parent(s): a3ee28a

Correct implementation of FP16 GELU

Browse files

The FP16 GELU path can be toggled via the GGML_GELU_FP16 macro.

Files changed (1) hide show
  1. ggml.c +12 -10
ggml.c CHANGED
@@ -14,7 +14,6 @@
14
  #include <stdint.h>
15
  #include <stdio.h>
16
 
17
-
18
  #if defined _MSC_VER
19
  #include "msvc_thread_atomic.h"
20
  #else
@@ -24,6 +23,7 @@ typedef void* thread_ret_t;
24
  #endif
25
 
26
  #define GGML_DEBUG 0
 
27
 
28
  #if UINTPTR_MAX == 0xFFFFFFFF
29
  #define GGML_MEM_ALIGN 4
@@ -723,20 +723,22 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
723
  }
724
  }
725
 
726
- //inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
727
- // uint16_t t;
728
- // for (int i = 0; i < n; ++i) {
729
- // ggml_fp16_t fp16 = ggml_fp32_to_fp16(x[i]);
730
- // memcpy(&t, &fp16, sizeof(uint16_t));
731
- // y[i] = table_gelu_f16[t];
732
- // }
733
- //}
734
-
 
735
  inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
736
  for (int i = 0; i < n; ++i) {
737
  y[i] = ggml_gelu_f32(x[i]);
738
  }
739
  }
 
740
 
741
  inline static void ggml_vec_sum_f32 (const int n, float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) sum += x[i]; *s += sum; }
742
  inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { ggml_vec_norm_f32(n, s, x); *s = 1./(*s); }
 
14
  #include <stdint.h>
15
  #include <stdio.h>
16
 
 
17
  #if defined _MSC_VER
18
  #include "msvc_thread_atomic.h"
19
  #else
 
23
  #endif
24
 
25
  #define GGML_DEBUG 0
26
+ #define GGML_GELU_FP16
27
 
28
  #if UINTPTR_MAX == 0xFFFFFFFF
29
  #define GGML_MEM_ALIGN 4
 
723
  }
724
  }
725
 
726
+ #ifdef GGML_GELU_FP16
727
+ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
728
+ uint16_t t;
729
+ for (int i = 0; i < n; ++i) {
730
+ ggml_fp16_t fp16 = ggml_fp32_to_fp16(x[i]);
731
+ memcpy(&t, &fp16, sizeof(uint16_t));
732
+ y[i] = ggml_fp16_to_fp32(table_gelu_f16[t]);
733
+ }
734
+ }
735
+ #else
736
  inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
737
  for (int i = 0; i < n; ++i) {
738
  y[i] = ggml_gelu_f32(x[i]);
739
  }
740
  }
741
+ #endif
742
 
743
  inline static void ggml_vec_sum_f32 (const int n, float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) sum += x[i]; *s += sum; }
744
  inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { ggml_vec_norm_f32(n, s, x); *s = 1./(*s); }