Diego Devesa committed on
Commit
59dd404
·
1 Parent(s): ba20d5c

ggml : fix gelu tables initialization (llama/10172)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cpu.c +10 -4
ggml/src/ggml-cpu.c CHANGED
@@ -13678,6 +13678,13 @@ int ggml_cpu_get_sve_cnt(void) {
13678
  }
13679
 
13680
  void ggml_cpu_init(void) {
 
 
 
 
 
 
 
13681
  ggml_critical_section_start();
13682
 
13683
  static bool is_first_call = true;
@@ -13685,8 +13692,7 @@ void ggml_cpu_init(void) {
13685
  if (is_first_call) {
13686
  // initialize GELU, Quick GELU, SILU and EXP F32 tables
13687
  {
13688
- // FIXME: this may be called before ggml_init
13689
- //const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
13690
 
13691
  for (int i = 0; i < (1 << 16); ++i) {
13692
  union {
@@ -13698,9 +13704,9 @@ void ggml_cpu_init(void) {
13698
  ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
13699
  }
13700
 
13701
- //const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
13702
 
13703
- //GGML_PRINT_DEBUG("%s: GELU, Quick GELU, SILU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0);
13704
  }
13705
 
13706
  #if defined(__ARM_ARCH)
 
13678
  }
13679
 
13680
  void ggml_cpu_init(void) {
13681
+ // needed to initialize f16 tables
13682
+ {
13683
+ struct ggml_init_params params = { 0, NULL, false };
13684
+ struct ggml_context * ctx = ggml_init(params);
13685
+ ggml_free(ctx);
13686
+ }
13687
+
13688
  ggml_critical_section_start();
13689
 
13690
  static bool is_first_call = true;
 
13692
  if (is_first_call) {
13693
  // initialize GELU, Quick GELU, SILU and EXP F32 tables
13694
  {
13695
+ const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
 
13696
 
13697
  for (int i = 0; i < (1 << 16); ++i) {
13698
  union {
 
13704
  ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
13705
  }
13706
 
13707
+ const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
13708
 
13709
+ GGML_PRINT_DEBUG("%s: GELU, Quick GELU, SILU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0);
13710
  }
13711
 
13712
  #if defined(__ARM_ARCH)