bmwl root committed on
Commit
0206c2d
·
unverified ·
1 Parent(s): de4041f

ggml : android and old glibc NUMA incompatibility bugfixes (llama/5557)

Browse files

* #ifdef out some NUMA code blocks for Android due to lack of support

* Added some __ANDROID__ #ifdef gates around the NUMA code and forced glibc versions prior to 2.29 to use a direct syscall for getcpu instead of the wrapper

* Changed the gates on NUMA platform-specific code to __gnu_linux__ to skip any platforms without glibc

* Harmonized the #if defined blocks for the NUMA code to __gnu_linux__, since that's the only model being followed anyway

---------

Co-authored-by: root <[email protected]>

Files changed (1) hide show
  1. ggml.c +14 -5
ggml.c CHANGED
@@ -23,6 +23,9 @@
23
  #include <limits.h>
24
  #include <stdarg.h>
25
  #include <signal.h>
 
 
 
26
 
27
  #ifdef GGML_USE_METAL
28
  #include <unistd.h>
@@ -1971,7 +1974,7 @@ struct ggml_numa_nodes {
1971
  uint32_t n_nodes;
1972
  uint32_t total_cpus; // hardware threads on system
1973
  uint32_t current_node; // node on which main process is execting
1974
- #ifdef __linux__
1975
  cpu_set_t cpuset; // cpuset from numactl
1976
  #else
1977
  uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
@@ -2009,7 +2012,7 @@ inline static void ggml_critical_section_end(void) {
2009
  atomic_fetch_sub(&g_state_barrier, 1);
2010
  }
2011
 
2012
- #ifdef __linux__
2013
  static cpu_set_t ggml_get_numa_affinity(void) {
2014
  cpu_set_t cpuset;
2015
  pthread_t thread;
@@ -2031,7 +2034,7 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
2031
  return;
2032
  }
2033
 
2034
- #ifdef __linux__
2035
  struct stat st;
2036
  char path[256];
2037
  int rv;
@@ -2063,7 +2066,13 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
2063
 
2064
  // figure out which node we're on
2065
  uint current_cpu;
2066
- int getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
 
 
 
 
 
 
2067
 
2068
  if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
2069
  g_state.numa.n_nodes = 0;
@@ -16734,7 +16743,7 @@ typedef pthread_t ggml_thread_t;
16734
  #endif
16735
 
16736
  // Android's libc implementation "bionic" does not support setting affinity
16737
- #if defined(__linux__) && !defined(__BIONIC__)
16738
  static void set_numa_thread_affinity(int thread_n) {
16739
  if (!ggml_is_numa()) {
16740
  return;
 
23
  #include <limits.h>
24
  #include <stdarg.h>
25
  #include <signal.h>
26
+ #if defined(__gnu_linux__)
27
+ #include <syscall.h>
28
+ #endif
29
 
30
  #ifdef GGML_USE_METAL
31
  #include <unistd.h>
 
1974
  uint32_t n_nodes;
1975
  uint32_t total_cpus; // hardware threads on system
1976
  uint32_t current_node; // node on which main process is execting
1977
+ #if defined(__gnu_linux__)
1978
  cpu_set_t cpuset; // cpuset from numactl
1979
  #else
1980
  uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
 
2012
  atomic_fetch_sub(&g_state_barrier, 1);
2013
  }
2014
 
2015
+ #if defined(__gnu_linux__)
2016
  static cpu_set_t ggml_get_numa_affinity(void) {
2017
  cpu_set_t cpuset;
2018
  pthread_t thread;
 
2034
  return;
2035
  }
2036
 
2037
+ #if defined(__gnu_linux__)
2038
  struct stat st;
2039
  char path[256];
2040
  int rv;
 
2066
 
2067
  // figure out which node we're on
2068
  uint current_cpu;
2069
+ int getcpu_ret = 0;
2070
+ #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
2071
+ getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
2072
+ #else
2073
+ // old glibc doesn't have a wrapper for this call. Fall back on direct syscall
2074
+ getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
2075
+ #endif
2076
 
2077
  if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
2078
  g_state.numa.n_nodes = 0;
 
16743
  #endif
16744
 
16745
  // Android's libc implementation "bionic" does not support setting affinity
16746
+ #if defined(__gnu_linux__)
16747
  static void set_numa_thread_affinity(int thread_n) {
16748
  if (!ggml_is_numa()) {
16749
  return;