Spaces:
Running
Running
ggml : android and old glibc NUMA incompatibility bugfixes (llama/5557)
Browse files
* #ifdef out some NUMA code blocks for Android due to lack of support
* Added some __ANDROID__ #ifdef gates around NUMA code and forced glibc versions prior to 2.29 to use a syscall for getcpu instead of the wrapper
* Changed the gates on NUMA platform-specific code to __gnu_linux__ to skip any platforms without glibc
* Harmonized the #if defined blocks for NUMA code to __gnu_linux__, since that is the only model being followed anyway
---------
Co-authored-by: root <[email protected]>
ggml.c
CHANGED
|
@@ -23,6 +23,9 @@
|
|
| 23 |
#include <limits.h>
|
| 24 |
#include <stdarg.h>
|
| 25 |
#include <signal.h>
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
#ifdef GGML_USE_METAL
|
| 28 |
#include <unistd.h>
|
|
@@ -1971,7 +1974,7 @@ struct ggml_numa_nodes {
|
|
| 1971 |
uint32_t n_nodes;
|
| 1972 |
uint32_t total_cpus; // hardware threads on system
|
| 1973 |
uint32_t current_node; // node on which main process is executing
|
| 1974 |
-
#
|
| 1975 |
cpu_set_t cpuset; // cpuset from numactl
|
| 1976 |
#else
|
| 1977 |
uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
|
|
@@ -2009,7 +2012,7 @@ inline static void ggml_critical_section_end(void) {
|
|
| 2009 |
atomic_fetch_sub(&g_state_barrier, 1);
|
| 2010 |
}
|
| 2011 |
|
| 2012 |
-
#
|
| 2013 |
static cpu_set_t ggml_get_numa_affinity(void) {
|
| 2014 |
cpu_set_t cpuset;
|
| 2015 |
pthread_t thread;
|
|
@@ -2031,7 +2034,7 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
|
|
| 2031 |
return;
|
| 2032 |
}
|
| 2033 |
|
| 2034 |
-
#
|
| 2035 |
struct stat st;
|
| 2036 |
char path[256];
|
| 2037 |
int rv;
|
|
@@ -2063,7 +2066,13 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
|
|
| 2063 |
|
| 2064 |
// figure out which node we're on
|
| 2065 |
uint current_cpu;
|
| 2066 |
-
int getcpu_ret =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2067 |
|
| 2068 |
if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
|
| 2069 |
g_state.numa.n_nodes = 0;
|
|
@@ -16734,7 +16743,7 @@ typedef pthread_t ggml_thread_t;
|
|
| 16734 |
#endif
|
| 16735 |
|
| 16736 |
// Android's libc implementation "bionic" does not support setting affinity
|
| 16737 |
-
#if defined(
|
| 16738 |
static void set_numa_thread_affinity(int thread_n) {
|
| 16739 |
if (!ggml_is_numa()) {
|
| 16740 |
return;
|
|
|
|
| 23 |
#include <limits.h>
|
| 24 |
#include <stdarg.h>
|
| 25 |
#include <signal.h>
|
| 26 |
+
#if defined(__gnu_linux__)
|
| 27 |
+
#include <syscall.h>
|
| 28 |
+
#endif
|
| 29 |
|
| 30 |
#ifdef GGML_USE_METAL
|
| 31 |
#include <unistd.h>
|
|
|
|
| 1974 |
uint32_t n_nodes;
|
| 1975 |
uint32_t total_cpus; // hardware threads on system
|
| 1976 |
uint32_t current_node; // node on which main process is executing
|
| 1977 |
+
#if defined(__gnu_linux__)
|
| 1978 |
cpu_set_t cpuset; // cpuset from numactl
|
| 1979 |
#else
|
| 1980 |
uint32_t cpuset; // no NUMA support outside of Linux at this time. Use a portable datatype
|
|
|
|
| 2012 |
atomic_fetch_sub(&g_state_barrier, 1);
|
| 2013 |
}
|
| 2014 |
|
| 2015 |
+
#if defined(__gnu_linux__)
|
| 2016 |
static cpu_set_t ggml_get_numa_affinity(void) {
|
| 2017 |
cpu_set_t cpuset;
|
| 2018 |
pthread_t thread;
|
|
|
|
| 2034 |
return;
|
| 2035 |
}
|
| 2036 |
|
| 2037 |
+
#if defined(__gnu_linux__)
|
| 2038 |
struct stat st;
|
| 2039 |
char path[256];
|
| 2040 |
int rv;
|
|
|
|
| 2066 |
|
| 2067 |
// figure out which node we're on
|
| 2068 |
uint current_cpu;
|
| 2069 |
+
int getcpu_ret = 0;
|
| 2070 |
+
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 28)
|
| 2071 |
+
getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
|
| 2072 |
+
#else
|
| 2073 |
+
// old glibc doesn't have a wrapper for this call. Fall back on direct syscall
|
| 2074 |
+
getcpu_ret = syscall(SYS_getcpu,&current_cpu,&g_state.numa.current_node);
|
| 2075 |
+
#endif
|
| 2076 |
|
| 2077 |
if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
|
| 2078 |
g_state.numa.n_nodes = 0;
|
|
|
|
| 16743 |
#endif
|
| 16744 |
|
| 16745 |
// Android's libc implementation "bionic" does not support setting affinity
|
| 16746 |
+
#if defined(__gnu_linux__)
|
| 16747 |
static void set_numa_thread_affinity(int thread_n) {
|
| 16748 |
if (!ggml_is_numa()) {
|
| 16749 |
return;
|