Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
0286805
1
Parent(s):
1bfe279
rpc : use backend registry, support dl backends (llama/13304)
Browse files- ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- ggml/src/ggml-rpc/ggml-rpc.cpp +11 -0
ggml/src/ggml-cpu/ggml-cpu.cpp
CHANGED
|
@@ -11,24 +11,26 @@
|
|
| 11 |
#include <vector>
|
| 12 |
|
| 13 |
#ifdef GGML_USE_CPU_HBM
|
| 14 |
-
#include "ggml-cpu-hbm.h"
|
| 15 |
#endif
|
| 16 |
|
| 17 |
#ifdef GGML_USE_CPU_KLEIDIAI
|
| 18 |
-
#include "kleidiai/kleidiai.h"
|
| 19 |
-
#endif
|
| 20 |
-
|
| 21 |
-
#if defined(__APPLE__)
|
| 22 |
-
#include <sys/types.h>
|
| 23 |
-
#include <sys/sysctl.h>
|
| 24 |
#endif
|
| 25 |
|
| 26 |
#if defined(_WIN32)
|
| 27 |
-
#define WIN32_LEAN_AND_MEAN
|
| 28 |
-
#ifndef NOMINMAX
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
#endif
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
| 32 |
#endif
|
| 33 |
|
| 34 |
// ggml-backend interface
|
|
@@ -70,8 +72,10 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_ty
|
|
| 70 |
}
|
| 71 |
|
| 72 |
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
|
| 73 |
-
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
|
| 74 |
-
if (extra && extra == buft)
|
|
|
|
|
|
|
| 75 |
}
|
| 76 |
return false;
|
| 77 |
}
|
|
@@ -330,9 +334,18 @@ static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t d
|
|
| 330 |
}
|
| 331 |
|
| 332 |
static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
GGML_UNUSED(dev);
|
| 338 |
}
|
|
|
|
| 11 |
#include <vector>
|
| 12 |
|
| 13 |
#ifdef GGML_USE_CPU_HBM
|
| 14 |
+
# include "ggml-cpu-hbm.h"
|
| 15 |
#endif
|
| 16 |
|
| 17 |
#ifdef GGML_USE_CPU_KLEIDIAI
|
| 18 |
+
# include "kleidiai/kleidiai.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
#endif
|
| 20 |
|
| 21 |
#if defined(_WIN32)
|
| 22 |
+
# define WIN32_LEAN_AND_MEAN
|
| 23 |
+
# ifndef NOMINMAX
|
| 24 |
+
# define NOMINMAX
|
| 25 |
+
# endif
|
| 26 |
+
# include <windows.h>
|
| 27 |
+
#else
|
| 28 |
+
# include <unistd.h>
|
| 29 |
#endif
|
| 30 |
+
|
| 31 |
+
#if defined(__APPLE__)
|
| 32 |
+
# include <sys/sysctl.h>
|
| 33 |
+
# include <sys/types.h>
|
| 34 |
#endif
|
| 35 |
|
| 36 |
// ggml-backend interface
|
|
|
|
| 72 |
}
|
| 73 |
|
| 74 |
// Tells whether `buft` matches one of the non-null entries returned by
// ggml_backend_cpu_get_extra_buffers_type().
//
// buft - buffer type handle to look up.
//
// Returns true on the first matching entry, false when no entry matches.
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
    for (auto * candidate : ggml_backend_cpu_get_extra_buffers_type()) {
        // Null entries never count as a match, even when buft itself is null.
        if (!candidate || candidate != buft) {
            continue;
        }
        return true;
    }
    return false;
}
|
|
|
|
| 334 |
}
|
| 335 |
|
| 336 |
// Reports physical memory figures for the CPU backend device.
//
// dev   - device handle; not used by this function.
// free  - receives the available physical memory in bytes on Windows; on
//         other platforms this path has no separate free-memory query and
//         reports the same value as *total.
// total - receives the total physical memory in bytes.
//
// Both outputs are set to 0 when the underlying OS query fails.
static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    *free  = 0;
    *total = 0;

#ifdef _WIN32
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    // Only read the struct when the call succeeded; otherwise it is unfilled.
    if (GlobalMemoryStatusEx(&status)) {
        *total = (size_t) status.ullTotalPhys;
        *free  = (size_t) status.ullAvailPhys;
    }
#else
    const long pages     = sysconf(_SC_PHYS_PAGES);
    const long page_size = sysconf(_SC_PAGE_SIZE);
    // sysconf() returns -1 on error; reject that instead of multiplying it.
    if (pages > 0 && page_size > 0) {
        // Widen each operand first so the product is not computed in signed long.
        *total = (size_t) pages * (size_t) page_size;
        *free  = *total;
    }
#endif

    GGML_UNUSED(dev);
}
|
ggml/src/ggml-rpc/ggml-rpc.cpp
CHANGED
|
@@ -1594,6 +1594,14 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
|
|
| 1594 |
void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
| 1595 |
const char * cache_dir,
|
| 1596 |
size_t free_mem, size_t total_mem) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1597 |
std::string host;
|
| 1598 |
int port;
|
| 1599 |
if (!parse_endpoint(endpoint, host, port)) {
|
|
@@ -1753,6 +1761,9 @@ static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const ch
|
|
| 1753 |
if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) {
|
| 1754 |
return (void *)ggml_backend_rpc_add_device;
|
| 1755 |
}
|
|
|
|
|
|
|
|
|
|
| 1756 |
return NULL;
|
| 1757 |
|
| 1758 |
GGML_UNUSED(reg);
|
|
|
|
| 1594 |
void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
| 1595 |
const char * cache_dir,
|
| 1596 |
size_t free_mem, size_t total_mem) {
|
| 1597 |
+
printf("Starting RPC server v%d.%d.%d\n",
|
| 1598 |
+
RPC_PROTO_MAJOR_VERSION,
|
| 1599 |
+
RPC_PROTO_MINOR_VERSION,
|
| 1600 |
+
RPC_PROTO_PATCH_VERSION);
|
| 1601 |
+
printf(" endpoint : %s\n", endpoint);
|
| 1602 |
+
printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a");
|
| 1603 |
+
printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024));
|
| 1604 |
+
|
| 1605 |
std::string host;
|
| 1606 |
int port;
|
| 1607 |
if (!parse_endpoint(endpoint, host, port)) {
|
|
|
|
| 1761 |
if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) {
|
| 1762 |
return (void *)ggml_backend_rpc_add_device;
|
| 1763 |
}
|
| 1764 |
+
if (std::strcmp(name, "ggml_backend_rpc_start_server") == 0) {
|
| 1765 |
+
return (void *)ggml_backend_rpc_start_server;
|
| 1766 |
+
}
|
| 1767 |
return NULL;
|
| 1768 |
|
| 1769 |
GGML_UNUSED(reg);
|