Spaces:
Running
Running
rpc : code cleanup (llama/11107)
Browse files

Remove duplicated macros, use GGML_LOG_ERROR for errors
- ggml/src/ggml-rpc/ggml-rpc.cpp +20 -29
ggml/src/ggml-rpc/ggml-rpc.cpp
CHANGED
|
@@ -27,15 +27,6 @@
|
|
| 27 |
#endif
|
| 28 |
#include <cstring>
|
| 29 |
|
| 30 |
-
#define UNUSED GGML_UNUSED
|
| 31 |
-
|
| 32 |
-
#define GGML_DEBUG 0
|
| 33 |
-
#if (GGML_DEBUG >= 1)
|
| 34 |
-
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
|
| 35 |
-
#else
|
| 36 |
-
#define GGML_PRINT_DEBUG(...)
|
| 37 |
-
#endif
|
| 38 |
-
|
| 39 |
#ifdef _WIN32
|
| 40 |
typedef SOCKET sockfd_t;
|
| 41 |
using ssize_t = __int64;
|
|
@@ -411,7 +402,7 @@ static std::shared_ptr<socket_t> get_socket(const std::string & endpoint) {
|
|
| 411 |
initialized = true;
|
| 412 |
}
|
| 413 |
#else
|
| 414 |
-
|
| 415 |
#endif
|
| 416 |
auto sock = socket_connect(host.c_str(), port);
|
| 417 |
if (sock == nullptr) {
|
|
@@ -640,7 +631,7 @@ static void ggml_backend_rpc_free(ggml_backend_t backend) {
|
|
| 640 |
}
|
| 641 |
|
| 642 |
static void ggml_backend_rpc_synchronize(ggml_backend_t backend) {
|
| 643 |
-
|
| 644 |
// this is no-op because we don't have any async operations
|
| 645 |
}
|
| 646 |
|
|
@@ -850,7 +841,7 @@ void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_
|
|
| 850 |
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size);
|
| 851 |
buffers.insert(buffer);
|
| 852 |
} else {
|
| 853 |
-
|
| 854 |
}
|
| 855 |
}
|
| 856 |
|
|
@@ -872,7 +863,7 @@ bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rp
|
|
| 872 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
|
| 873 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 874 |
if (buffers.find(buffer) == buffers.end()) {
|
| 875 |
-
|
| 876 |
return false;
|
| 877 |
}
|
| 878 |
void * base = ggml_backend_buffer_get_base(buffer);
|
|
@@ -884,7 +875,7 @@ bool rpc_server::free_buffer(const rpc_msg_free_buffer_req & request) {
|
|
| 884 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
|
| 885 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 886 |
if (buffers.find(buffer) == buffers.end()) {
|
| 887 |
-
|
| 888 |
return false;
|
| 889 |
}
|
| 890 |
ggml_backend_buffer_free(buffer);
|
|
@@ -896,7 +887,7 @@ bool rpc_server::buffer_clear(const rpc_msg_buffer_clear_req & request) {
|
|
| 896 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
|
| 897 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 898 |
if (buffers.find(buffer) == buffers.end()) {
|
| 899 |
-
|
| 900 |
return false;
|
| 901 |
}
|
| 902 |
ggml_backend_buffer_clear(buffer, request.value);
|
|
@@ -952,7 +943,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
|
|
| 952 |
struct ggml_context * ctx = ggml_init(params);
|
| 953 |
ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
|
| 954 |
if (tensor == nullptr) {
|
| 955 |
-
|
| 956 |
ggml_free(ctx);
|
| 957 |
return false;
|
| 958 |
}
|
|
@@ -1017,7 +1008,7 @@ bool rpc_server::get_tensor(const rpc_msg_get_tensor_req & request, std::vector<
|
|
| 1017 |
struct ggml_context * ctx = ggml_init(params);
|
| 1018 |
ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
|
| 1019 |
if (tensor == nullptr) {
|
| 1020 |
-
|
| 1021 |
ggml_free(ctx);
|
| 1022 |
return false;
|
| 1023 |
}
|
|
@@ -1051,7 +1042,7 @@ bool rpc_server::copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_co
|
|
| 1051 |
ggml_tensor * src = deserialize_tensor(ctx, &request.src);
|
| 1052 |
ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
|
| 1053 |
if (src == nullptr || dst == nullptr) {
|
| 1054 |
-
|
| 1055 |
ggml_free(ctx);
|
| 1056 |
return false;
|
| 1057 |
}
|
|
@@ -1385,14 +1376,14 @@ static void ggml_backend_rpc_device_get_memory(ggml_backend_dev_t dev, size_t *
|
|
| 1385 |
|
| 1386 |
ggml_backend_rpc_get_device_memory(ctx->endpoint.c_str(), free, total);
|
| 1387 |
|
| 1388 |
-
|
| 1389 |
}
|
| 1390 |
|
| 1391 |
static enum ggml_backend_dev_type ggml_backend_rpc_device_get_type(ggml_backend_dev_t dev) {
|
| 1392 |
// TODO: obtain value from the server
|
| 1393 |
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
| 1394 |
|
| 1395 |
-
|
| 1396 |
}
|
| 1397 |
|
| 1398 |
static void ggml_backend_rpc_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
|
@@ -1413,7 +1404,7 @@ static ggml_backend_t ggml_backend_rpc_device_init(ggml_backend_dev_t dev, const
|
|
| 1413 |
|
| 1414 |
return ggml_backend_rpc_init(ctx->endpoint.c_str());
|
| 1415 |
|
| 1416 |
-
|
| 1417 |
}
|
| 1418 |
|
| 1419 |
static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_backend_dev_t dev) {
|
|
@@ -1421,12 +1412,12 @@ static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_b
|
|
| 1421 |
|
| 1422 |
return ggml_backend_rpc_buffer_type(ctx->endpoint.c_str());
|
| 1423 |
|
| 1424 |
-
|
| 1425 |
}
|
| 1426 |
|
| 1427 |
static bool ggml_backend_rpc_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
| 1428 |
-
|
| 1429 |
-
|
| 1430 |
//TODO: call the remote backend and cache the results
|
| 1431 |
return true;
|
| 1432 |
}
|
|
@@ -1463,20 +1454,20 @@ static const struct ggml_backend_device_i ggml_backend_rpc_device_i = {
|
|
| 1463 |
static const char * ggml_backend_rpc_reg_get_name(ggml_backend_reg_t reg) {
|
| 1464 |
return "RPC";
|
| 1465 |
|
| 1466 |
-
|
| 1467 |
}
|
| 1468 |
|
| 1469 |
static size_t ggml_backend_rpc_reg_get_device_count(ggml_backend_reg_t reg) {
|
| 1470 |
return 0;
|
| 1471 |
|
| 1472 |
-
|
| 1473 |
}
|
| 1474 |
|
| 1475 |
static ggml_backend_dev_t ggml_backend_rpc_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
| 1476 |
GGML_ABORT("The RPC backend does not have enumerated devices - use ggml_backend_add_device instead");
|
| 1477 |
|
| 1478 |
-
|
| 1479 |
-
|
| 1480 |
}
|
| 1481 |
|
| 1482 |
static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
|
@@ -1485,7 +1476,7 @@ static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const ch
|
|
| 1485 |
}
|
| 1486 |
return NULL;
|
| 1487 |
|
| 1488 |
-
|
| 1489 |
}
|
| 1490 |
|
| 1491 |
static const struct ggml_backend_reg_i ggml_backend_rpc_reg_i = {
|
|
|
|
| 27 |
#endif
|
| 28 |
#include <cstring>
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
#ifdef _WIN32
|
| 31 |
typedef SOCKET sockfd_t;
|
| 32 |
using ssize_t = __int64;
|
|
|
|
| 402 |
initialized = true;
|
| 403 |
}
|
| 404 |
#else
|
| 405 |
+
GGML_UNUSED(initialized);
|
| 406 |
#endif
|
| 407 |
auto sock = socket_connect(host.c_str(), port);
|
| 408 |
if (sock == nullptr) {
|
|
|
|
| 631 |
}
|
| 632 |
|
| 633 |
static void ggml_backend_rpc_synchronize(ggml_backend_t backend) {
|
| 634 |
+
GGML_UNUSED(backend);
|
| 635 |
// this is no-op because we don't have any async operations
|
| 636 |
}
|
| 637 |
|
|
|
|
| 841 |
GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size);
|
| 842 |
buffers.insert(buffer);
|
| 843 |
} else {
|
| 844 |
+
GGML_LOG_ERROR("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size);
|
| 845 |
}
|
| 846 |
}
|
| 847 |
|
|
|
|
| 863 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
|
| 864 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 865 |
if (buffers.find(buffer) == buffers.end()) {
|
| 866 |
+
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
|
| 867 |
return false;
|
| 868 |
}
|
| 869 |
void * base = ggml_backend_buffer_get_base(buffer);
|
|
|
|
| 875 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 "\n", __func__, request.remote_ptr);
|
| 876 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 877 |
if (buffers.find(buffer) == buffers.end()) {
|
| 878 |
+
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
|
| 879 |
return false;
|
| 880 |
}
|
| 881 |
ggml_backend_buffer_free(buffer);
|
|
|
|
| 887 |
GGML_PRINT_DEBUG("[%s] remote_ptr: %" PRIx64 ", value: %u\n", __func__, request.remote_ptr, request.value);
|
| 888 |
ggml_backend_buffer_t buffer = reinterpret_cast<ggml_backend_buffer_t>(request.remote_ptr);
|
| 889 |
if (buffers.find(buffer) == buffers.end()) {
|
| 890 |
+
GGML_LOG_ERROR("[%s] buffer not found\n", __func__);
|
| 891 |
return false;
|
| 892 |
}
|
| 893 |
ggml_backend_buffer_clear(buffer, request.value);
|
|
|
|
| 943 |
struct ggml_context * ctx = ggml_init(params);
|
| 944 |
ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
|
| 945 |
if (tensor == nullptr) {
|
| 946 |
+
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
|
| 947 |
ggml_free(ctx);
|
| 948 |
return false;
|
| 949 |
}
|
|
|
|
| 1008 |
struct ggml_context * ctx = ggml_init(params);
|
| 1009 |
ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
|
| 1010 |
if (tensor == nullptr) {
|
| 1011 |
+
GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
|
| 1012 |
ggml_free(ctx);
|
| 1013 |
return false;
|
| 1014 |
}
|
|
|
|
| 1042 |
ggml_tensor * src = deserialize_tensor(ctx, &request.src);
|
| 1043 |
ggml_tensor * dst = deserialize_tensor(ctx, &request.dst);
|
| 1044 |
if (src == nullptr || dst == nullptr) {
|
| 1045 |
+
GGML_LOG_ERROR("[%s] error deserializing tensors\n", __func__);
|
| 1046 |
ggml_free(ctx);
|
| 1047 |
return false;
|
| 1048 |
}
|
|
|
|
| 1376 |
|
| 1377 |
ggml_backend_rpc_get_device_memory(ctx->endpoint.c_str(), free, total);
|
| 1378 |
|
| 1379 |
+
GGML_UNUSED(dev);
|
| 1380 |
}
|
| 1381 |
|
| 1382 |
static enum ggml_backend_dev_type ggml_backend_rpc_device_get_type(ggml_backend_dev_t dev) {
|
| 1383 |
// TODO: obtain value from the server
|
| 1384 |
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
| 1385 |
|
| 1386 |
+
GGML_UNUSED(dev);
|
| 1387 |
}
|
| 1388 |
|
| 1389 |
static void ggml_backend_rpc_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
|
|
|
| 1404 |
|
| 1405 |
return ggml_backend_rpc_init(ctx->endpoint.c_str());
|
| 1406 |
|
| 1407 |
+
GGML_UNUSED(params);
|
| 1408 |
}
|
| 1409 |
|
| 1410 |
static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_backend_dev_t dev) {
|
|
|
|
| 1412 |
|
| 1413 |
return ggml_backend_rpc_buffer_type(ctx->endpoint.c_str());
|
| 1414 |
|
| 1415 |
+
GGML_UNUSED(dev);
|
| 1416 |
}
|
| 1417 |
|
| 1418 |
static bool ggml_backend_rpc_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
| 1419 |
+
GGML_UNUSED(dev);
|
| 1420 |
+
GGML_UNUSED(op);
|
| 1421 |
//TODO: call the remote backend and cache the results
|
| 1422 |
return true;
|
| 1423 |
}
|
|
|
|
| 1454 |
static const char * ggml_backend_rpc_reg_get_name(ggml_backend_reg_t reg) {
|
| 1455 |
return "RPC";
|
| 1456 |
|
| 1457 |
+
GGML_UNUSED(reg);
|
| 1458 |
}
|
| 1459 |
|
| 1460 |
static size_t ggml_backend_rpc_reg_get_device_count(ggml_backend_reg_t reg) {
|
| 1461 |
return 0;
|
| 1462 |
|
| 1463 |
+
GGML_UNUSED(reg);
|
| 1464 |
}
|
| 1465 |
|
| 1466 |
static ggml_backend_dev_t ggml_backend_rpc_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
| 1467 |
GGML_ABORT("The RPC backend does not have enumerated devices - use ggml_backend_add_device instead");
|
| 1468 |
|
| 1469 |
+
GGML_UNUSED(reg);
|
| 1470 |
+
GGML_UNUSED(index);
|
| 1471 |
}
|
| 1472 |
|
| 1473 |
static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
|
|
|
| 1476 |
}
|
| 1477 |
return NULL;
|
| 1478 |
|
| 1479 |
+
GGML_UNUSED(reg);
|
| 1480 |
}
|
| 1481 |
|
| 1482 |
static const struct ggml_backend_reg_i ggml_backend_rpc_reg_i = {
|