Spaces:
Running
Running
slaren
committed on
ggml : add max buffer sizes to opencl and metal backends (llama/5181)
Browse files
- ggml-metal.m +11 -1
- ggml-opencl.cpp +10 -1
ggml-metal.m
CHANGED
|
@@ -2398,6 +2398,16 @@ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backen
|
|
| 2398 |
UNUSED(buft);
|
| 2399 |
}
|
| 2400 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2401 |
GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
|
| 2402 |
return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
|
| 2403 |
|
|
@@ -2416,7 +2426,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
|
| 2416 |
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
| 2417 |
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
| 2418 |
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
| 2419 |
-
/* .get_max_size = */
|
| 2420 |
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
| 2421 |
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
| 2422 |
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
|
|
|
| 2398 |
UNUSED(buft);
|
| 2399 |
}
|
| 2400 |
|
| 2401 |
+
// Report the largest single buffer, in bytes, that the Metal device allows.
//
// The value is read from the device's maxBufferLength property. The device
// handle is obtained with ggml_backend_metal_get_device() and handed back
// with ggml_backend_metal_free_device() before returning.
//
// buft: buffer type being queried; not consulted by this implementation.
GGML_CALL static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    UNUSED(buft);

    id<MTLDevice> mtl_device = ggml_backend_metal_get_device();
    const size_t limit = [mtl_device maxBufferLength];
    ggml_backend_metal_free_device();

    return limit;
}
|
| 2410 |
+
|
| 2411 |
GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
|
| 2412 |
return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
|
| 2413 |
|
|
|
|
| 2426 |
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
| 2427 |
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
| 2428 |
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
| 2429 |
+
/* .get_max_size = */ ggml_backend_metal_buffer_type_get_max_size,
|
| 2430 |
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
| 2431 |
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
| 2432 |
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
ggml-opencl.cpp
CHANGED
|
@@ -2125,6 +2125,15 @@ static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_
|
|
| 2125 |
GGML_UNUSED(buffer_type);
|
| 2126 |
}
|
| 2127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2128 |
static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
|
| 2129 |
//return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
|
| 2130 |
return ggml_backend_is_cpu(backend);
|
|
@@ -2136,7 +2145,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
|
|
| 2136 |
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
| 2137 |
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
| 2138 |
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
| 2139 |
-
/* .get_max_size = */
|
| 2140 |
/* .get_alloc_size = */ NULL,
|
| 2141 |
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
| 2142 |
/* .is_host = */ NULL,
|
|
|
|
| 2125 |
GGML_UNUSED(buffer_type);
|
| 2126 |
}
|
| 2127 |
|
| 2128 |
+
// Report the largest single allocation, in bytes, that the OpenCL device allows.
//
// The device is queried once; after a successful query the result is cached in
// a function-local static. (size_t)-1 doubles as the "not yet known" marker and
// as the value returned when the limit cannot be determined, so a failed query
// is retried on the next call.
//
// buffer_type: buffer type being queried; not consulted by this implementation.
static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
    static size_t max_size = (size_t)-1;
    if (max_size == (size_t)-1) {
        ggml_cl_init(); // presumably sets up the file-level `device` handle used below -- confirm

        // CL_DEVICE_MAX_MEM_ALLOC_SIZE is reported as a cl_ulong. Querying it
        // with sizeof(size_t) fails with CL_INVALID_VALUE wherever size_t is
        // narrower than cl_ulong, so read into the correct type and convert.
        cl_ulong max_alloc = 0;
        const cl_int err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc), &max_alloc, NULL);

        // Only accept values that fit in size_t; otherwise keep the
        // "no known limit" sentinel rather than truncating.
        if (err == CL_SUCCESS && max_alloc < (cl_ulong)(size_t)-1) {
            max_size = (size_t)max_alloc;
        }
    }
    return max_size;

    GGML_UNUSED(buffer_type);
}
|
| 2136 |
+
|
| 2137 |
static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
|
| 2138 |
//return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
|
| 2139 |
return ggml_backend_is_cpu(backend);
|
|
|
|
| 2145 |
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
| 2146 |
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
| 2147 |
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
| 2148 |
+
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
|
| 2149 |
/* .get_alloc_size = */ NULL,
|
| 2150 |
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
| 2151 |
/* .is_host = */ NULL,
|