slaren committed on
Commit
3d354d0
·
unverified ·
1 Parent(s): ea7167a

ggml : add max buffer sizes to opencl and metal backends (llama/5181)

Browse files
Files changed (2) hide show
  1. ggml-metal.m +11 -1
  2. ggml-opencl.cpp +10 -1
ggml-metal.m CHANGED
@@ -2398,6 +2398,16 @@ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backen
2398
  UNUSED(buft);
2399
  }
2400
 
 
 
 
 
 
 
 
 
 
 
2401
  GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
2402
  return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
2403
 
@@ -2416,7 +2426,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
2416
  /* .get_name = */ ggml_backend_metal_buffer_type_get_name,
2417
  /* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
2418
  /* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
2419
- /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
2420
  /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
2421
  /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
2422
  /* .is_host = */ ggml_backend_metal_buffer_type_is_host,
 
2398
  UNUSED(buft);
2399
  }
2400
 
2401
+ GGML_CALL static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
2402
+ id<MTLDevice> device = ggml_backend_metal_get_device();
2403
+ size_t max_size = device.maxBufferLength;
2404
+ ggml_backend_metal_free_device();
2405
+
2406
+ return max_size;
2407
+
2408
+ UNUSED(buft);
2409
+ }
2410
+
2411
  GGML_CALL static bool ggml_backend_metal_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
2412
  return ggml_backend_is_metal(backend) || ggml_backend_is_cpu(backend);
2413
 
 
2426
  /* .get_name = */ ggml_backend_metal_buffer_type_get_name,
2427
  /* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
2428
  /* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
2429
+ /* .get_max_size = */ ggml_backend_metal_buffer_type_get_max_size,
2430
  /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
2431
  /* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
2432
  /* .is_host = */ ggml_backend_metal_buffer_type_is_host,
ggml-opencl.cpp CHANGED
@@ -2125,6 +2125,15 @@ static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_
2125
  GGML_UNUSED(buffer_type);
2126
  }
2127
 
 
 
 
 
 
 
 
 
 
2128
  static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
2129
  //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
2130
  return ggml_backend_is_cpu(backend);
@@ -2136,7 +2145,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
2136
  /* .get_name = */ ggml_backend_opencl_buffer_type_name,
2137
  /* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
2138
  /* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
2139
- /* .get_max_size = */ NULL, // TODO: return from device info
2140
  /* .get_alloc_size = */ NULL,
2141
  /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
2142
  /* .is_host = */ NULL,
 
2125
  GGML_UNUSED(buffer_type);
2126
  }
2127
 
2128
+ static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
2129
+ static size_t max_size = -1;
2130
+ if (max_size == (size_t)-1) {
2131
+ ggml_cl_init();
2132
+ clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_size, NULL);
2133
+ }
2134
+ return max_size;
2135
+ }
2136
+
2137
  static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
2138
  //return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
2139
  return ggml_backend_is_cpu(backend);
 
2145
  /* .get_name = */ ggml_backend_opencl_buffer_type_name,
2146
  /* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
2147
  /* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
2148
+ /* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
2149
  /* .get_alloc_size = */ NULL,
2150
  /* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
2151
  /* .is_host = */ NULL,