Spaces:
Sleeping
Sleeping
metal : use residency sets (llama/11427)
Browse files
* metal : use residency sets
ggml-ci
* metal : restore commandBufferWithUnretainedReferences calls [no ci]
* metal : release descriptors
ggml-ci
* metal : check env GGML_METAL_NO_RESIDENCY
ggml-ci
* metal : fix build + clean-up
ggml-ci
- ggml/src/ggml-metal/ggml-metal.m +119 -17
ggml/src/ggml-metal/ggml-metal.m
CHANGED
|
@@ -19,7 +19,10 @@
|
|
| 19 |
// max number of MTLCommandBuffer used to submit a graph for processing
|
| 20 |
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
// globals
|
| 25 |
|
|
@@ -39,6 +42,7 @@ static struct ggml_backend_metal_device_context {
|
|
| 39 |
|
| 40 |
bool has_simdgroup_reduction;
|
| 41 |
bool has_simdgroup_mm;
|
|
|
|
| 42 |
bool has_bfloat;
|
| 43 |
bool use_bfloat;
|
| 44 |
|
|
@@ -48,6 +52,7 @@ static struct ggml_backend_metal_device_context {
|
|
| 48 |
/*.mtl_device_ref_count =*/ 0,
|
| 49 |
/*.has_simdgroup_reduction =*/ false,
|
| 50 |
/*.has_simdgroup_mm =*/ false,
|
|
|
|
| 51 |
/*.has_bfloat =*/ false,
|
| 52 |
/*.use_bfloat =*/ false,
|
| 53 |
/*.name =*/ "",
|
|
@@ -65,6 +70,10 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
|
|
| 65 |
|
| 66 |
ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
|
| 69 |
ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
|
| 70 |
|
|
@@ -483,6 +492,11 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|
| 483 |
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
| 484 |
|
| 485 |
ctx->queue = [device newCommandQueue];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
| 487 |
|
| 488 |
id<MTLLibrary> metal_library;
|
|
@@ -649,6 +663,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|
| 649 |
|
| 650 |
GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx_dev->has_simdgroup_reduction ? "true" : "false");
|
| 651 |
GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm ? "true" : "false");
|
|
|
|
| 652 |
GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx_dev->has_bfloat ? "true" : "false");
|
| 653 |
GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx_dev->use_bfloat ? "true" : "false");
|
| 654 |
GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false");
|
|
@@ -1035,8 +1050,70 @@ struct ggml_backend_metal_buffer_context {
|
|
| 1035 |
// multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
|
| 1036 |
int n_buffers;
|
| 1037 |
struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
|
|
|
|
|
|
|
|
|
|
| 1038 |
};
|
| 1039 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1040 |
// finds the Metal buffer that contains the tensor data on the GPU device
|
| 1041 |
// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
|
| 1042 |
// Metal buffer based on the host memory pointer
|
|
@@ -4176,6 +4253,8 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
|
| 4176 |
for (int i = 0; i < ctx->n_buffers; i++) {
|
| 4177 |
[ctx->buffers[i].metal release];
|
| 4178 |
}
|
|
|
|
|
|
|
| 4179 |
ggml_backend_metal_device_rel(buffer->buft->device->context);
|
| 4180 |
|
| 4181 |
if (ctx->owned) {
|
|
@@ -4198,19 +4277,19 @@ static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
| 4198 |
static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
| 4199 |
memset((char *)tensor->data + offset, value, size);
|
| 4200 |
|
| 4201 |
-
|
| 4202 |
}
|
| 4203 |
|
| 4204 |
static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
| 4205 |
memcpy((char *)tensor->data + offset, data, size);
|
| 4206 |
|
| 4207 |
-
|
| 4208 |
}
|
| 4209 |
|
| 4210 |
static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
| 4211 |
memcpy(data, (const char *)tensor->data + offset, size);
|
| 4212 |
|
| 4213 |
-
|
| 4214 |
}
|
| 4215 |
|
| 4216 |
static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
|
|
@@ -4220,7 +4299,7 @@ static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, c
|
|
| 4220 |
}
|
| 4221 |
return false;
|
| 4222 |
|
| 4223 |
-
|
| 4224 |
}
|
| 4225 |
|
| 4226 |
static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
@@ -4246,7 +4325,7 @@ static struct ggml_backend_buffer_i ggml_backend_metal_buffer_i = {
|
|
| 4246 |
static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
|
| 4247 |
return "Metal";
|
| 4248 |
|
| 4249 |
-
|
| 4250 |
}
|
| 4251 |
|
| 4252 |
static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
|
|
@@ -4270,8 +4349,8 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t s
|
|
| 4270 |
}
|
| 4271 |
#endif
|
| 4272 |
#endif
|
| 4273 |
-
|
| 4274 |
-
|
| 4275 |
}
|
| 4276 |
|
| 4277 |
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
@@ -4284,7 +4363,8 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
|
|
| 4284 |
size_aligned += (size_page - (size_aligned % size_page));
|
| 4285 |
}
|
| 4286 |
|
| 4287 |
-
|
|
|
|
| 4288 |
|
| 4289 |
ctx->all_data = ggml_metal_host_malloc(size_aligned);
|
| 4290 |
ctx->all_size = size_aligned;
|
|
@@ -4307,7 +4387,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
|
|
| 4307 |
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
|
| 4308 |
GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
|
| 4309 |
free(ctx);
|
| 4310 |
-
ggml_backend_metal_device_rel(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4311 |
return NULL;
|
| 4312 |
}
|
| 4313 |
|
|
@@ -4318,7 +4405,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
|
|
| 4318 |
|
| 4319 |
static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
|
| 4320 |
return 32;
|
| 4321 |
-
|
| 4322 |
}
|
| 4323 |
|
| 4324 |
static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
|
|
@@ -4328,13 +4415,13 @@ static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_ty
|
|
| 4328 |
|
| 4329 |
return max_size;
|
| 4330 |
|
| 4331 |
-
|
| 4332 |
}
|
| 4333 |
|
| 4334 |
static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
|
| 4335 |
return true;
|
| 4336 |
|
| 4337 |
-
|
| 4338 |
}
|
| 4339 |
|
| 4340 |
ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
|
@@ -4357,7 +4444,7 @@ ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
|
| 4357 |
static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
|
| 4358 |
return "Metal_Mapped";
|
| 4359 |
|
| 4360 |
-
|
| 4361 |
}
|
| 4362 |
|
| 4363 |
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) {
|
|
@@ -4400,7 +4487,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
|
|
| 4400 |
size_aligned += (size_page - (size_aligned % size_page));
|
| 4401 |
}
|
| 4402 |
|
| 4403 |
-
|
|
|
|
| 4404 |
|
| 4405 |
// the buffer fits into the max buffer size allowed by the device
|
| 4406 |
if (size_aligned <= device.maxBufferLength) {
|
|
@@ -4453,6 +4541,13 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
|
|
| 4453 |
}
|
| 4454 |
}
|
| 4455 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4456 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 4457 |
}
|
| 4458 |
|
|
@@ -4461,7 +4556,7 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
|
|
| 4461 |
static const char * ggml_backend_metal_name(ggml_backend_t backend) {
|
| 4462 |
return "Metal";
|
| 4463 |
|
| 4464 |
-
|
| 4465 |
}
|
| 4466 |
|
| 4467 |
static void ggml_backend_metal_free(ggml_backend_t backend) {
|
|
@@ -4766,6 +4861,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
|
|
| 4766 |
}
|
| 4767 |
}
|
| 4768 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4769 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 4770 |
}
|
| 4771 |
|
|
@@ -4779,7 +4881,7 @@ static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml
|
|
| 4779 |
return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
|
| 4780 |
buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
|
| 4781 |
|
| 4782 |
-
|
| 4783 |
}
|
| 4784 |
|
| 4785 |
static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
|
|
|
| 19 |
// max number of MTLCommandBuffer used to submit a graph for processing
|
| 20 |
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
|
| 21 |
|
| 22 |
+
// create residency sets only on macOS >= 15.0
|
| 23 |
+
#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000
|
| 24 |
+
#define GGML_METAL_HAS_RESIDENCY_SETS 1
|
| 25 |
+
#endif
|
| 26 |
|
| 27 |
// globals
|
| 28 |
|
|
|
|
| 42 |
|
| 43 |
bool has_simdgroup_reduction;
|
| 44 |
bool has_simdgroup_mm;
|
| 45 |
+
bool has_residency_sets;
|
| 46 |
bool has_bfloat;
|
| 47 |
bool use_bfloat;
|
| 48 |
|
|
|
|
| 52 |
/*.mtl_device_ref_count =*/ 0,
|
| 53 |
/*.has_simdgroup_reduction =*/ false,
|
| 54 |
/*.has_simdgroup_mm =*/ false,
|
| 55 |
+
/*.has_residency_sets =*/ false,
|
| 56 |
/*.has_bfloat =*/ false,
|
| 57 |
/*.use_bfloat =*/ false,
|
| 58 |
/*.name =*/ "",
|
|
|
|
| 70 |
|
| 71 |
ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
|
| 72 |
|
| 73 |
+
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
|
| 74 |
+
ctx->has_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == NULL;
|
| 75 |
+
#endif
|
| 76 |
+
|
| 77 |
ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
|
| 78 |
ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
|
| 79 |
|
|
|
|
| 492 |
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
| 493 |
|
| 494 |
ctx->queue = [device newCommandQueue];
|
| 495 |
+
if (ctx->queue == nil) {
|
| 496 |
+
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
|
| 497 |
+
return NULL;
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
| 501 |
|
| 502 |
id<MTLLibrary> metal_library;
|
|
|
|
| 663 |
|
| 664 |
GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx_dev->has_simdgroup_reduction ? "true" : "false");
|
| 665 |
GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm ? "true" : "false");
|
| 666 |
+
GGML_LOG_INFO("%s: has residency sets = %s\n", __func__, ctx_dev->has_residency_sets ? "true" : "false");
|
| 667 |
GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx_dev->has_bfloat ? "true" : "false");
|
| 668 |
GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx_dev->use_bfloat ? "true" : "false");
|
| 669 |
GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false");
|
|
|
|
| 1050 |
// multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
|
| 1051 |
int n_buffers;
|
| 1052 |
struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
|
| 1053 |
+
|
| 1054 |
+
// optional MTLResidencySet
|
| 1055 |
+
id rset;
|
| 1056 |
};
|
| 1057 |
|
| 1058 |
+
// rset init
|
| 1059 |
+
// Creates the optional MTLResidencySet for a buffer context.
//
// ctx->rset is always reset to nil first. When residency sets are enabled on the
// device context (and the code is built/running with macOS >= 15.0 support), a new
// set is created, every Metal buffer in ctx->buffers[0 .. n_buffers) is added to it,
// and the set is committed and residency is requested.
//
//   ctx     - buffer context whose buffers are added to the set
//   ctx_dev - device context; has_residency_sets gates the whole operation
//   device  - Metal device used to create the residency set
//
// Returns false only when the residency set could not be created. Returns true when
// the set was created, or when residency sets are disabled/unavailable (in which case
// ctx->rset stays nil).
static bool ggml_backend_metal_buffer_rset_init(
        struct ggml_backend_metal_buffer_context * ctx,
        struct ggml_backend_metal_device_context * ctx_dev,
        id<MTLDevice> device) {
    ctx->rset = nil;

    if (!ctx_dev->has_residency_sets) {
        return true;
    }

#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
        desc.label = @"ggml_backend_metal";
        desc.initialCapacity = ctx->n_buffers;

        // start from nil: the out-parameter is only guaranteed to be written on failure,
        // so an uninitialized local could otherwise hold an indeterminate pointer
        NSError * error = nil;
        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];

        // the descriptor is no longer needed, regardless of the outcome
        [desc release];

        // judge success by the returned object, not by the error out-parameter
        if (ctx->rset == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            return false;
        }

        for (int i = 0; i < ctx->n_buffers; i++) {
            [ctx->rset addAllocation:ctx->buffers[i].metal];
        }

        [ctx->rset commit];
        [ctx->rset requestResidency];

        return true;
    }
#else
    GGML_UNUSED(ctx_dev);
    GGML_UNUSED(device);
#endif

    return true;
}
|
| 1101 |
+
|
| 1102 |
+
// rset free
|
| 1103 |
+
// Tears down the residency set held by a buffer context, if there is one:
// ends residency, drops all allocations from the set and releases it.
// Does nothing when ctx->rset is nil, when running on macOS < 15.0, or when
// the file is built without GGML_METAL_HAS_RESIDENCY_SETS.
static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
#if !defined(GGML_METAL_HAS_RESIDENCY_SETS)
    GGML_UNUSED(ctx);
#else
    if (@available(macOS 15.0, *)) {
        id residency_set = ctx->rset;

        if (residency_set != nil) {
            [residency_set endResidency];
            [residency_set removeAllAllocations];
            [residency_set release];
        }
    }
#endif
}
|
| 1116 |
+
|
| 1117 |
// finds the Metal buffer that contains the tensor data on the GPU device
|
| 1118 |
// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
|
| 1119 |
// Metal buffer based on the host memory pointer
|
|
|
|
| 4253 |
for (int i = 0; i < ctx->n_buffers; i++) {
|
| 4254 |
[ctx->buffers[i].metal release];
|
| 4255 |
}
|
| 4256 |
+
|
| 4257 |
+
ggml_backend_metal_buffer_rset_free(ctx);
|
| 4258 |
ggml_backend_metal_device_rel(buffer->buft->device->context);
|
| 4259 |
|
| 4260 |
if (ctx->owned) {
|
|
|
|
| 4277 |
// Fills `size` bytes of the tensor's data, starting `offset` bytes in, with `value`.
// The buffer handle is not needed: the write goes directly through tensor->data.
static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    char * dst = (char *)tensor->data + offset;
    memset(dst, value, size);
}
|
| 4282 |
|
| 4283 |
// Copies `size` bytes from `data` into the tensor's data at byte offset `offset`.
// The buffer handle is not needed: the write goes directly through tensor->data.
static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    char * dst = (char *)tensor->data + offset;
    memcpy(dst, data, size);
}
|
| 4288 |
|
| 4289 |
// Copies `size` bytes out of the tensor's data, starting at byte offset `offset`, into `data`.
// The buffer handle is not needed: the read goes directly through tensor->data.
static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    const char * src = (const char *)tensor->data + offset;
    memcpy(data, src, size);
}
|
| 4294 |
|
| 4295 |
static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
|
|
|
|
| 4299 |
}
|
| 4300 |
return false;
|
| 4301 |
|
| 4302 |
+
GGML_UNUSED(buffer);
|
| 4303 |
}
|
| 4304 |
|
| 4305 |
static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
|
|
|
| 4325 |
// Returns the fixed name of the Metal buffer type; the argument is ignored.
static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    return "Metal";
}
|
| 4330 |
|
| 4331 |
static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
|
|
|
|
| 4349 |
}
|
| 4350 |
#endif
|
| 4351 |
#endif
|
| 4352 |
+
GGML_UNUSED(device);
|
| 4353 |
+
GGML_UNUSED(size_aligned);
|
| 4354 |
}
|
| 4355 |
|
| 4356 |
static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
|
|
| 4363 |
size_aligned += (size_page - (size_aligned % size_page));
|
| 4364 |
}
|
| 4365 |
|
| 4366 |
+
struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
|
| 4367 |
+
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
|
| 4368 |
|
| 4369 |
ctx->all_data = ggml_metal_host_malloc(size_aligned);
|
| 4370 |
ctx->all_size = size_aligned;
|
|
|
|
| 4387 |
if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
|
| 4388 |
GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
|
| 4389 |
free(ctx);
|
| 4390 |
+
ggml_backend_metal_device_rel(ctx_dev);
|
| 4391 |
+
return NULL;
|
| 4392 |
+
}
|
| 4393 |
+
|
| 4394 |
+
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
|
| 4395 |
+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
|
| 4396 |
+
free(ctx);
|
| 4397 |
+
ggml_backend_metal_device_rel(ctx_dev);
|
| 4398 |
return NULL;
|
| 4399 |
}
|
| 4400 |
|
|
|
|
| 4405 |
|
| 4406 |
// Returns the alignment reported for this buffer type: a constant 32,
// independent of the argument.
static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    const size_t alignment = 32;
    return alignment;
}
|
| 4410 |
|
| 4411 |
static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
|
|
|
|
| 4415 |
|
| 4416 |
return max_size;
|
| 4417 |
|
| 4418 |
+
GGML_UNUSED(buft);
|
| 4419 |
}
|
| 4420 |
|
| 4421 |
// Reports this buffer type as a host buffer type: always true, the argument is ignored.
static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    return true;
}
|
| 4426 |
|
| 4427 |
ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
|
|
|
| 4444 |
// Returns the fixed name of the "from pointer" Metal buffer type; the argument is ignored.
static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    return "Metal_Mapped";
}
|
| 4449 |
|
| 4450 |
static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) {
|
|
|
|
| 4487 |
size_aligned += (size_page - (size_aligned % size_page));
|
| 4488 |
}
|
| 4489 |
|
| 4490 |
+
struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
|
| 4491 |
+
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
|
| 4492 |
|
| 4493 |
// the buffer fits into the max buffer size allowed by the device
|
| 4494 |
if (size_aligned <= device.maxBufferLength) {
|
|
|
|
| 4541 |
}
|
| 4542 |
}
|
| 4543 |
|
| 4544 |
+
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
|
| 4545 |
+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
|
| 4546 |
+
free(ctx);
|
| 4547 |
+
ggml_backend_metal_device_rel(ctx_dev);
|
| 4548 |
+
return NULL;
|
| 4549 |
+
}
|
| 4550 |
+
|
| 4551 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 4552 |
}
|
| 4553 |
|
|
|
|
| 4556 |
// Returns the fixed name of the Metal backend; the argument is ignored.
static const char * ggml_backend_metal_name(ggml_backend_t backend) {
    GGML_UNUSED(backend);

    return "Metal";
}
|
| 4561 |
|
| 4562 |
static void ggml_backend_metal_free(ggml_backend_t backend) {
|
|
|
|
| 4861 |
}
|
| 4862 |
}
|
| 4863 |
|
| 4864 |
+
if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
|
| 4865 |
+
GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
|
| 4866 |
+
free(ctx);
|
| 4867 |
+
ggml_backend_metal_device_rel(ctx_dev);
|
| 4868 |
+
return NULL;
|
| 4869 |
+
}
|
| 4870 |
+
|
| 4871 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 4872 |
}
|
| 4873 |
|
|
|
|
| 4881 |
return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
|
| 4882 |
buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
|
| 4883 |
|
| 4884 |
+
GGML_UNUSED(dev);
|
| 4885 |
}
|
| 4886 |
|
| 4887 |
static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|