ggerganov committed on
Commit
9da4d68
·
1 Parent(s): ca6577f

metal : use residency sets (llama/11427)

Browse files

* metal : use residency sets

ggml-ci

* metal : restore commandBufferWithUnretainedReferences calls [no ci]

* metal : release descriptors

ggml-ci

* metal : check env GGML_METAL_NO_RESIDENCY

ggml-ci

* metal : fix build + clean-up

ggml-ci

Files changed (1) hide show
  1. ggml/src/ggml-metal/ggml-metal.m +119 -17
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -19,7 +19,10 @@
19
  // max number of MTLCommandBuffer used to submit a graph for processing
20
  #define GGML_METAL_MAX_COMMAND_BUFFERS 8
21
 
22
- #define UNUSED(x) (void)(x)
 
 
 
23
 
24
  // globals
25
 
@@ -39,6 +42,7 @@ static struct ggml_backend_metal_device_context {
39
 
40
  bool has_simdgroup_reduction;
41
  bool has_simdgroup_mm;
 
42
  bool has_bfloat;
43
  bool use_bfloat;
44
 
@@ -48,6 +52,7 @@ static struct ggml_backend_metal_device_context {
48
  /*.mtl_device_ref_count =*/ 0,
49
  /*.has_simdgroup_reduction =*/ false,
50
  /*.has_simdgroup_mm =*/ false,
 
51
  /*.has_bfloat =*/ false,
52
  /*.use_bfloat =*/ false,
53
  /*.name =*/ "",
@@ -65,6 +70,10 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
65
 
66
  ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
67
 
 
 
 
 
68
  ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
69
  ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
70
 
@@ -483,6 +492,11 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
483
  GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
484
 
485
  ctx->queue = [device newCommandQueue];
 
 
 
 
 
486
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
487
 
488
  id<MTLLibrary> metal_library;
@@ -649,6 +663,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
649
 
650
  GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx_dev->has_simdgroup_reduction ? "true" : "false");
651
  GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm ? "true" : "false");
 
652
  GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx_dev->has_bfloat ? "true" : "false");
653
  GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx_dev->use_bfloat ? "true" : "false");
654
  GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false");
@@ -1035,8 +1050,70 @@ struct ggml_backend_metal_buffer_context {
1035
  // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
1036
  int n_buffers;
1037
  struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
 
 
 
1038
  };
1039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
  // finds the Metal buffer that contains the tensor data on the GPU device
1041
  // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
1042
  // Metal buffer based on the host memory pointer
@@ -4176,6 +4253,8 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
4176
  for (int i = 0; i < ctx->n_buffers; i++) {
4177
  [ctx->buffers[i].metal release];
4178
  }
 
 
4179
  ggml_backend_metal_device_rel(buffer->buft->device->context);
4180
 
4181
  if (ctx->owned) {
@@ -4198,19 +4277,19 @@ static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
4198
  static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
4199
  memset((char *)tensor->data + offset, value, size);
4200
 
4201
- UNUSED(buffer);
4202
  }
4203
 
4204
  static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
4205
  memcpy((char *)tensor->data + offset, data, size);
4206
 
4207
- UNUSED(buffer);
4208
  }
4209
 
4210
  static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
4211
  memcpy(data, (const char *)tensor->data + offset, size);
4212
 
4213
- UNUSED(buffer);
4214
  }
4215
 
4216
  static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
@@ -4220,7 +4299,7 @@ static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, c
4220
  }
4221
  return false;
4222
 
4223
- UNUSED(buffer);
4224
  }
4225
 
4226
  static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@@ -4246,7 +4325,7 @@ static struct ggml_backend_buffer_i ggml_backend_metal_buffer_i = {
4246
  static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
4247
  return "Metal";
4248
 
4249
- UNUSED(buft);
4250
  }
4251
 
4252
  static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
@@ -4270,8 +4349,8 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t s
4270
  }
4271
  #endif
4272
  #endif
4273
- UNUSED(device);
4274
- UNUSED(size_aligned);
4275
  }
4276
 
4277
  static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
@@ -4284,7 +4363,8 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
4284
  size_aligned += (size_page - (size_aligned % size_page));
4285
  }
4286
 
4287
- id<MTLDevice> device = ggml_backend_metal_device_acq(buft->device->context);
 
4288
 
4289
  ctx->all_data = ggml_metal_host_malloc(size_aligned);
4290
  ctx->all_size = size_aligned;
@@ -4307,7 +4387,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
4307
  if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
4308
  GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
4309
  free(ctx);
4310
- ggml_backend_metal_device_rel(buft->device->context);
 
 
 
 
 
 
 
4311
  return NULL;
4312
  }
4313
 
@@ -4318,7 +4405,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
4318
 
4319
  static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
4320
  return 32;
4321
- UNUSED(buft);
4322
  }
4323
 
4324
  static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
@@ -4328,13 +4415,13 @@ static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_ty
4328
 
4329
  return max_size;
4330
 
4331
- UNUSED(buft);
4332
  }
4333
 
4334
  static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
4335
  return true;
4336
 
4337
- UNUSED(buft);
4338
  }
4339
 
4340
  ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
@@ -4357,7 +4444,7 @@ ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
4357
  static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
4358
  return "Metal_Mapped";
4359
 
4360
- UNUSED(buft);
4361
  }
4362
 
4363
  static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) {
@@ -4400,7 +4487,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
4400
  size_aligned += (size_page - (size_aligned % size_page));
4401
  }
4402
 
4403
- id<MTLDevice> device = ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main);
 
4404
 
4405
  // the buffer fits into the max buffer size allowed by the device
4406
  if (size_aligned <= device.maxBufferLength) {
@@ -4453,6 +4541,13 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
4453
  }
4454
  }
4455
 
 
 
 
 
 
 
 
4456
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
4457
  }
4458
 
@@ -4461,7 +4556,7 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
4461
  static const char * ggml_backend_metal_name(ggml_backend_t backend) {
4462
  return "Metal";
4463
 
4464
- UNUSED(backend);
4465
  }
4466
 
4467
  static void ggml_backend_metal_free(ggml_backend_t backend) {
@@ -4766,6 +4861,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
4766
  }
4767
  }
4768
 
 
 
 
 
 
 
 
4769
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
4770
  }
4771
 
@@ -4779,7 +4881,7 @@ static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml
4779
  return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
4780
  buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
4781
 
4782
- UNUSED(dev);
4783
  }
4784
 
4785
  static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
 
19
  // max number of MTLCommandBuffer used to submit a graph for processing
20
  #define GGML_METAL_MAX_COMMAND_BUFFERS 8
21
 
22
+ // create residency sets only on macOS >= 15.0
23
+ #if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000
24
+ #define GGML_METAL_HAS_RESIDENCY_SETS 1
25
+ #endif
26
 
27
  // globals
28
 
 
42
 
43
  bool has_simdgroup_reduction;
44
  bool has_simdgroup_mm;
45
+ bool has_residency_sets;
46
  bool has_bfloat;
47
  bool use_bfloat;
48
 
 
52
  /*.mtl_device_ref_count =*/ 0,
53
  /*.has_simdgroup_reduction =*/ false,
54
  /*.has_simdgroup_mm =*/ false,
55
+ /*.has_residency_sets =*/ false,
56
  /*.has_bfloat =*/ false,
57
  /*.use_bfloat =*/ false,
58
  /*.name =*/ "",
 
70
 
71
  ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
72
 
73
+ #if defined(GGML_METAL_HAS_RESIDENCY_SETS)
74
+ ctx->has_residency_sets = getenv("GGML_METAL_NO_RESIDENCY") == NULL;
75
+ #endif
76
+
77
  ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
78
  ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
79
 
 
492
  GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
493
 
494
  ctx->queue = [device newCommandQueue];
495
+ if (ctx->queue == nil) {
496
+ GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
497
+ return NULL;
498
+ }
499
+
500
  ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
501
 
502
  id<MTLLibrary> metal_library;
 
663
 
664
  GGML_LOG_INFO("%s: simdgroup reduction = %s\n", __func__, ctx_dev->has_simdgroup_reduction ? "true" : "false");
665
  GGML_LOG_INFO("%s: simdgroup matrix mul. = %s\n", __func__, ctx_dev->has_simdgroup_mm ? "true" : "false");
666
+ GGML_LOG_INFO("%s: has residency sets = %s\n", __func__, ctx_dev->has_residency_sets ? "true" : "false");
667
  GGML_LOG_INFO("%s: has bfloat = %s\n", __func__, ctx_dev->has_bfloat ? "true" : "false");
668
  GGML_LOG_INFO("%s: use bfloat = %s\n", __func__, ctx_dev->use_bfloat ? "true" : "false");
669
  GGML_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false");
 
1050
  // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
1051
  int n_buffers;
1052
  struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
1053
+
1054
+ // optional MTLResidencySet
1055
+ id rset;
1056
  };
1057
 
1058
// Creates the optional MTLResidencySet for a buffer context and requests residency for its Metal buffers.
//
//   ctx     - buffer context; ctx->rset is always reset to nil first. When a residency set is created it
//             is stored in ctx->rset and populated with ctx->buffers[0 .. n_buffers)
//   ctx_dev - device context; nothing is created when ctx_dev->has_residency_sets is false
//   device  - Metal device used to create the residency set
//
// Returns false only when the residency set could not be created. Returns true when the set was created,
// or when residency sets are disabled, not compiled in, or not available at runtime (ctx->rset stays nil).
static bool ggml_backend_metal_buffer_rset_init(
        struct ggml_backend_metal_buffer_context * ctx,
        struct ggml_backend_metal_device_context * ctx_dev,
        id<MTLDevice> device) {
    ctx->rset = nil;

    if (!ctx_dev->has_residency_sets) {
        return true;
    }

#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
        desc.label = @"ggml_backend_metal";
        desc.initialCapacity = ctx->n_buffers;

        // start from nil: this file uses manual retain/release, so an uninitialized local would hold
        // an indeterminate pointer if the call succeeds without writing the out-parameter
        NSError * error = nil;
        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];

        // the descriptor is not needed anymore, regardless of the outcome
        [desc release];

        // Cocoa convention: failure is signalled by the return value, the NSError only describes it
        if (ctx->rset == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            return false;
        }

        for (int i = 0; i < ctx->n_buffers; i++) {
            [ctx->rset addAllocation:ctx->buffers[i].metal];
        }

        // apply the pending additions, then ask for the allocations to be made resident
        [ctx->rset commit];
        [ctx->rset requestResidency];

        return true;
    }
#else
    GGML_UNUSED(ctx_dev);
    GGML_UNUSED(device);
#endif

    return true;
}
1101
+
1102
// Tears down the optional residency set of a buffer context: ends residency, removes all allocations
// from the set and releases it. Does nothing when ctx->rset is nil or when residency sets are not
// compiled in.
static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        // no residency set was created for this context
        if (!ctx->rset) {
            return;
        }

        [ctx->rset endResidency];
        [ctx->rset removeAllAllocations];
        [ctx->rset release];
    }
#else
    GGML_UNUSED(ctx);
#endif
}
1116
+
1117
  // finds the Metal buffer that contains the tensor data on the GPU device
1118
  // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
1119
  // Metal buffer based on the host memory pointer
 
4253
  for (int i = 0; i < ctx->n_buffers; i++) {
4254
  [ctx->buffers[i].metal release];
4255
  }
4256
+
4257
+ ggml_backend_metal_buffer_rset_free(ctx);
4258
  ggml_backend_metal_device_rel(buffer->buft->device->context);
4259
 
4260
  if (ctx->owned) {
 
4277
  static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
4278
  memset((char *)tensor->data + offset, value, size);
4279
 
4280
+ GGML_UNUSED(buffer);
4281
  }
4282
 
4283
  static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
4284
  memcpy((char *)tensor->data + offset, data, size);
4285
 
4286
+ GGML_UNUSED(buffer);
4287
  }
4288
 
4289
  static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
4290
  memcpy(data, (const char *)tensor->data + offset, size);
4291
 
4292
+ GGML_UNUSED(buffer);
4293
  }
4294
 
4295
  static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
 
4299
  }
4300
  return false;
4301
 
4302
+ GGML_UNUSED(buffer);
4303
  }
4304
 
4305
  static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
 
4325
  static const char * ggml_backend_metal_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
4326
  return "Metal";
4327
 
4328
+ GGML_UNUSED(buft);
4329
  }
4330
 
4331
  static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t size_aligned) {
 
4349
  }
4350
  #endif
4351
  #endif
4352
+ GGML_UNUSED(device);
4353
+ GGML_UNUSED(size_aligned);
4354
  }
4355
 
4356
  static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
 
4363
  size_aligned += (size_page - (size_aligned % size_page));
4364
  }
4365
 
4366
+ struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context;
4367
+ id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
4368
 
4369
  ctx->all_data = ggml_metal_host_malloc(size_aligned);
4370
  ctx->all_size = size_aligned;
 
4387
  if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) {
4388
  GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0);
4389
  free(ctx);
4390
+ ggml_backend_metal_device_rel(ctx_dev);
4391
+ return NULL;
4392
+ }
4393
+
4394
+ if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
4395
+ GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4396
+ free(ctx);
4397
+ ggml_backend_metal_device_rel(ctx_dev);
4398
  return NULL;
4399
  }
4400
 
 
4405
 
4406
  static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
4407
  return 32;
4408
+ GGML_UNUSED(buft);
4409
  }
4410
 
4411
  static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
 
4415
 
4416
  return max_size;
4417
 
4418
+ GGML_UNUSED(buft);
4419
  }
4420
 
4421
  static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
4422
  return true;
4423
 
4424
+ GGML_UNUSED(buft);
4425
  }
4426
 
4427
  ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
 
4444
  static const char * ggml_backend_metal_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
4445
  return "Metal_Mapped";
4446
 
4447
+ GGML_UNUSED(buft);
4448
  }
4449
 
4450
  static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type(void) {
 
4487
  size_aligned += (size_page - (size_aligned % size_page));
4488
  }
4489
 
4490
+ struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
4491
+ id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
4492
 
4493
  // the buffer fits into the max buffer size allowed by the device
4494
  if (size_aligned <= device.maxBufferLength) {
 
4541
  }
4542
  }
4543
 
4544
+ if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
4545
+ GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4546
+ free(ctx);
4547
+ ggml_backend_metal_device_rel(ctx_dev);
4548
+ return NULL;
4549
+ }
4550
+
4551
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
4552
  }
4553
 
 
4556
  static const char * ggml_backend_metal_name(ggml_backend_t backend) {
4557
  return "Metal";
4558
 
4559
+ GGML_UNUSED(backend);
4560
  }
4561
 
4562
  static void ggml_backend_metal_free(ggml_backend_t backend) {
 
4861
  }
4862
  }
4863
 
4864
+ if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) {
4865
+ GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
4866
+ free(ctx);
4867
+ ggml_backend_metal_device_rel(ctx_dev);
4868
+ return NULL;
4869
+ }
4870
+
4871
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
4872
  }
4873
 
 
4881
  return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name ||
4882
  buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
4883
 
4884
+ GGML_UNUSED(dev);
4885
  }
4886
 
4887
  static bool ggml_backend_metal_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {