azarovalex ggerganov committed on
Commit
e2cc0e5
·
unverified ·
1 Parent(s): 7815f68

metal : log `recommendedMaxWorkingSetSize` on iOS 16+ (llama/4936)

Browse files

* metal: Log `recommendedMaxWorkingSetSize` on iOS 16+

* Only log on iOS and macOS, ignoring tvOS and other platforms

* Check for Xcode version before using recommendedMaxWorkingSetSize

---------

Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (1) hide show
  1. ggml-metal.m +27 -31
ggml-metal.m CHANGED
@@ -369,8 +369,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
369
  GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
370
  GGML_METAL_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
371
  GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
372
- #if TARGET_OS_OSX
373
- GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
 
 
 
 
374
  if (ctx->device.maxTransferRate != 0) {
375
  GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
376
  } else {
@@ -2369,6 +2373,25 @@ GGML_CALL static const char * ggml_backend_metal_buffer_type_get_name(ggml_backe
2369
  UNUSED(buft);
2370
  }
2371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2372
  GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
2373
  struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
2374
 
@@ -2401,22 +2424,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buff
2401
  }
2402
 
2403
  GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB", __func__, size_aligned / 1024.0 / 1024.0);
2404
-
2405
-
2406
- #if TARGET_OS_OSX
2407
- GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
2408
- device.currentAllocatedSize / 1024.0 / 1024.0,
2409
- device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
2410
-
2411
- if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
2412
- GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
2413
- } else {
2414
- GGML_METAL_LOG_INFO("\n");
2415
- }
2416
- #else
2417
- GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
2418
- #endif
2419
-
2420
 
2421
  return ggml_backend_buffer_init(buft, ggml_backend_metal_buffer_i, ctx, size);
2422
  }
@@ -2524,19 +2532,7 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data,
2524
  }
2525
  }
2526
 
2527
- #if TARGET_OS_OSX
2528
- GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
2529
- device.currentAllocatedSize / 1024.0 / 1024.0,
2530
- device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
2531
-
2532
- if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
2533
- GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
2534
- } else {
2535
- GGML_METAL_LOG_INFO("\n");
2536
- }
2537
- #else
2538
- GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
2539
- #endif
2540
 
2541
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_type(), ggml_backend_metal_buffer_i, ctx, size);
2542
  }
 
369
  GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
370
  GGML_METAL_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
371
  GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
372
+
373
+ #if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
374
+ if (@available(macOS 10.12, iOS 16.0, *)) {
375
+ GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
376
+ }
377
+ #elif TARGET_OS_OSX
378
  if (ctx->device.maxTransferRate != 0) {
379
  GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
380
  } else {
 
2373
  UNUSED(buft);
2374
  }
2375
 
2376
+ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device) {
2377
+ #if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
2378
+ if (@available(macOS 10.12, iOS 16.0, *)) {
2379
+ GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
2380
+ device.currentAllocatedSize / 1024.0 / 1024.0,
2381
+ device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
2382
+
2383
+ if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
2384
+ GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
2385
+ } else {
2386
+ GGML_METAL_LOG_INFO("\n");
2387
+ }
2388
+ } else {
2389
+ GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
2390
+ }
2391
+ #endif
2392
+ UNUSED(device);
2393
+ }
2394
+
2395
  GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
2396
  struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
2397
 
 
2424
  }
2425
 
2426
  GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB", __func__, size_aligned / 1024.0 / 1024.0);
2427
+ ggml_backend_metal_log_allocated_size(device);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2428
 
2429
  return ggml_backend_buffer_init(buft, ggml_backend_metal_buffer_i, ctx, size);
2430
  }
 
2532
  }
2533
  }
2534
 
2535
+ ggml_backend_metal_log_allocated_size(device);
 
 
 
 
 
 
 
 
 
 
 
 
2536
 
2537
  return ggml_backend_buffer_init(ggml_backend_metal_buffer_type(), ggml_backend_metal_buffer_i, ctx, size);
2538
  }