Spaces:
Running
Running
metal : log `recommendedMaxWorkingSetSize` on iOS 16+ (llama/4936)
Browse files
* metal: Log `recommendedMaxWorkingSetSize` on iOS 16+
* Only log on iOS and macOS, ignoring tvOS and other platforms
* Check for Xcode version before using recommendedMaxWorkingSetSize
---------
Co-authored-by: Georgi Gerganov <[email protected]>
- ggml-metal.m +27 -31
ggml-metal.m
CHANGED
|
@@ -369,8 +369,12 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
|
| 369 |
GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
|
| 370 |
GGML_METAL_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
|
| 371 |
GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
|
| 372 |
-
|
| 373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
if (ctx->device.maxTransferRate != 0) {
|
| 375 |
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
|
| 376 |
} else {
|
|
@@ -2369,6 +2373,25 @@ GGML_CALL static const char * ggml_backend_metal_buffer_type_get_name(ggml_backe
|
|
| 2369 |
UNUSED(buft);
|
| 2370 |
}
|
| 2371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2372 |
GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
| 2373 |
struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
|
| 2374 |
|
|
@@ -2401,22 +2424,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buff
|
|
| 2401 |
}
|
| 2402 |
|
| 2403 |
GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB", __func__, size_aligned / 1024.0 / 1024.0);
|
| 2404 |
-
|
| 2405 |
-
|
| 2406 |
-
#if TARGET_OS_OSX
|
| 2407 |
-
GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
|
| 2408 |
-
device.currentAllocatedSize / 1024.0 / 1024.0,
|
| 2409 |
-
device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
|
| 2410 |
-
|
| 2411 |
-
if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
|
| 2412 |
-
GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
|
| 2413 |
-
} else {
|
| 2414 |
-
GGML_METAL_LOG_INFO("\n");
|
| 2415 |
-
}
|
| 2416 |
-
#else
|
| 2417 |
-
GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
|
| 2418 |
-
#endif
|
| 2419 |
-
|
| 2420 |
|
| 2421 |
return ggml_backend_buffer_init(buft, ggml_backend_metal_buffer_i, ctx, size);
|
| 2422 |
}
|
|
@@ -2524,19 +2532,7 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data,
|
|
| 2524 |
}
|
| 2525 |
}
|
| 2526 |
|
| 2527 |
-
|
| 2528 |
-
GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
|
| 2529 |
-
device.currentAllocatedSize / 1024.0 / 1024.0,
|
| 2530 |
-
device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
|
| 2531 |
-
|
| 2532 |
-
if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
|
| 2533 |
-
GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
|
| 2534 |
-
} else {
|
| 2535 |
-
GGML_METAL_LOG_INFO("\n");
|
| 2536 |
-
}
|
| 2537 |
-
#else
|
| 2538 |
-
GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
|
| 2539 |
-
#endif
|
| 2540 |
|
| 2541 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 2542 |
}
|
|
|
|
| 369 |
GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false");
|
| 370 |
GGML_METAL_LOG_INFO("%s: simdgroup matrix mul. support = %s\n", __func__, ctx->support_simdgroup_mm ? "true" : "false");
|
| 371 |
GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
|
| 372 |
+
|
| 373 |
+
#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
|
| 374 |
+
if (@available(macOS 10.12, iOS 16.0, *)) {
|
| 375 |
+
GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
|
| 376 |
+
}
|
| 377 |
+
#elif TARGET_OS_OSX
|
| 378 |
if (ctx->device.maxTransferRate != 0) {
|
| 379 |
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
|
| 380 |
} else {
|
|
|
|
| 2373 |
UNUSED(buft);
|
| 2374 |
}
|
| 2375 |
|
| 2376 |
+
// Finishes a log line that the caller has already started by appending the
// Metal device's memory usage and a terminating newline.
//
// The visible caller prints "allocated buffer, size = ... MiB" WITHOUT a
// trailing newline right before calling this helper, so every path through
// this function must end the line.
//
// device: Metal device whose currentAllocatedSize (and, where available,
//         recommendedMaxWorkingSetSize) is reported; both values are divided
//         by 1024*1024 before printing.
//
// Note: __func__ in the warning below expands to this helper's name, not the
// name of the calling function.
static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device) {
#if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
    if (@available(macOS 10.12, iOS 16.0, *)) {
        GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
                device.currentAllocatedSize / 1024.0 / 1024.0,
                device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);

        if (device.currentAllocatedSize > device.recommendedMaxWorkingSetSize) {
            // The warning text carries its own newline, so it also ends the line.
            GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
        } else {
            GGML_METAL_LOG_INFO("\n");
        }
    } else {
        // Runtime OS is older than the @available check: report the current
        // allocation only.
        GGML_METAL_LOG_INFO(", (%8.2f)\n", device.currentAllocatedSize / 1024.0 / 1024.0);
    }
#else
    // Platforms excluded by the guard above (e.g. tvOS, or iOS built with
    // clang < 15) log no memory figures, but the caller's partial message
    // still has to be terminated; previously this path left the log line
    // without a newline.
    GGML_METAL_LOG_INFO("\n");
#endif
    UNUSED(device);
}
|
| 2394 |
+
|
| 2395 |
GGML_CALL static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
| 2396 |
struct ggml_backend_metal_buffer_context * ctx = malloc(sizeof(struct ggml_backend_metal_buffer_context));
|
| 2397 |
|
|
|
|
| 2424 |
}
|
| 2425 |
|
| 2426 |
GGML_METAL_LOG_INFO("%s: allocated buffer, size = %8.2f MiB", __func__, size_aligned / 1024.0 / 1024.0);
|
| 2427 |
+
ggml_backend_metal_log_allocated_size(device);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2428 |
|
| 2429 |
return ggml_backend_buffer_init(buft, ggml_backend_metal_buffer_i, ctx, size);
|
| 2430 |
}
|
|
|
|
| 2532 |
}
|
| 2533 |
}
|
| 2534 |
|
| 2535 |
+
ggml_backend_metal_log_allocated_size(device);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2536 |
|
| 2537 |
return ggml_backend_buffer_init(ggml_backend_metal_buffer_type(), ggml_backend_metal_buffer_i, ctx, size);
|
| 2538 |
}
|