BB-fat committed on
Commit
e3908a2
·
1 Parent(s): 7274b04

metal : Cache the Metal library at the device context level (llama/12265)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-metal/ggml-metal.m +135 -120
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -46,6 +46,7 @@ static struct ggml_backend_device g_ggml_backend_metal_device;
46
  static struct ggml_backend_metal_device_context {
47
  id<MTLDevice> mtl_device;
48
  int mtl_device_ref_count;
 
49
 
50
  bool has_simdgroup_reduction;
51
  bool has_simdgroup_mm;
@@ -57,6 +58,7 @@ static struct ggml_backend_metal_device_context {
57
  } g_ggml_ctx_dev_main = {
58
  /*.mtl_device =*/ nil,
59
  /*.mtl_device_ref_count =*/ 0,
 
60
  /*.has_simdgroup_reduction =*/ false,
61
  /*.has_simdgroup_mm =*/ false,
62
  /*.has_residency_sets =*/ false,
@@ -108,6 +110,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
108
  ctx->mtl_device_ref_count--;
109
 
110
  if (ctx->mtl_device_ref_count == 0) {
 
 
 
 
 
111
  if (ctx->mtl_device) {
112
  [ctx->mtl_device release];
113
  ctx->mtl_device = nil;
@@ -495,163 +502,174 @@ static void * ggml_metal_host_malloc(size_t n) {
495
  return data;
496
  }
497
 
498
- static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
499
- GGML_LOG_INFO("%s: allocating\n", __func__);
500
-
501
- #if TARGET_OS_OSX && !GGML_METAL_NDEBUG
502
- // Show all the Metal device instances in the system
503
- NSArray * devices = MTLCopyAllDevices();
504
- for (id<MTLDevice> device in devices) {
505
- GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
506
- }
507
- [devices release]; // since it was created by a *Copy* C method
508
- #endif
509
-
510
- // init context
511
- struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
512
- struct ggml_backend_metal_device_context * ctx_dev = dev->context;
513
-
514
- id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
515
- GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
516
-
517
- ctx->queue = [device newCommandQueue];
518
- if (ctx->queue == nil) {
519
- GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
520
- return NULL;
521
- }
522
-
523
- ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
524
-
525
  id<MTLLibrary> metal_library = nil;
526
-
527
- // load library
528
- //
529
- // - first check if the library is embedded
530
- // - then check if the library is in the bundle
531
- // - if not found, load the source and compile it
532
- // - if that fails, return NULL
533
- {
534
- NSError * error = nil;
535
- NSString * src = nil;
536
 
537
  #if GGML_METAL_EMBED_LIBRARY
538
- GGML_LOG_INFO("%s: using embedded metal library\n", __func__);
539
 
540
- extern const char ggml_metallib_start[];
541
- extern const char ggml_metallib_end[];
542
 
543
- src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
544
 
545
  #else
546
 
547
  #ifdef SWIFT_PACKAGE
548
- NSBundle * bundle = SWIFTPM_MODULE_BUNDLE;
549
  #else
550
- NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
551
  #endif
552
 
553
- NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
554
- if (path_lib == nil) {
555
- // Try to find the resource in the directory where the current binary located.
556
- NSString * current_binary = [[NSProcessInfo processInfo] arguments][0];
557
- NSString * bin_dir = [current_binary stringByDeletingLastPathComponent];
558
- NSString * default_metallib_path = [NSString pathWithComponents:@[bin_dir, @"default.metallib"]];
559
- if ([[NSFileManager defaultManager] isReadableFileAtPath:default_metallib_path]) {
560
- GGML_LOG_INFO("%s: found '%s'\n", __func__, [default_metallib_path UTF8String]);
561
- NSDictionary * atts = [[NSFileManager defaultManager] attributesOfItemAtPath:default_metallib_path error:&error];
562
- if (atts && atts[NSFileType] == NSFileTypeSymbolicLink) {
563
- // Optionally, if this is a symlink, try to resolve it.
564
- default_metallib_path = [[NSFileManager defaultManager] destinationOfSymbolicLinkAtPath:default_metallib_path error:&error];
565
- if (default_metallib_path && [default_metallib_path length] > 0 && ![[default_metallib_path substringToIndex:1] isEqualToString:@"/"]) {
566
- // It is a relative path, adding the binary directory as directory prefix.
567
- default_metallib_path = [NSString pathWithComponents:@[bin_dir, default_metallib_path]];
568
- }
569
- if (!default_metallib_path || ![[NSFileManager defaultManager] isReadableFileAtPath:default_metallib_path]) {
570
- // Link to the resource could not be resolved.
571
- default_metallib_path = nil;
572
- } else {
573
- GGML_LOG_INFO("%s: symlink resolved '%s'\n", __func__, [default_metallib_path UTF8String]);
574
- }
575
  }
576
- } else {
577
- // The resource couldn't be found in the binary's directory.
578
- default_metallib_path = nil;
579
  }
580
- path_lib = default_metallib_path;
 
 
581
  }
 
 
582
 
583
- if (path_lib != nil) {
584
- // pre-compiled library found
585
- NSURL * libURL = [NSURL fileURLWithPath:path_lib];
586
- GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
587
 
588
- metal_library = [device newLibraryWithURL:libURL error:&error];
589
- if (error) {
590
- GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
591
- return NULL;
592
- }
593
- } else {
594
- GGML_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
595
 
596
- NSString * path_source;
597
- NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
598
 
599
- GGML_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
600
 
601
- if (path_resource) {
602
- path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
603
- } else {
604
- path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
605
- }
606
 
607
- if (path_source == nil) {
608
- GGML_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
609
- path_source = @"ggml-metal.metal";
610
- }
611
 
612
- GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
613
 
614
- src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
615
- if (error) {
616
- GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
617
- return NULL;
618
- }
619
  }
 
620
  #endif
621
 
622
- if (!metal_library) {
623
- @autoreleasepool {
624
- // dictionary of preprocessor macros
625
- NSMutableDictionary * prep = [NSMutableDictionary dictionary];
626
 
627
- if (ctx_dev->use_bfloat) {
628
- [prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
629
- }
630
 
631
  #if GGML_METAL_EMBED_LIBRARY
632
- [prep setObject:@"1" forKey:@"GGML_METAL_EMBED_LIBRARY"];
633
  #endif
634
 
635
- MTLCompileOptions * options = [MTLCompileOptions new];
636
- options.preprocessorMacros = prep;
637
 
638
- //[options setFastMathEnabled:false];
639
 
640
- metal_library = [device newLibraryWithSource:src options:options error:&error];
641
- if (error) {
642
- GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
643
- return NULL;
644
- }
645
 
646
  #if !__has_feature(objc_arc)
647
- [options release];
648
  #endif
649
- }
650
  }
 
651
 
652
  #if GGML_METAL_EMBED_LIBRARY
653
- [src release];
654
  #endif // GGML_METAL_EMBED_LIBRARY
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
 
657
  // print MTL GPU family:
@@ -725,7 +743,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
725
  [metal_function release]; \
726
  if (error) { \
727
  GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
728
- [metal_library release]; \
729
  return NULL; \
730
  } \
731
  } else { \
@@ -1044,8 +1061,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
1044
  GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
1045
  }
1046
 
1047
- [metal_library release];
1048
-
1049
  return ctx;
1050
  }
1051
 
 
46
  static struct ggml_backend_metal_device_context {
47
  id<MTLDevice> mtl_device;
48
  int mtl_device_ref_count;
49
+ id<MTLLibrary> mtl_library;
50
 
51
  bool has_simdgroup_reduction;
52
  bool has_simdgroup_mm;
 
58
  } g_ggml_ctx_dev_main = {
59
  /*.mtl_device =*/ nil,
60
  /*.mtl_device_ref_count =*/ 0,
61
+ /*.mtl_library =*/ nil,
62
  /*.has_simdgroup_reduction =*/ false,
63
  /*.has_simdgroup_mm =*/ false,
64
  /*.has_residency_sets =*/ false,
 
110
  ctx->mtl_device_ref_count--;
111
 
112
  if (ctx->mtl_device_ref_count == 0) {
113
+ if (ctx->mtl_library) {
114
+ [ctx->mtl_library release];
115
+ ctx->mtl_library = nil;
116
+ }
117
+
118
  if (ctx->mtl_device) {
119
  [ctx->mtl_device release];
120
  ctx->mtl_device = nil;
 
502
  return data;
503
  }
504
 
505
// load library
//
// - first check if the library is embedded
// - then check if the library is in the bundle
// - then check for default.metallib next to the current binary (resolving a symlink if needed)
// - if not found, load the source and compile it
// - if that fails, return nil
//
// device:     the Metal device used to load or compile the library
// use_bfloat: when true, the source is compiled with the GGML_METAL_USE_BF16 macro defined
//
// Returns the library obtained from one of the device's `newLibrary...` methods, or nil on
// failure (the reason is logged). Success is always decided by the returned object, never by
// the NSError out-parameter: that pointer is only meaningful after a failed call, and Metal
// may also fill it with warnings for a library that compiled successfully.
static id<MTLLibrary> ggml_metal_load_library(id<MTLDevice> device, bool use_bfloat) {
    id<MTLLibrary> metal_library = nil;
    NSError  * error = nil;
    NSString * src   = nil;

#if GGML_METAL_EMBED_LIBRARY
    GGML_LOG_INFO("%s: using embedded metal library\n", __func__);

    extern const char ggml_metallib_start[];
    extern const char ggml_metallib_end[];

    // owned (+1) string: released at the single exit point at the bottom of the function
    src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];

#else

#ifdef SWIFT_PACKAGE
    NSBundle * bundle = SWIFTPM_MODULE_BUNDLE;
#else
    NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
#endif

    NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
    if (path_lib == nil) {
        // Try to find the resource in the directory where the current binary is located.
        NSFileManager * fm = [NSFileManager defaultManager];

        NSString * current_binary = [[NSProcessInfo processInfo] arguments][0];
        NSString * bin_dir = [current_binary stringByDeletingLastPathComponent];
        NSString * default_metallib_path = [NSString pathWithComponents:@[bin_dir, @"default.metallib"]];
        if ([fm isReadableFileAtPath:default_metallib_path]) {
            GGML_LOG_INFO("%s: found '%s'\n", __func__, [default_metallib_path UTF8String]);

            // The two probes below are best-effort and their errors were never inspected, so
            // no error pointer is passed: a failure here must not leak into later checks.
            NSDictionary * atts = [fm attributesOfItemAtPath:default_metallib_path error:NULL];
            if ([atts[NSFileType] isEqualToString:NSFileTypeSymbolicLink]) {
                // Optionally, if this is a symlink, try to resolve it.
                default_metallib_path = [fm destinationOfSymbolicLinkAtPath:default_metallib_path error:NULL];
                if ([default_metallib_path length] > 0 && ![default_metallib_path hasPrefix:@"/"]) {
                    // It is a relative path, adding the binary directory as directory prefix.
                    default_metallib_path = [NSString pathWithComponents:@[bin_dir, default_metallib_path]];
                }
                if (!default_metallib_path || ![fm isReadableFileAtPath:default_metallib_path]) {
                    // Link to the resource could not be resolved.
                    default_metallib_path = nil;
                } else {
                    GGML_LOG_INFO("%s: symlink resolved '%s'\n", __func__, [default_metallib_path UTF8String]);
                }
            }
        } else {
            // The resource couldn't be found in the binary's directory.
            default_metallib_path = nil;
        }
        path_lib = default_metallib_path;
    }

    if (path_lib != nil) {
        // pre-compiled library found
        NSURL * libURL = [NSURL fileURLWithPath:path_lib];
        GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);

        metal_library = [device newLibraryWithURL:libURL error:&error];
        if (metal_library == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            return nil;
        }
    } else {
        GGML_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);

        NSString * path_source;
        NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];

        GGML_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");

        if (path_resource) {
            path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
        } else {
            path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
        }

        if (path_source == nil) {
            GGML_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
            path_source = @"ggml-metal.metal";
        }

        GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);

        // autoreleased string: must NOT be released by this function
        src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
        if (src == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            return nil;
        }
    }
#endif

    if (!metal_library) {
        if (src == nil) {
            // only reachable when the embedded bytes could not be decoded as UTF-8
            GGML_LOG_ERROR("%s: error: failed to read the metal library source\n", __func__);
            return nil;
        }

        @autoreleasepool {
            // dictionary of preprocessor macros
            NSMutableDictionary * prep = [NSMutableDictionary dictionary];

            if (use_bfloat) {
                [prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
            }

#if GGML_METAL_EMBED_LIBRARY
            [prep setObject:@"1" forKey:@"GGML_METAL_EMBED_LIBRARY"];
#endif

            MTLCompileOptions * options = [MTLCompileOptions new];
            options.preprocessorMacros = prep;

            //[options setFastMathEnabled:false];

            error = nil;
            metal_library = [device newLibraryWithSource:src options:options error:&error];
            if (metal_library == nil) {
                GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
            } else if (error) {
                // the library was produced, the error object only carries compiler warnings
                GGML_LOG_WARN("%s: warning: %s\n", __func__, [[error description] UTF8String]);
            }

            // no early return inside this scope, so the options are released on failure too
#if !__has_feature(objc_arc)
            [options release];
#endif
        }
    }

    // single exit: the embedded source string is released on both success and failure
#if GGML_METAL_EMBED_LIBRARY && !__has_feature(objc_arc)
    [src release];
#endif // GGML_METAL_EMBED_LIBRARY

    return metal_library;
}
637
+
638
+ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
639
+ GGML_LOG_INFO("%s: allocating\n", __func__);
640
+
641
+ #if TARGET_OS_OSX && !GGML_METAL_NDEBUG
642
+ // Show all the Metal device instances in the system
643
+ NSArray * devices = MTLCopyAllDevices();
644
+ for (id<MTLDevice> device in devices) {
645
+ GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
646
+ }
647
+ [devices release]; // since it was created by a *Copy* C method
648
+ #endif
649
+
650
+ // init context
651
+ struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
652
+ struct ggml_backend_metal_device_context * ctx_dev = dev->context;
653
+
654
+ id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
655
+ GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
656
+
657
+ ctx->queue = [device newCommandQueue];
658
+ if (ctx->queue == nil) {
659
+ GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
660
+ return NULL;
661
+ }
662
+
663
+ ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
664
+
665
+ // load library
666
+ if (ctx_dev->mtl_library == nil) {
667
+ ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
668
+ }
669
+ id<MTLLibrary> metal_library = ctx_dev->mtl_library;
670
+ if (metal_library == nil) {
671
+ GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
672
+ return NULL;
673
  }
674
 
675
  // print MTL GPU family:
 
743
  [metal_function release]; \
744
  if (error) { \
745
  GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
 
746
  return NULL; \
747
  } \
748
  } else { \
 
1061
  GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
1062
  }
1063
 
 
 
1064
  return ctx;
1065
  }
1066