KitaitiMakoto committed on
Commit
72cecc9
·
unverified ·
1 Parent(s): f198f02

ruby : add Core ML support (#3214)

Browse files

* Prevent overflow

* Fix memsize of Whisper::Context

* Rename xxx_initialize to more Ruby-esque name: xxx_s_new

* Define Whisper::Model::ZipURI

* Define Whisper::Model.coreml_compiled_models

* Make Options' @cmake_options Hash

* Use --{enable,disable}-whisper-coreml option for -I/opt/homebrew/opt/llvm/include

* Prepare Core ML model if enabled

* Add test for ZipURI

* Add signatures for ZipURI

* Add Whisper.system_info_str

* Add test for Whisper.system_info_str

* Add signature for Model.coreml_compiled_models

* Add signature for Whisper.system_info_str

* Add test for Core ML

* Update date

* Maintain .gitignore

bindings/ruby/.gitignore CHANGED
@@ -1,6 +1,3 @@
1
  LICENSE
2
  pkg/
3
  lib/whisper.*
4
- ext/sources/*
5
- !ext/sources/CMakeGraphVizOptions.cmake
6
- ext/mkmf.log
 
1
  LICENSE
2
  pkg/
3
  lib/whisper.*
 
 
 
bindings/ruby/ext/.gitignore CHANGED
@@ -2,10 +2,8 @@ Makefile
2
  whisper.so
3
  whisper.bundle
4
  whisper.dll
5
- scripts/get-flags.mk
6
  *.o
7
- /*/**/*.c
8
- /*/**/*.cpp
9
- /*/**/*.h
10
- /*/**/*.m
11
- /*/**/*.metal
 
2
  whisper.so
3
  whisper.bundle
4
  whisper.dll
 
5
  *.o
6
+ *.a
7
+ sources/*
8
+ !sources/CMakeGraphVizOptions.cmake
9
+ mkmf.log
 
bindings/ruby/ext/options.rb CHANGED
@@ -20,27 +20,39 @@ class Options
20
  Dir.chdir __dir__ do
21
  output = `#{@cmake.shellescape} -S sources -B build -L`
22
  end
23
- started = false
24
- @cmake_options = output.lines.filter_map {|line|
25
- if line.chomp == "-- Cache values"
26
- started = true
27
- next
28
- end
29
- next unless started
30
- option, value = line.chomp.split("=", 2)
31
- name, type = option.split(":", 2)
32
- [name, type, value]
33
- }
 
34
  end
35
 
36
  private
37
 
38
  def configure
39
- cmake_options.each do |name, type, default_value|
40
  option = option_name(name)
41
  value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
42
  @options[name] = [type, value]
43
  end
 
 
 
 
 
 
 
 
 
 
 
44
  end
45
 
46
  def option_name(name)
 
20
  Dir.chdir __dir__ do
21
  output = `#{@cmake.shellescape} -S sources -B build -L`
22
  end
23
+ @cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
24
+ .filter_map {|line|
25
+ option, value = line.chomp.split("=", 2)
26
+ name, type = option.split(":", 2)
27
+ [
28
+ name,
29
+ [
30
+ type,
31
+ type == "BOOL" ? value == "ON" : value
32
+ ]
33
+ ]
34
+ }.to_h
35
  end
36
 
37
  private
38
 
39
  def configure
40
+ cmake_options.each_pair do |name, (type, default_value)|
41
  option = option_name(name)
42
  value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
43
  @options[name] = [type, value]
44
  end
45
+
46
+ configure_coreml
47
+ end
48
+
49
+ def configure_coreml
50
+ use_coreml = if @options["WHISPER_COREML"][1].nil?
51
+ cmake_options["WHISPER_COREML"][1]
52
+ else
53
+ @options["WHISPER_COREML"][1]
54
+ end
55
+ $CPPFLAGS << " -DRUBY_WHISPER_USE_COREML" if use_coreml
56
  end
57
 
58
  def option_name(name)
bindings/ruby/ext/ruby_whisper.c CHANGED
@@ -22,6 +22,8 @@ ID id_new;
22
  ID id_to_path;
23
  ID id_URI;
24
  ID id_pre_converted_models;
 
 
25
 
26
  static bool is_log_callback_finalized = false;
27
 
@@ -83,6 +85,14 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
83
  return rb_str_new2(str_full);
84
  }
85
 
 
 
 
 
 
 
 
 
86
  static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
87
  is_log_callback_finalized = true;
88
  return Qnil;
@@ -130,6 +140,8 @@ void Init_whisper() {
130
  id_to_path = rb_intern("to_path");
131
  id_URI = rb_intern("URI");
132
  id_pre_converted_models = rb_intern("pre_converted_models");
 
 
133
 
134
  mWhisper = rb_define_module("Whisper");
135
  mVAD = rb_define_module_under(mWhisper, "VAD");
@@ -145,6 +157,7 @@ void Init_whisper() {
145
  rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
146
  rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
147
  rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
 
148
  rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
149
  rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
150
 
 
22
  ID id_to_path;
23
  ID id_URI;
24
  ID id_pre_converted_models;
25
+ ID id_coreml_compiled_models;
26
+ ID id_cache;
27
 
28
  static bool is_log_callback_finalized = false;
29
 
 
85
  return rb_str_new2(str_full);
86
  }
87
 
88
+ /*
89
+ * call-seq:
90
+ * system_info_str -> String
91
+ */
92
+ static VALUE ruby_whisper_s_system_info_str(VALUE self) {
93
+ return rb_str_new2(whisper_print_system_info());
94
+ }
95
+
96
  static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
97
  is_log_callback_finalized = true;
98
  return Qnil;
 
140
  id_to_path = rb_intern("to_path");
141
  id_URI = rb_intern("URI");
142
  id_pre_converted_models = rb_intern("pre_converted_models");
143
+ id_coreml_compiled_models = rb_intern("coreml_compiled_models");
144
+ id_cache = rb_intern("cache");
145
 
146
  mWhisper = rb_define_module("Whisper");
147
  mVAD = rb_define_module_under(mWhisper, "VAD");
 
157
  rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
158
  rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
159
  rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
160
+ rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
161
  rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
162
  rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
163
 
bindings/ruby/ext/ruby_whisper_context.c CHANGED
@@ -11,6 +11,8 @@ extern ID id_new;
11
  extern ID id_to_path;
12
  extern ID id_URI;
13
  extern ID id_pre_converted_models;
 
 
14
 
15
  extern VALUE cContext;
16
  extern VALUE eError;
@@ -18,8 +20,8 @@ extern VALUE cModel;
18
 
19
  extern const rb_data_type_t ruby_whisper_params_type;
20
  extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
21
- extern VALUE rb_whisper_model_initialize(VALUE context);
22
- extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
23
  extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
24
 
25
  static void
@@ -53,6 +55,9 @@ ruby_whisper_memsize(const void *p)
53
  if (!rw) {
54
  return 0;
55
  }
 
 
 
56
  return size;
57
  }
58
 
@@ -79,6 +84,13 @@ ruby_whisper_normalize_model_path(VALUE model_path)
79
  VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
80
  if (!NIL_P(pre_converted_model)) {
81
  model_path = pre_converted_model;
 
 
 
 
 
 
 
82
  }
83
  else if (TYPE(model_path) == T_STRING) {
84
  const char * model_path_str = StringValueCStr(model_path);
@@ -293,13 +305,20 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
293
  // Should check when samples.respond_to?(:length)?
294
  } else {
295
  if (TYPE(samples) == T_ARRAY) {
296
- n_samples = RARRAY_LEN(samples);
 
 
 
297
  } else if (memory_view_available_p) {
298
  if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
299
  view.obj = Qnil;
300
  rb_raise(rb_eArgError, "unable to get a memory view");
301
  }
302
- n_samples = view.byte_size / view.item_size;
 
 
 
 
303
  } else if (rb_respond_to(samples, id_length)) {
304
  n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
305
  } else {
@@ -387,10 +406,17 @@ ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
387
  view.obj = Qnil;
388
  rb_raise(rb_eArgError, "unable to get a memory view");
389
  }
390
- n_samples = view.byte_size / view.item_size;
 
 
 
 
391
  } else {
392
  if (TYPE(samples) == T_ARRAY) {
393
- n_samples = RARRAY_LEN(samples);
 
 
 
394
  } else if (rb_respond_to(samples, id_length)) {
395
  n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
396
  } else {
@@ -476,7 +502,7 @@ ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
476
  TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
477
  const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
478
  const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
479
- return INT2NUM(t0);
480
  }
481
 
482
  /*
@@ -494,7 +520,7 @@ ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
494
  TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
495
  const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
496
  const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
497
- return INT2NUM(t1);
498
  }
499
 
500
  /*
@@ -552,7 +578,7 @@ ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
552
  static VALUE
553
  ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
554
  {
555
- return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
556
  }
557
 
558
  /*
@@ -586,7 +612,7 @@ ruby_whisper_each_segment(VALUE self)
586
 
587
  const int n_segments = whisper_full_n_segments(rw->context);
588
  for (int i = 0; i < n_segments; ++i) {
589
- rb_yield(rb_whisper_segment_initialize(self, i));
590
  }
591
 
592
  return self;
@@ -599,7 +625,7 @@ ruby_whisper_each_segment(VALUE self)
599
  static VALUE
600
  ruby_whisper_get_model(VALUE self)
601
  {
602
- return rb_whisper_model_initialize(self);
603
  }
604
 
605
  void
 
11
  extern ID id_to_path;
12
  extern ID id_URI;
13
  extern ID id_pre_converted_models;
14
+ extern ID id_coreml_compiled_models;
15
+ extern ID id_cache;
16
 
17
  extern VALUE cContext;
18
  extern VALUE eError;
 
20
 
21
  extern const rb_data_type_t ruby_whisper_params_type;
22
  extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
23
+ extern VALUE rb_whisper_model_s_new(VALUE context);
24
+ extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
25
  extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
26
 
27
  static void
 
55
  if (!rw) {
56
  return 0;
57
  }
58
+ if (rw->context) {
59
+ size += sizeof(rw->context);
60
+ }
61
  return size;
62
  }
63
 
 
84
  VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
85
  if (!NIL_P(pre_converted_model)) {
86
  model_path = pre_converted_model;
87
+ #ifdef RUBY_WHISPER_USE_COREML
88
+ VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
89
+ VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
90
+ if (!NIL_P(coreml_converted_model)) {
91
+ rb_funcall(coreml_converted_model, id_cache, 0);
92
+ }
93
+ #endif
94
  }
95
  else if (TYPE(model_path) == T_STRING) {
96
  const char * model_path_str = StringValueCStr(model_path);
 
305
  // Should check when samples.respond_to?(:length)?
306
  } else {
307
  if (TYPE(samples) == T_ARRAY) {
308
+ if (RARRAY_LEN(samples) > INT_MAX) {
309
+ rb_raise(rb_eArgError, "samples are too long");
310
+ }
311
+ n_samples = (int)RARRAY_LEN(samples);
312
  } else if (memory_view_available_p) {
313
  if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
314
  view.obj = Qnil;
315
  rb_raise(rb_eArgError, "unable to get a memory view");
316
  }
317
+ ssize_t n_samples_size = view.byte_size / view.item_size;
318
+ if (n_samples_size > INT_MAX) {
319
+ rb_raise(rb_eArgError, "samples are too long");
320
+ }
321
+ n_samples = (int)n_samples_size;
322
  } else if (rb_respond_to(samples, id_length)) {
323
  n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
324
  } else {
 
406
  view.obj = Qnil;
407
  rb_raise(rb_eArgError, "unable to get a memory view");
408
  }
409
+ ssize_t n_samples_size = view.byte_size / view.item_size;
410
+ if (n_samples_size > INT_MAX) {
411
+ rb_raise(rb_eArgError, "samples are too long");
412
+ }
413
+ n_samples = (int)n_samples_size;
414
  } else {
415
  if (TYPE(samples) == T_ARRAY) {
416
+ if (RARRAY_LEN(samples) > INT_MAX) {
417
+ rb_raise(rb_eArgError, "samples are too long");
418
+ }
419
+ n_samples = (int)RARRAY_LEN(samples);
420
  } else if (rb_respond_to(samples, id_length)) {
421
  n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
422
  } else {
 
502
  TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
503
  const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
504
  const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
505
+ return LONG2NUM(t0);
506
  }
507
 
508
  /*
 
520
  TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
521
  const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
522
  const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
523
+ return LONG2NUM(t1);
524
  }
525
 
526
  /*
 
578
  static VALUE
579
  ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
580
  {
581
+ return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
582
  }
583
 
584
  /*
 
612
 
613
  const int n_segments = whisper_full_n_segments(rw->context);
614
  for (int i = 0; i < n_segments; ++i) {
615
+ rb_yield(rb_whisper_segment_s_new(self, i));
616
  }
617
 
618
  return self;
 
625
  static VALUE
626
  ruby_whisper_get_model(VALUE self)
627
  {
628
+ return rb_whisper_model_s_new(self);
629
  }
630
 
631
  void
bindings/ruby/ext/ruby_whisper_model.c CHANGED
@@ -35,7 +35,7 @@ static VALUE ruby_whisper_model_allocate(VALUE klass) {
35
  return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
36
  }
37
 
38
- VALUE rb_whisper_model_initialize(VALUE context) {
39
  ruby_whisper_model *rwm;
40
  const VALUE model = ruby_whisper_model_allocate(cModel);
41
  TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
 
35
  return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
36
  }
37
 
38
+ VALUE rb_whisper_model_s_new(VALUE context) {
39
  ruby_whisper_model *rwm;
40
  const VALUE model = ruby_whisper_model_allocate(cModel);
41
  TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
bindings/ruby/ext/ruby_whisper_params.c CHANGED
@@ -34,7 +34,7 @@ extern VALUE cVADParams;
34
  extern ID id_call;
35
 
36
  extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
37
- extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
38
  extern const rb_data_type_t ruby_whisper_vad_params_type;
39
 
40
  static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
@@ -110,7 +110,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta
110
  const int n_segments = whisper_full_n_segments_from_state(state);
111
  for (int i = n_new; i > 0; i--) {
112
  int i_segment = n_segments - i;
113
- VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment);
114
  for (int j = 0; j < callbacks_len; j++) {
115
  VALUE cb = rb_ary_entry(container->callbacks, j);
116
  rb_funcall(cb, id_call, 1, segment);
 
34
  extern ID id_call;
35
 
36
  extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
37
+ extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
38
  extern const rb_data_type_t ruby_whisper_vad_params_type;
39
 
40
  static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
 
110
  const int n_segments = whisper_full_n_segments_from_state(state);
111
  for (int i = n_new; i > 0; i--) {
112
  int i_segment = n_segments - i;
113
+ VALUE segment = rb_whisper_segment_s_new(*container->context, i_segment);
114
  for (int j = 0; j < callbacks_len; j++) {
115
  VALUE cb = rb_ary_entry(container->callbacks, j);
116
  rb_funcall(cb, id_call, 1, segment);
bindings/ruby/ext/ruby_whisper_segment.c CHANGED
@@ -38,7 +38,7 @@ ruby_whisper_segment_allocate(VALUE klass)
38
  }
39
 
40
  VALUE
41
- rb_whisper_segment_initialize(VALUE context, int index)
42
  {
43
  ruby_whisper_segment *rws;
44
  const VALUE segment = ruby_whisper_segment_allocate(cSegment);
@@ -63,7 +63,7 @@ ruby_whisper_segment_get_start_time(VALUE self)
63
  TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
64
  const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
65
  // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
66
- return INT2NUM(t0 * 10);
67
  }
68
 
69
  /*
@@ -81,7 +81,7 @@ ruby_whisper_segment_get_end_time(VALUE self)
81
  TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
82
  const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
83
  // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
84
- return INT2NUM(t1 * 10);
85
  }
86
 
87
  /*
 
38
  }
39
 
40
  VALUE
41
+ rb_whisper_segment_s_new(VALUE context, int index)
42
  {
43
  ruby_whisper_segment *rws;
44
  const VALUE segment = ruby_whisper_segment_allocate(cSegment);
 
63
  TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
64
  const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
65
  // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
66
+ return LONG2NUM(t0 * 10);
67
  }
68
 
69
  /*
 
81
  TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
82
  const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
83
  // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
84
+ return LONG2NUM(t1 * 10);
85
  }
86
 
87
  /*
bindings/ruby/lib/whisper/model/uri.rb CHANGED
@@ -130,6 +130,44 @@ module Whisper
130
  end
131
  end
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  @pre_converted_models = %w[
134
  tiny
135
  tiny.en
@@ -171,8 +209,25 @@ module Whisper
171
  @pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
172
  end
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  class << self
175
- attr_reader :pre_converted_models
176
  end
177
  end
178
  end
 
130
  end
131
  end
132
 
133
+ class ZipURI < URI
134
+ def cache
135
+ zip_path = Pathname(super)
136
+ dest = unzipped_path
137
+ return if dest.exist? && dest.mtime >= zip_path.mtime
138
+ escaping dest do
139
+ system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
140
+ end
141
+ zip_path.to_path
142
+ end
143
+
144
+ def clear_cache
145
+ super
146
+ unzipped_path.rmtree if unzipped_path.exist?
147
+ end
148
+
149
+ private
150
+
151
+ def unzipped_path
152
+ cache_path.sub_ext("")
153
+ end
154
+
155
+ def escaping(path)
156
+ escaped = Pathname("#{path}.removing")
157
+ if path.exist?
158
+ escaped.rmtree if escaped.exist?
159
+ path.rename escaped
160
+ end
161
+ yield
162
+ ensure
163
+ if path.exist?
164
+ escaped.rmtree if escaped.exist?
165
+ else
166
+ escaped.rename path if escaped.exist?
167
+ end
168
+ end
169
+ end
170
+
171
  @pre_converted_models = %w[
172
  tiny
173
  tiny.en
 
209
  @pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
210
  end
211
 
212
+ @coreml_compiled_models = %w[
213
+ tiny
214
+ tiny.en
215
+ base
216
+ base.en
217
+ small
218
+ small.en
219
+ medium
220
+ medium.en
221
+ large-v1
222
+ large-v2
223
+ large-v3
224
+ large-v3-turbo
225
+ ].each_with_object({}) do |name, models|
226
+ models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
227
+ end
228
+
229
  class << self
230
+ attr_reader :pre_converted_models, :coreml_compiled_models
231
  end
232
  end
233
  end
bindings/ruby/sig/whisper.rbs CHANGED
@@ -22,6 +22,7 @@ module Whisper
22
  def self.lang_str: (Integer id) -> String
23
  def self.lang_str_full: (Integer id) -> String
24
  def self.log_set: (log_callback, Object? user_data) -> log_callback
 
25
 
26
  class Context
27
  def self.new: (path | ::URI::HTTP) -> instance
@@ -386,6 +387,7 @@ module Whisper
386
 
387
  class Model
388
  def self.pre_converted_models: () -> Hash[String, Model::URI]
 
389
  def self.new: () -> instance
390
  def n_vocab: () -> Integer
391
  def n_audio_ctx: () -> Integer
@@ -405,6 +407,11 @@ module Whisper
405
  def to_path: -> String
406
  def clear_cache: -> void
407
  end
 
 
 
 
 
408
  end
409
 
410
  class Segment
 
22
  def self.lang_str: (Integer id) -> String
23
  def self.lang_str_full: (Integer id) -> String
24
  def self.log_set: (log_callback, Object? user_data) -> log_callback
25
+ def self.system_info_str: () -> String
26
 
27
  class Context
28
  def self.new: (path | ::URI::HTTP) -> instance
 
387
 
388
  class Model
389
  def self.pre_converted_models: () -> Hash[String, Model::URI]
390
+ def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
391
  def self.new: () -> instance
392
  def n_vocab: () -> Integer
393
  def n_audio_ctx: () -> Integer
 
407
  def to_path: -> String
408
  def clear_cache: -> void
409
  end
410
+
411
+ class ZipURI < URI
412
+ def cache: () -> String
413
+ def clear_cache: () -> void
414
+ end
415
  end
416
 
417
  class Segment
bindings/ruby/tests/test_model.rb CHANGED
@@ -106,4 +106,13 @@ class TestModel < TestBase
106
  assert_equal 1, model.ftype
107
  assert_equal "base", model.type
108
  end
 
 
 
 
 
 
 
 
 
109
  end
 
106
  assert_equal 1, model.ftype
107
  assert_equal "base", model.type
108
  end
109
+
110
+ def test_coreml_model_auto_download
111
+ uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
112
+ model_path = Pathname(uri.to_path).sub_ext("")
113
+ model_path.rmtree if model_path.exist?
114
+
115
+ uri.cache
116
+ assert_path_exist model_path
117
+ end
118
  end
bindings/ruby/tests/test_package.rb CHANGED
@@ -25,6 +25,20 @@ class TestPackage < TestBase
25
  end
26
  end
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  private
29
 
30
  def assert_installed(dir, version)
 
25
  end
26
  end
27
 
28
+ def test_install_with_coreml
29
+ omit_unless RUBY_PLATFORM.match?(/darwin/) do
30
+ gemspec = Gem::Specification.load("whispercpp.gemspec")
31
+ Dir.mktmpdir do |dir|
32
+ system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", "--", "--enable-whisper-coreml", exception: true
33
+ assert_installed dir, gemspec.version
34
+ assert_nothing_raised do
35
+ libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
36
+ system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
37
+ end
38
+ end
39
+ end
40
+ end
41
+
42
  private
43
 
44
  def assert_installed(dir, version)
bindings/ruby/tests/test_whisper.rb CHANGED
@@ -94,6 +94,10 @@ class TestWhisper < TestBase
94
  end
95
  end
96
 
 
 
 
 
97
  def test_log_set
98
  user_data = Object.new
99
  logs = []
 
94
  end
95
  end
96
 
97
+ def test_system_info_str
98
+ assert_match /\AWHISPER : COREML = \d | OPENVINO = \d |/, Whisper.system_info_str
99
+ end
100
+
101
  def test_log_set
102
  user_data = Object.new
103
  logs = []
bindings/ruby/whispercpp.gemspec CHANGED
@@ -4,7 +4,7 @@ Gem::Specification.new do |s|
4
  s.name = "whispercpp"
5
  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
6
  s.version = '1.3.3'
7
- s.date = '2025-05-29'
8
  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
9
  s.email = '[email protected]'
10
  s.extra_rdoc_files = ['LICENSE', 'README.md']
 
4
  s.name = "whispercpp"
5
  s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
6
  s.version = '1.3.3'
7
+ s.date = '2025-06-01'
8
  s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
9
  s.email = '[email protected]'
10
  s.extra_rdoc_files = ['LICENSE', 'README.md']