Spaces:
Running
Running
ruby : add Core ML support (#3214)
Browse files
* Prevent overflow
* Fix memsize of Whisper::Context
* Rename xxx_initialize to more Ruby-esque name: xxx_s_new
* Define Whisper::Model::ZipURI
* Define Whisper::Model.coreml_compiled_models
* Make Options' @cmake_options Hash
* Use --{enable,disable}-whisper-coreml option for -I/opt/homebrew/opt/llvm/include
* Prepare Core ML model if enabled
* Add test for ZipURI
* Add signatures for ZipURI
* Add Whisper.system_info_str
* Add test for Whisper.system_info_str
* Add signature for Model.coreml_compiled_models
* Add signature for Whisper.system_info_str
* Add test for Core ML
* Update date
* Maintain .gitignore
- bindings/ruby/.gitignore +0 -3
- bindings/ruby/ext/.gitignore +4 -6
- bindings/ruby/ext/options.rb +24 -12
- bindings/ruby/ext/ruby_whisper.c +13 -0
- bindings/ruby/ext/ruby_whisper_context.c +37 -11
- bindings/ruby/ext/ruby_whisper_model.c +1 -1
- bindings/ruby/ext/ruby_whisper_params.c +2 -2
- bindings/ruby/ext/ruby_whisper_segment.c +3 -3
- bindings/ruby/lib/whisper/model/uri.rb +56 -1
- bindings/ruby/sig/whisper.rbs +7 -0
- bindings/ruby/tests/test_model.rb +9 -0
- bindings/ruby/tests/test_package.rb +14 -0
- bindings/ruby/tests/test_whisper.rb +4 -0
- bindings/ruby/whispercpp.gemspec +1 -1
bindings/ruby/.gitignore
CHANGED
|
@@ -1,6 +1,3 @@
|
|
| 1 |
LICENSE
|
| 2 |
pkg/
|
| 3 |
lib/whisper.*
|
| 4 |
-
ext/sources/*
|
| 5 |
-
!ext/sources/CMakeGraphVizOptions.cmake
|
| 6 |
-
ext/mkmf.log
|
|
|
|
| 1 |
LICENSE
|
| 2 |
pkg/
|
| 3 |
lib/whisper.*
|
|
|
|
|
|
|
|
|
bindings/ruby/ext/.gitignore
CHANGED
|
@@ -2,10 +2,8 @@ Makefile
|
|
| 2 |
whisper.so
|
| 3 |
whisper.bundle
|
| 4 |
whisper.dll
|
| 5 |
-
scripts/get-flags.mk
|
| 6 |
*.o
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
/*/**/*.metal
|
|
|
|
| 2 |
whisper.so
|
| 3 |
whisper.bundle
|
| 4 |
whisper.dll
|
|
|
|
| 5 |
*.o
|
| 6 |
+
*.a
|
| 7 |
+
sources/*
|
| 8 |
+
!sources/CMakeGraphVizOptions.cmake
|
| 9 |
+
mkmf.log
|
|
|
bindings/ruby/ext/options.rb
CHANGED
|
@@ -20,27 +20,39 @@ class Options
|
|
| 20 |
Dir.chdir __dir__ do
|
| 21 |
output = `#{@cmake.shellescape} -S sources -B build -L`
|
| 22 |
end
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 34 |
end
|
| 35 |
|
| 36 |
private
|
| 37 |
|
| 38 |
def configure
|
| 39 |
-
cmake_options.
|
| 40 |
option = option_name(name)
|
| 41 |
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
|
| 42 |
@options[name] = [type, value]
|
| 43 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
end
|
| 45 |
|
| 46 |
def option_name(name)
|
|
|
|
| 20 |
Dir.chdir __dir__ do
|
| 21 |
output = `#{@cmake.shellescape} -S sources -B build -L`
|
| 22 |
end
|
| 23 |
+
@cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
|
| 24 |
+
.filter_map {|line|
|
| 25 |
+
option, value = line.chomp.split("=", 2)
|
| 26 |
+
name, type = option.split(":", 2)
|
| 27 |
+
[
|
| 28 |
+
name,
|
| 29 |
+
[
|
| 30 |
+
type,
|
| 31 |
+
type == "BOOL" ? value == "ON" : value
|
| 32 |
+
]
|
| 33 |
+
]
|
| 34 |
+
}.to_h
|
| 35 |
end
|
| 36 |
|
| 37 |
private
|
| 38 |
|
| 39 |
def configure
|
| 40 |
+
cmake_options.each_pair do |name, (type, default_value)|
|
| 41 |
option = option_name(name)
|
| 42 |
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
|
| 43 |
@options[name] = [type, value]
|
| 44 |
end
|
| 45 |
+
|
| 46 |
+
configure_coreml
|
| 47 |
+
end
|
| 48 |
+
|
| 49 |
+
def configure_coreml
|
| 50 |
+
use_coreml = if @options["WHISPER_COREML"][1].nil?
|
| 51 |
+
cmake_options["WHISPER_COREML"][1]
|
| 52 |
+
else
|
| 53 |
+
@options["WHISPER_COREML"][1]
|
| 54 |
+
end
|
| 55 |
+
$CPPFLAGS << " -DRUBY_WHISPER_USE_COREML" if use_coreml
|
| 56 |
end
|
| 57 |
|
| 58 |
def option_name(name)
|
bindings/ruby/ext/ruby_whisper.c
CHANGED
|
@@ -22,6 +22,8 @@ ID id_new;
|
|
| 22 |
ID id_to_path;
|
| 23 |
ID id_URI;
|
| 24 |
ID id_pre_converted_models;
|
|
|
|
|
|
|
| 25 |
|
| 26 |
static bool is_log_callback_finalized = false;
|
| 27 |
|
|
@@ -83,6 +85,14 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
|
| 83 |
return rb_str_new2(str_full);
|
| 84 |
}
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
| 87 |
is_log_callback_finalized = true;
|
| 88 |
return Qnil;
|
|
@@ -130,6 +140,8 @@ void Init_whisper() {
|
|
| 130 |
id_to_path = rb_intern("to_path");
|
| 131 |
id_URI = rb_intern("URI");
|
| 132 |
id_pre_converted_models = rb_intern("pre_converted_models");
|
|
|
|
|
|
|
| 133 |
|
| 134 |
mWhisper = rb_define_module("Whisper");
|
| 135 |
mVAD = rb_define_module_under(mWhisper, "VAD");
|
|
@@ -145,6 +157,7 @@ void Init_whisper() {
|
|
| 145 |
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
| 146 |
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
| 147 |
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
|
|
|
| 148 |
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
| 149 |
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
| 150 |
|
|
|
|
| 22 |
ID id_to_path;
|
| 23 |
ID id_URI;
|
| 24 |
ID id_pre_converted_models;
|
| 25 |
+
ID id_coreml_compiled_models;
|
| 26 |
+
ID id_cache;
|
| 27 |
|
| 28 |
static bool is_log_callback_finalized = false;
|
| 29 |
|
|
|
|
| 85 |
return rb_str_new2(str_full);
|
| 86 |
}
|
| 87 |
|
| 88 |
+
/*
|
| 89 |
+
* call-seq:
|
| 90 |
+
* system_info_str -> String
|
| 91 |
+
*/
|
| 92 |
+
static VALUE ruby_whisper_s_system_info_str(VALUE self) {
|
| 93 |
+
return rb_str_new2(whisper_print_system_info());
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
| 97 |
is_log_callback_finalized = true;
|
| 98 |
return Qnil;
|
|
|
|
| 140 |
id_to_path = rb_intern("to_path");
|
| 141 |
id_URI = rb_intern("URI");
|
| 142 |
id_pre_converted_models = rb_intern("pre_converted_models");
|
| 143 |
+
id_coreml_compiled_models = rb_intern("coreml_compiled_models");
|
| 144 |
+
id_cache = rb_intern("cache");
|
| 145 |
|
| 146 |
mWhisper = rb_define_module("Whisper");
|
| 147 |
mVAD = rb_define_module_under(mWhisper, "VAD");
|
|
|
|
| 157 |
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
| 158 |
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
| 159 |
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
| 160 |
+
rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
|
| 161 |
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
| 162 |
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
| 163 |
|
bindings/ruby/ext/ruby_whisper_context.c
CHANGED
|
@@ -11,6 +11,8 @@ extern ID id_new;
|
|
| 11 |
extern ID id_to_path;
|
| 12 |
extern ID id_URI;
|
| 13 |
extern ID id_pre_converted_models;
|
|
|
|
|
|
|
| 14 |
|
| 15 |
extern VALUE cContext;
|
| 16 |
extern VALUE eError;
|
|
@@ -18,8 +20,8 @@ extern VALUE cModel;
|
|
| 18 |
|
| 19 |
extern const rb_data_type_t ruby_whisper_params_type;
|
| 20 |
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
| 21 |
-
extern VALUE
|
| 22 |
-
extern VALUE
|
| 23 |
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
|
| 24 |
|
| 25 |
static void
|
|
@@ -53,6 +55,9 @@ ruby_whisper_memsize(const void *p)
|
|
| 53 |
if (!rw) {
|
| 54 |
return 0;
|
| 55 |
}
|
|
|
|
|
|
|
|
|
|
| 56 |
return size;
|
| 57 |
}
|
| 58 |
|
|
@@ -79,6 +84,13 @@ ruby_whisper_normalize_model_path(VALUE model_path)
|
|
| 79 |
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
|
| 80 |
if (!NIL_P(pre_converted_model)) {
|
| 81 |
model_path = pre_converted_model;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
}
|
| 83 |
else if (TYPE(model_path) == T_STRING) {
|
| 84 |
const char * model_path_str = StringValueCStr(model_path);
|
|
@@ -293,13 +305,20 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
|
| 293 |
// Should check when samples.respond_to?(:length)?
|
| 294 |
} else {
|
| 295 |
if (TYPE(samples) == T_ARRAY) {
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
| 297 |
} else if (memory_view_available_p) {
|
| 298 |
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
| 299 |
view.obj = Qnil;
|
| 300 |
rb_raise(rb_eArgError, "unable to get a memory view");
|
| 301 |
}
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
} else if (rb_respond_to(samples, id_length)) {
|
| 304 |
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
| 305 |
} else {
|
|
@@ -387,10 +406,17 @@ ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
|
|
| 387 |
view.obj = Qnil;
|
| 388 |
rb_raise(rb_eArgError, "unable to get a memory view");
|
| 389 |
}
|
| 390 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
} else {
|
| 392 |
if (TYPE(samples) == T_ARRAY) {
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
| 394 |
} else if (rb_respond_to(samples, id_length)) {
|
| 395 |
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
| 396 |
} else {
|
|
@@ -476,7 +502,7 @@ ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
|
| 476 |
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
| 477 |
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
| 478 |
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
| 479 |
-
return
|
| 480 |
}
|
| 481 |
|
| 482 |
/*
|
|
@@ -494,7 +520,7 @@ ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
|
| 494 |
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
| 495 |
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
| 496 |
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
| 497 |
-
return
|
| 498 |
}
|
| 499 |
|
| 500 |
/*
|
|
@@ -552,7 +578,7 @@ ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
|
| 552 |
static VALUE
|
| 553 |
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
| 554 |
{
|
| 555 |
-
return
|
| 556 |
}
|
| 557 |
|
| 558 |
/*
|
|
@@ -586,7 +612,7 @@ ruby_whisper_each_segment(VALUE self)
|
|
| 586 |
|
| 587 |
const int n_segments = whisper_full_n_segments(rw->context);
|
| 588 |
for (int i = 0; i < n_segments; ++i) {
|
| 589 |
-
rb_yield(
|
| 590 |
}
|
| 591 |
|
| 592 |
return self;
|
|
@@ -599,7 +625,7 @@ ruby_whisper_each_segment(VALUE self)
|
|
| 599 |
static VALUE
|
| 600 |
ruby_whisper_get_model(VALUE self)
|
| 601 |
{
|
| 602 |
-
return
|
| 603 |
}
|
| 604 |
|
| 605 |
void
|
|
|
|
| 11 |
extern ID id_to_path;
|
| 12 |
extern ID id_URI;
|
| 13 |
extern ID id_pre_converted_models;
|
| 14 |
+
extern ID id_coreml_compiled_models;
|
| 15 |
+
extern ID id_cache;
|
| 16 |
|
| 17 |
extern VALUE cContext;
|
| 18 |
extern VALUE eError;
|
|
|
|
| 20 |
|
| 21 |
extern const rb_data_type_t ruby_whisper_params_type;
|
| 22 |
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
| 23 |
+
extern VALUE rb_whisper_model_s_new(VALUE context);
|
| 24 |
+
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
| 25 |
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
|
| 26 |
|
| 27 |
static void
|
|
|
|
| 55 |
if (!rw) {
|
| 56 |
return 0;
|
| 57 |
}
|
| 58 |
+
if (rw->context) {
|
| 59 |
+
size += sizeof(rw->context);
|
| 60 |
+
}
|
| 61 |
return size;
|
| 62 |
}
|
| 63 |
|
|
|
|
| 84 |
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
|
| 85 |
if (!NIL_P(pre_converted_model)) {
|
| 86 |
model_path = pre_converted_model;
|
| 87 |
+
#ifdef RUBY_WHISPER_USE_COREML
|
| 88 |
+
VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
|
| 89 |
+
VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
|
| 90 |
+
if (!NIL_P(coreml_converted_model)) {
|
| 91 |
+
rb_funcall(coreml_converted_model, id_cache, 0);
|
| 92 |
+
}
|
| 93 |
+
#endif
|
| 94 |
}
|
| 95 |
else if (TYPE(model_path) == T_STRING) {
|
| 96 |
const char * model_path_str = StringValueCStr(model_path);
|
|
|
|
| 305 |
// Should check when samples.respond_to?(:length)?
|
| 306 |
} else {
|
| 307 |
if (TYPE(samples) == T_ARRAY) {
|
| 308 |
+
if (RARRAY_LEN(samples) > INT_MAX) {
|
| 309 |
+
rb_raise(rb_eArgError, "samples are too long");
|
| 310 |
+
}
|
| 311 |
+
n_samples = (int)RARRAY_LEN(samples);
|
| 312 |
} else if (memory_view_available_p) {
|
| 313 |
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
| 314 |
view.obj = Qnil;
|
| 315 |
rb_raise(rb_eArgError, "unable to get a memory view");
|
| 316 |
}
|
| 317 |
+
ssize_t n_samples_size = view.byte_size / view.item_size;
|
| 318 |
+
if (n_samples_size > INT_MAX) {
|
| 319 |
+
rb_raise(rb_eArgError, "samples are too long");
|
| 320 |
+
}
|
| 321 |
+
n_samples = (int)n_samples_size;
|
| 322 |
} else if (rb_respond_to(samples, id_length)) {
|
| 323 |
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
| 324 |
} else {
|
|
|
|
| 406 |
view.obj = Qnil;
|
| 407 |
rb_raise(rb_eArgError, "unable to get a memory view");
|
| 408 |
}
|
| 409 |
+
ssize_t n_samples_size = view.byte_size / view.item_size;
|
| 410 |
+
if (n_samples_size > INT_MAX) {
|
| 411 |
+
rb_raise(rb_eArgError, "samples are too long");
|
| 412 |
+
}
|
| 413 |
+
n_samples = (int)n_samples_size;
|
| 414 |
} else {
|
| 415 |
if (TYPE(samples) == T_ARRAY) {
|
| 416 |
+
if (RARRAY_LEN(samples) > INT_MAX) {
|
| 417 |
+
rb_raise(rb_eArgError, "samples are too long");
|
| 418 |
+
}
|
| 419 |
+
n_samples = (int)RARRAY_LEN(samples);
|
| 420 |
} else if (rb_respond_to(samples, id_length)) {
|
| 421 |
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
| 422 |
} else {
|
|
|
|
| 502 |
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
| 503 |
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
| 504 |
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
| 505 |
+
return LONG2NUM(t0);
|
| 506 |
}
|
| 507 |
|
| 508 |
/*
|
|
|
|
| 520 |
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
| 521 |
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
| 522 |
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
| 523 |
+
return LONG2NUM(t1);
|
| 524 |
}
|
| 525 |
|
| 526 |
/*
|
|
|
|
| 578 |
static VALUE
|
| 579 |
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
| 580 |
{
|
| 581 |
+
return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
|
| 582 |
}
|
| 583 |
|
| 584 |
/*
|
|
|
|
| 612 |
|
| 613 |
const int n_segments = whisper_full_n_segments(rw->context);
|
| 614 |
for (int i = 0; i < n_segments; ++i) {
|
| 615 |
+
rb_yield(rb_whisper_segment_s_new(self, i));
|
| 616 |
}
|
| 617 |
|
| 618 |
return self;
|
|
|
|
| 625 |
static VALUE
|
| 626 |
ruby_whisper_get_model(VALUE self)
|
| 627 |
{
|
| 628 |
+
return rb_whisper_model_s_new(self);
|
| 629 |
}
|
| 630 |
|
| 631 |
void
|
bindings/ruby/ext/ruby_whisper_model.c
CHANGED
|
@@ -35,7 +35,7 @@ static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
|
| 35 |
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
| 36 |
}
|
| 37 |
|
| 38 |
-
VALUE
|
| 39 |
ruby_whisper_model *rwm;
|
| 40 |
const VALUE model = ruby_whisper_model_allocate(cModel);
|
| 41 |
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
|
|
|
| 35 |
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
| 36 |
}
|
| 37 |
|
| 38 |
+
VALUE rb_whisper_model_s_new(VALUE context) {
|
| 39 |
ruby_whisper_model *rwm;
|
| 40 |
const VALUE model = ruby_whisper_model_allocate(cModel);
|
| 41 |
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
bindings/ruby/ext/ruby_whisper_params.c
CHANGED
|
@@ -34,7 +34,7 @@ extern VALUE cVADParams;
|
|
| 34 |
extern ID id_call;
|
| 35 |
|
| 36 |
extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
|
| 37 |
-
extern VALUE
|
| 38 |
extern const rb_data_type_t ruby_whisper_vad_params_type;
|
| 39 |
|
| 40 |
static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
|
|
@@ -110,7 +110,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta
|
|
| 110 |
const int n_segments = whisper_full_n_segments_from_state(state);
|
| 111 |
for (int i = n_new; i > 0; i--) {
|
| 112 |
int i_segment = n_segments - i;
|
| 113 |
-
VALUE segment =
|
| 114 |
for (int j = 0; j < callbacks_len; j++) {
|
| 115 |
VALUE cb = rb_ary_entry(container->callbacks, j);
|
| 116 |
rb_funcall(cb, id_call, 1, segment);
|
|
|
|
| 34 |
extern ID id_call;
|
| 35 |
|
| 36 |
extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
|
| 37 |
+
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
| 38 |
extern const rb_data_type_t ruby_whisper_vad_params_type;
|
| 39 |
|
| 40 |
static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
|
|
|
|
| 110 |
const int n_segments = whisper_full_n_segments_from_state(state);
|
| 111 |
for (int i = n_new; i > 0; i--) {
|
| 112 |
int i_segment = n_segments - i;
|
| 113 |
+
VALUE segment = rb_whisper_segment_s_new(*container->context, i_segment);
|
| 114 |
for (int j = 0; j < callbacks_len; j++) {
|
| 115 |
VALUE cb = rb_ary_entry(container->callbacks, j);
|
| 116 |
rb_funcall(cb, id_call, 1, segment);
|
bindings/ruby/ext/ruby_whisper_segment.c
CHANGED
|
@@ -38,7 +38,7 @@ ruby_whisper_segment_allocate(VALUE klass)
|
|
| 38 |
}
|
| 39 |
|
| 40 |
VALUE
|
| 41 |
-
|
| 42 |
{
|
| 43 |
ruby_whisper_segment *rws;
|
| 44 |
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
|
@@ -63,7 +63,7 @@ ruby_whisper_segment_get_start_time(VALUE self)
|
|
| 63 |
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
| 64 |
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
| 65 |
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
| 66 |
-
return
|
| 67 |
}
|
| 68 |
|
| 69 |
/*
|
|
@@ -81,7 +81,7 @@ ruby_whisper_segment_get_end_time(VALUE self)
|
|
| 81 |
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
| 82 |
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
| 83 |
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
| 84 |
-
return
|
| 85 |
}
|
| 86 |
|
| 87 |
/*
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
VALUE
|
| 41 |
+
rb_whisper_segment_s_new(VALUE context, int index)
|
| 42 |
{
|
| 43 |
ruby_whisper_segment *rws;
|
| 44 |
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
|
|
|
| 63 |
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
| 64 |
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
| 65 |
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
| 66 |
+
return LONG2NUM(t0 * 10);
|
| 67 |
}
|
| 68 |
|
| 69 |
/*
|
|
|
|
| 81 |
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
| 82 |
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
| 83 |
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
| 84 |
+
return LONG2NUM(t1 * 10);
|
| 85 |
}
|
| 86 |
|
| 87 |
/*
|
bindings/ruby/lib/whisper/model/uri.rb
CHANGED
|
@@ -130,6 +130,44 @@ module Whisper
|
|
| 130 |
end
|
| 131 |
end
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
@pre_converted_models = %w[
|
| 134 |
tiny
|
| 135 |
tiny.en
|
|
@@ -171,8 +209,25 @@ module Whisper
|
|
| 171 |
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
| 172 |
end
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
class << self
|
| 175 |
-
attr_reader :pre_converted_models
|
| 176 |
end
|
| 177 |
end
|
| 178 |
end
|
|
|
|
| 130 |
end
|
| 131 |
end
|
| 132 |
|
| 133 |
+
class ZipURI < URI
|
| 134 |
+
def cache
|
| 135 |
+
zip_path = Pathname(super)
|
| 136 |
+
dest = unzipped_path
|
| 137 |
+
return if dest.exist? && dest.mtime >= zip_path.mtime
|
| 138 |
+
escaping dest do
|
| 139 |
+
system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
|
| 140 |
+
end
|
| 141 |
+
zip_path.to_path
|
| 142 |
+
end
|
| 143 |
+
|
| 144 |
+
def clear_cache
|
| 145 |
+
super
|
| 146 |
+
unzipped_path.rmtree if unzipped_path.exist?
|
| 147 |
+
end
|
| 148 |
+
|
| 149 |
+
private
|
| 150 |
+
|
| 151 |
+
def unzipped_path
|
| 152 |
+
cache_path.sub_ext("")
|
| 153 |
+
end
|
| 154 |
+
|
| 155 |
+
def escaping(path)
|
| 156 |
+
escaped = Pathname("#{path}.removing")
|
| 157 |
+
if path.exist?
|
| 158 |
+
escaped.rmtree if escaped.exist?
|
| 159 |
+
path.rename escaped
|
| 160 |
+
end
|
| 161 |
+
yield
|
| 162 |
+
ensure
|
| 163 |
+
if path.exist?
|
| 164 |
+
escaped.rmtree if escaped.exist?
|
| 165 |
+
else
|
| 166 |
+
escaped.rename path if escaped.exist?
|
| 167 |
+
end
|
| 168 |
+
end
|
| 169 |
+
end
|
| 170 |
+
|
| 171 |
@pre_converted_models = %w[
|
| 172 |
tiny
|
| 173 |
tiny.en
|
|
|
|
| 209 |
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
| 210 |
end
|
| 211 |
|
| 212 |
+
@coreml_compiled_models = %w[
|
| 213 |
+
tiny
|
| 214 |
+
tiny.en
|
| 215 |
+
base
|
| 216 |
+
base.en
|
| 217 |
+
small
|
| 218 |
+
small.en
|
| 219 |
+
medium
|
| 220 |
+
medium.en
|
| 221 |
+
large-v1
|
| 222 |
+
large-v2
|
| 223 |
+
large-v3
|
| 224 |
+
large-v3-turbo
|
| 225 |
+
].each_with_object({}) do |name, models|
|
| 226 |
+
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
| 227 |
+
end
|
| 228 |
+
|
| 229 |
class << self
|
| 230 |
+
attr_reader :pre_converted_models, :coreml_compiled_models
|
| 231 |
end
|
| 232 |
end
|
| 233 |
end
|
bindings/ruby/sig/whisper.rbs
CHANGED
|
@@ -22,6 +22,7 @@ module Whisper
|
|
| 22 |
def self.lang_str: (Integer id) -> String
|
| 23 |
def self.lang_str_full: (Integer id) -> String
|
| 24 |
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
|
|
|
| 25 |
|
| 26 |
class Context
|
| 27 |
def self.new: (path | ::URI::HTTP) -> instance
|
|
@@ -386,6 +387,7 @@ module Whisper
|
|
| 386 |
|
| 387 |
class Model
|
| 388 |
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
|
|
|
| 389 |
def self.new: () -> instance
|
| 390 |
def n_vocab: () -> Integer
|
| 391 |
def n_audio_ctx: () -> Integer
|
|
@@ -405,6 +407,11 @@ module Whisper
|
|
| 405 |
def to_path: -> String
|
| 406 |
def clear_cache: -> void
|
| 407 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
end
|
| 409 |
|
| 410 |
class Segment
|
|
|
|
| 22 |
def self.lang_str: (Integer id) -> String
|
| 23 |
def self.lang_str_full: (Integer id) -> String
|
| 24 |
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
| 25 |
+
def self.system_info_str: () -> String
|
| 26 |
|
| 27 |
class Context
|
| 28 |
def self.new: (path | ::URI::HTTP) -> instance
|
|
|
|
| 387 |
|
| 388 |
class Model
|
| 389 |
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
| 390 |
+
def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
|
| 391 |
def self.new: () -> instance
|
| 392 |
def n_vocab: () -> Integer
|
| 393 |
def n_audio_ctx: () -> Integer
|
|
|
|
| 407 |
def to_path: -> String
|
| 408 |
def clear_cache: -> void
|
| 409 |
end
|
| 410 |
+
|
| 411 |
+
class ZipURI < URI
|
| 412 |
+
def cache: () -> String
|
| 413 |
+
def clear_cache: () -> void
|
| 414 |
+
end
|
| 415 |
end
|
| 416 |
|
| 417 |
class Segment
|
bindings/ruby/tests/test_model.rb
CHANGED
|
@@ -106,4 +106,13 @@ class TestModel < TestBase
|
|
| 106 |
assert_equal 1, model.ftype
|
| 107 |
assert_equal "base", model.type
|
| 108 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
end
|
|
|
|
| 106 |
assert_equal 1, model.ftype
|
| 107 |
assert_equal "base", model.type
|
| 108 |
end
|
| 109 |
+
|
| 110 |
+
def test_coreml_model_auto_download
|
| 111 |
+
uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
|
| 112 |
+
model_path = Pathname(uri.to_path).sub_ext("")
|
| 113 |
+
model_path.rmtree if model_path.exist?
|
| 114 |
+
|
| 115 |
+
uri.cache
|
| 116 |
+
assert_path_exist model_path
|
| 117 |
+
end
|
| 118 |
end
|
bindings/ruby/tests/test_package.rb
CHANGED
|
@@ -25,6 +25,20 @@ class TestPackage < TestBase
|
|
| 25 |
end
|
| 26 |
end
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
private
|
| 29 |
|
| 30 |
def assert_installed(dir, version)
|
|
|
|
| 25 |
end
|
| 26 |
end
|
| 27 |
|
| 28 |
+
def test_install_with_coreml
|
| 29 |
+
omit_unless RUBY_PLATFORM.match?(/darwin/) do
|
| 30 |
+
gemspec = Gem::Specification.load("whispercpp.gemspec")
|
| 31 |
+
Dir.mktmpdir do |dir|
|
| 32 |
+
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", "--", "--enable-whisper-coreml", exception: true
|
| 33 |
+
assert_installed dir, gemspec.version
|
| 34 |
+
assert_nothing_raised do
|
| 35 |
+
libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
|
| 36 |
+
system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
|
| 37 |
+
end
|
| 38 |
+
end
|
| 39 |
+
end
|
| 40 |
+
end
|
| 41 |
+
|
| 42 |
private
|
| 43 |
|
| 44 |
def assert_installed(dir, version)
|
bindings/ruby/tests/test_whisper.rb
CHANGED
|
@@ -94,6 +94,10 @@ class TestWhisper < TestBase
|
|
| 94 |
end
|
| 95 |
end
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
def test_log_set
|
| 98 |
user_data = Object.new
|
| 99 |
logs = []
|
|
|
|
| 94 |
end
|
| 95 |
end
|
| 96 |
|
| 97 |
+
def test_system_info_str
|
| 98 |
+
assert_match /\AWHISPER : COREML = \d | OPENVINO = \d |/, Whisper.system_info_str
|
| 99 |
+
end
|
| 100 |
+
|
| 101 |
def test_log_set
|
| 102 |
user_data = Object.new
|
| 103 |
logs = []
|
bindings/ruby/whispercpp.gemspec
CHANGED
|
@@ -4,7 +4,7 @@ Gem::Specification.new do |s|
|
|
| 4 |
s.name = "whispercpp"
|
| 5 |
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
| 6 |
s.version = '1.3.3'
|
| 7 |
-
s.date = '2025-
|
| 8 |
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
|
| 9 |
s.email = '[email protected]'
|
| 10 |
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
|
|
|
| 4 |
s.name = "whispercpp"
|
| 5 |
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
| 6 |
s.version = '1.3.3'
|
| 7 |
+
s.date = '2025-06-01'
|
| 8 |
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
|
| 9 |
s.email = '[email protected]'
|
| 10 |
s.extra_rdoc_files = ['LICENSE', 'README.md']
|