Spaces:
Running
Running
ruby : Fix of C++ header guard name, model URI support, type signature and more (#2683)
Browse files* Add test to make Whisper::Context.new accept URI string
* Add test to make Whisper::Context.new accept URI
* Make Whisper::Context.new accept URI string and URI
* Update README
Revert "Fix argument of rb_undefine_finalizer"
* Fix typos
* Add type signature file
* Assign literal to const variable
* Load Whisper::Model::URI from Init_whisper
* Simplify .gitignore
* Don't load whisper.so from whisper/model/uri.rb
* Use each_with_object instead of each
* Add Development section to README
* Rename header guard to conform to C++ naming convention
- bindings/ruby/.gitignore +1 -3
- bindings/ruby/README.md +16 -4
- bindings/ruby/ext/ruby_whisper.cpp +17 -2
- bindings/ruby/ext/ruby_whisper.h +2 -2
- bindings/ruby/lib/whisper.rb +0 -2
- bindings/ruby/lib/whisper/model/uri.rb +130 -130
- bindings/ruby/sig/whisper.rbs +153 -0
- bindings/ruby/tests/test_model.rb +38 -0
bindings/ruby/.gitignore
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
LICENSE
|
| 2 |
pkg/
|
| 3 |
-
lib/whisper
|
| 4 |
-
lib/whisper.bundle
|
| 5 |
-
lib/whisper.dll
|
|
|
|
| 1 |
LICENSE
|
| 2 |
pkg/
|
| 3 |
+
lib/whisper.*
|
|
|
|
|
|
bindings/ruby/README.md
CHANGED
|
@@ -60,10 +60,10 @@ You also can use shorthand for pre-converted models:
|
|
| 60 |
whisper = Whisper::Context.new("base.en")
|
| 61 |
```
|
| 62 |
|
| 63 |
-
You can see the list of prepared model names by `Whisper::Model.
|
| 64 |
|
| 65 |
```ruby
|
| 66 |
-
puts Whisper::Model.
|
| 67 |
# tiny
|
| 68 |
# tiny.en
|
| 69 |
# tiny-q5_1
|
|
@@ -87,8 +87,9 @@ whisper = Whisper::Context.new("path/to/your/model.bin")
|
|
| 87 |
Or, you can download model files:
|
| 88 |
|
| 89 |
```ruby
|
| 90 |
-
|
| 91 |
-
|
|
|
|
| 92 |
```
|
| 93 |
|
| 94 |
See [models][] page for details.
|
|
@@ -222,6 +223,17 @@ end
|
|
| 222 |
|
| 223 |
The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
License
|
| 226 |
-------
|
| 227 |
|
|
|
|
| 60 |
whisper = Whisper::Context.new("base.en")
|
| 61 |
```
|
| 62 |
|
| 63 |
+
You can see the list of prepared model names by `Whisper::Model.pre_converted_models.keys`:
|
| 64 |
|
| 65 |
```ruby
|
| 66 |
+
puts Whisper::Model.pre_converted_models.keys
|
| 67 |
# tiny
|
| 68 |
# tiny.en
|
| 69 |
# tiny-q5_1
|
|
|
|
| 87 |
Or, you can download model files:
|
| 88 |
|
| 89 |
```ruby
|
| 90 |
+
whisper = Whisper::Context.new("https://example.net/uri/of/your/model.bin")
|
| 91 |
+
# Or
|
| 92 |
+
whisper = Whisper::Context.new(URI("https://example.net/uri/of/your/model.bin"))
|
| 93 |
```
|
| 94 |
|
| 95 |
See [models][] page for details.
|
|
|
|
| 223 |
|
| 224 |
The second argument `samples` may be an array, an object with `length` and `each` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
|
| 225 |
|
| 226 |
+
Development
|
| 227 |
+
-----------
|
| 228 |
+
|
| 229 |
+
% git clone https://github.com/ggerganov/whisper.cpp.git
|
| 230 |
+
% cd whisper.cpp/bindings/ruby
|
| 231 |
+
% rake test
|
| 232 |
+
|
| 233 |
+
The first call of `rake test` builds the extension and downloads a model for testing. After that, you can add tests in the `tests` directory and modify `ext/ruby_whisper.cpp`.
|
| 234 |
+
|
| 235 |
+
If something seems wrong with the build, running `rake clean` solves the problem in some cases.
|
| 236 |
+
|
| 237 |
License
|
| 238 |
-------
|
| 239 |
|
bindings/ruby/ext/ruby_whisper.cpp
CHANGED
|
@@ -49,6 +49,7 @@ static ID id_length;
|
|
| 49 |
static ID id_next;
|
| 50 |
static ID id_new;
|
| 51 |
static ID id_to_path;
|
|
|
|
| 52 |
static ID id_pre_converted_models;
|
| 53 |
|
| 54 |
static bool is_log_callback_finalized = false;
|
|
@@ -283,6 +284,17 @@ static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
|
| 283 |
if (!NIL_P(pre_converted_model)) {
|
| 284 |
whisper_model_file_path = pre_converted_model;
|
| 285 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
|
| 287 |
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
|
| 288 |
}
|
|
@@ -837,7 +849,7 @@ static VALUE ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment) {
|
|
| 837 |
|
| 838 |
/*
|
| 839 |
* call-seq:
|
| 840 |
-
* full_get_segment_no_speech_prob -> Float
|
| 841 |
*/
|
| 842 |
static VALUE ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment) {
|
| 843 |
ruby_whisper *rw;
|
|
@@ -1755,7 +1767,7 @@ static VALUE ruby_whisper_c_model_type(VALUE self) {
|
|
| 1755 |
|
| 1756 |
static VALUE ruby_whisper_error_initialize(VALUE self, VALUE code) {
|
| 1757 |
const int c_code = NUM2INT(code);
|
| 1758 |
-
char *raw_message;
|
| 1759 |
switch (c_code) {
|
| 1760 |
case -2:
|
| 1761 |
raw_message = "failed to compute log mel spectrogram";
|
|
@@ -1802,6 +1814,7 @@ void Init_whisper() {
|
|
| 1802 |
id_next = rb_intern("next");
|
| 1803 |
id_new = rb_intern("new");
|
| 1804 |
id_to_path = rb_intern("to_path");
|
|
|
|
| 1805 |
id_pre_converted_models = rb_intern("pre_converted_models");
|
| 1806 |
|
| 1807 |
mWhisper = rb_define_module("Whisper");
|
|
@@ -1941,6 +1954,8 @@ void Init_whisper() {
|
|
| 1941 |
rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0);
|
| 1942 |
rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0);
|
| 1943 |
rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0);
|
|
|
|
|
|
|
| 1944 |
}
|
| 1945 |
#ifdef __cplusplus
|
| 1946 |
}
|
|
|
|
| 49 |
static ID id_next;
|
| 50 |
static ID id_new;
|
| 51 |
static ID id_to_path;
|
| 52 |
+
static ID id_URI;
|
| 53 |
static ID id_pre_converted_models;
|
| 54 |
|
| 55 |
static bool is_log_callback_finalized = false;
|
|
|
|
| 284 |
if (!NIL_P(pre_converted_model)) {
|
| 285 |
whisper_model_file_path = pre_converted_model;
|
| 286 |
}
|
| 287 |
+
if (TYPE(whisper_model_file_path) == T_STRING) {
|
| 288 |
+
const char * whisper_model_file_path_str = StringValueCStr(whisper_model_file_path);
|
| 289 |
+
if (strncmp("http://", whisper_model_file_path_str, 7) == 0 || strncmp("https://", whisper_model_file_path_str, 8) == 0) {
|
| 290 |
+
VALUE uri_class = rb_const_get(cModel, id_URI);
|
| 291 |
+
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
| 292 |
+
}
|
| 293 |
+
}
|
| 294 |
+
if (rb_obj_is_kind_of(whisper_model_file_path, rb_path2class("URI::HTTP"))) {
|
| 295 |
+
VALUE uri_class = rb_const_get(cModel, id_URI);
|
| 296 |
+
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
| 297 |
+
}
|
| 298 |
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
|
| 299 |
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
|
| 300 |
}
|
|
|
|
| 849 |
|
| 850 |
/*
|
| 851 |
* call-seq:
|
| 852 |
+
* full_get_segment_no_speech_prob(segment_index) -> Float
|
| 853 |
*/
|
| 854 |
static VALUE ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment) {
|
| 855 |
ruby_whisper *rw;
|
|
|
|
| 1767 |
|
| 1768 |
static VALUE ruby_whisper_error_initialize(VALUE self, VALUE code) {
|
| 1769 |
const int c_code = NUM2INT(code);
|
| 1770 |
+
const char *raw_message;
|
| 1771 |
switch (c_code) {
|
| 1772 |
case -2:
|
| 1773 |
raw_message = "failed to compute log mel spectrogram";
|
|
|
|
| 1814 |
id_next = rb_intern("next");
|
| 1815 |
id_new = rb_intern("new");
|
| 1816 |
id_to_path = rb_intern("to_path");
|
| 1817 |
+
id_URI = rb_intern("URI");
|
| 1818 |
id_pre_converted_models = rb_intern("pre_converted_models");
|
| 1819 |
|
| 1820 |
mWhisper = rb_define_module("Whisper");
|
|
|
|
| 1954 |
rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0);
|
| 1955 |
rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0);
|
| 1956 |
rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0);
|
| 1957 |
+
|
| 1958 |
+
rb_require("whisper/model/uri");
|
| 1959 |
}
|
| 1960 |
#ifdef __cplusplus
|
| 1961 |
}
|
bindings/ruby/ext/ruby_whisper.h
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
#ifndef
|
| 2 |
-
#define
|
| 3 |
|
| 4 |
#include "whisper.h"
|
| 5 |
|
|
|
|
| 1 |
+
#ifndef RUBY_WHISPER_H
|
| 2 |
+
#define RUBY_WHISPER_H
|
| 3 |
|
| 4 |
#include "whisper.h"
|
| 5 |
|
bindings/ruby/lib/whisper.rb
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
require "whisper.so"
|
| 2 |
-
require "whisper/model/uri"
|
|
|
|
|
|
|
|
|
bindings/ruby/lib/whisper/model/uri.rb
CHANGED
|
@@ -1,163 +1,163 @@
|
|
| 1 |
-
require "whisper.so"
|
| 2 |
require "uri"
|
| 3 |
require "net/http"
|
| 4 |
require "time"
|
| 5 |
require "pathname"
|
| 6 |
require "io/console/size"
|
| 7 |
|
| 8 |
-
|
| 9 |
-
class
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
|
| 24 |
-
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
# noop
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
end
|
| 66 |
end
|
| 67 |
end
|
| 68 |
-
end
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
end
|
| 82 |
-
|
| 83 |
end
|
| 84 |
-
downloading_path.rename path
|
| 85 |
-
end
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
|
| 95 |
-
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
end
|
| 112 |
-
@prev = now
|
| 113 |
-
end
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
| 121 |
end
|
| 122 |
-
end
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
end
|
| 163 |
end
|
|
|
|
|
|
|
| 1 |
require "uri"
|
| 2 |
require "net/http"
|
| 3 |
require "time"
|
| 4 |
require "pathname"
|
| 5 |
require "io/console/size"
|
| 6 |
|
| 7 |
+
module Whisper
  class Model
    # A model file addressed by an HTTP(S) URI and cached on the local disk.
    #
    # Instances respond to +to_path+, which fetches the file when needed
    # (using a conditional GET if a cached copy already exists) and returns
    # the local cache path as a String.
    class URI
      # @param uri [String, ::URI::Generic] location of the model file
      def initialize(uri)
        @uri = URI(uri)
      end

      # Makes sure the model is cached locally and returns its path.
      #
      # @return [String] path of the cached model file
      def to_path
        cache
        cache_path.to_path
      end

      # Deletes the cached file if it exists.
      def clear_cache
        path = cache_path
        path.delete if path.exist?
      end

      private

      # @return [Pathname] <cache dir>/<host>/<URI path without leading slash>
      def cache_path
        dir = base_cache_dir
        dir / @uri.host / @uri.path[1..]
      end

      # Picks the per-user cache directory for the current platform and
      # appends "whisper.cpp" to it.
      #
      # Every branch yields a Pathname. An XDG_CACHE_HOME value taken from the
      # environment is a plain String, so it must be wrapped before the
      # Pathname#/ call below (String has no "/" operator).
      #
      # @return [Pathname]
      def base_cache_dir
        base = case RUBY_PLATFORM
               when /mswin|mingw/
                 ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home) / "AppData/Local"
               when /darwin/
                 Pathname(Dir.home) / "Library/Caches"
               else
                 ENV.key?("XDG_CACHE_HOME") ? Pathname(ENV["XDG_CACHE_HOME"]) : Pathname(Dir.home) / ".cache"
               end
        base / "whisper.cpp"
      end

      # Refreshes the cache entry. When a cached file exists its mtime is sent
      # as If-Modified-Since so an unchanged file is not downloaded again.
      #
      # @return [Pathname] the cache path
      def cache
        path = cache_path
        headers = {}
        headers["if-modified-since"] = path.mtime.httpdate if path.exist?
        request @uri, headers
        path
      end

      # Performs a GET for +uri+ and dispatches on the response class.
      #
      # Net::HTTPNotModified is tested before Net::HTTPRedirection on purpose:
      # 304 is a 3xx response and would otherwise be handled as a redirect.
      #
      # @param uri [::URI::Generic]
      # @param headers [Hash{String => String}]
      # @raise [RuntimeError] on any other response when no cached file is in use
      def request(uri, headers)
        Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
          get = Net::HTTP::Get.new(uri, headers)
          http.request get do |response|
            case response
            when Net::HTTPNotModified
              # noop
            when Net::HTTPOK
              download response
            when Net::HTTPRedirection
              request(URI(response["location"]), headers)
            else
              return if headers.key?("if-modified-since") # Use cache file

              raise "#{response.code} #{response.message}\n#{response.body}"
            end
          end
        end
      end

      # Streams the response body into "<cache path>.downloading" and renames
      # it to the final cache path once the body has been read completely.
      #
      # @param response [Net::HTTPResponse]
      def download(response)
        path = cache_path
        path.dirname.mkpath unless path.dirname.exist?
        downloading_path = Pathname("#{path}.downloading")
        size = response.content_length
        downloading_path.open "wb" do |file|
          downloaded = 0
          response.read_body do |chunk|
            file << chunk
            downloaded += chunk.bytesize
            show_progress downloaded, size
          end
          $stderr.puts
        end
        downloading_path.rename path
      end

      # Reports download progress on $stderr, at most about once per second.
      # Draws a progress bar when the total size is known and $stderr is a
      # terminal; otherwise prints a dot.
      #
      # @param current [Integer] bytes downloaded so far
      # @param size [Integer, nil] total size, or nil when unknown
      def show_progress(current, size)
        progress_rate_available = size && $stderr.tty?

        unless @prev
          @prev = Time.now
          $stderr.puts "Downloading #{@uri} to #{cache_path}"
        end

        now = Time.now

        if progress_rate_available
          return if now - @prev < 1 && current < size

          line = progress_line(current, size)
          # Clamp at zero: String#* raises ArgumentError for a negative count,
          # which would happen when the terminal is narrower than the line.
          padding = " " * [$stderr.winsize[1] - line.size, 0].max
          $stderr.print "\r#{line}#{padding}"
        else
          return if now - @prev < 1

          $stderr.print "."
        end
        @prev = now
      end

      # Builds the textual progress bar, e.g. "[====>     ] (1.0 MiB / 2.0 MiB)".
      # The arrow length is an Integer clamped to 1..width, so the bar always
      # has the same width and never asks String#* for a negative repeat count
      # (e.g. when more bytes arrive than the announced size).
      #
      # @param current [Integer] bytes downloaded so far
      # @param size [Integer] announced total size
      # @return [String]
      def progress_line(current, size)
        progress_width = 20
        progress = size.positive? ? current.to_f / size : 1.0
        arrow_length = (progress * progress_width).floor.clamp(1, progress_width)
        arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
        "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
      end

      # Formats a byte count with a binary unit and one decimal place.
      #
      # @param bytesize [Integer]
      # @return [String] e.g. "1.5 KiB"
      def format_bytesize(bytesize)
        return "0.0 B" if bytesize.zero?

        units = %w[B KiB MiB GiB TiB]
        exp = (Math.log(bytesize) / Math.log(1024)).to_i
        # Stay inside the unit table; larger values are expressed in TiB.
        exp = [exp, units.size - 1].min
        format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
      end
    end

    # Shorthand model name => URI of the corresponding ggml-<name>.bin file.
    @pre_converted_models = %w[
      tiny
      tiny.en
      tiny-q5_1
      tiny.en-q5_1
      tiny-q8_0
      base
      base.en
      base-q5_1
      base.en-q5_1
      base-q8_0
      small
      small.en
      small.en-tdrz
      small-q5_1
      small.en-q5_1
      small-q8_0
      medium
      medium.en
      medium-q5_0
      medium.en-q5_0
      medium-q8_0
      large-v1
      large-v2
      large-v2-q5_0
      large-v2-q8_0
      large-v3
      large-v3-q5_0
      large-v3-turbo
      large-v3-turbo-q5_0
      large-v3-turbo-q8_0
    ].each_with_object({}) {|name, models|
      models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
    }

    class << self
      attr_reader :pre_converted_models
    end
  end
end
|
bindings/ruby/sig/whisper.rbs
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Type signatures for the Whisper Ruby binding.
module Whisper
  # Sample container accepted by Context#full and Context#full_parallel:
  # any object that exposes `length` and `each`.
  interface _Samples
    def length: () -> Integer
    def each: { (Float) -> void } -> void
  end

  # Callback shapes.
  type log_callback = ^(Integer level, String message, Object user_data) -> void
  type new_segment_callback = ^(Whisper::Context, void, Integer n_new, Object user_data) -> void
  type progress_callback = ^(Whisper::Context, void, Integer progress, Object user_data) -> void
  type abort_callback = ^(Whisper::Context, void, Object user_data) -> boolish

  # Log level constants.
  LOG_LEVEL_NONE: Integer
  LOG_LEVEL_INFO: Integer
  LOG_LEVEL_WARN: Integer
  LOG_LEVEL_ERROR: Integer
  LOG_LEVEL_DEBUG: Integer
  LOG_LEVEL_CONT: Integer

  # Module-level functions.
  def self.lang_max_id: () -> Integer
  def self.lang_id: (string name) -> Integer
  def self.lang_str: (Integer id) -> String
  def self.lang_str_full: (Integer id) -> String
  def self.log_set=: (log_callback) -> log_callback
  def self.finalize_log_callback: (void) -> void # Second argument of ObjectSpace.define_finalizer

  class Context
    # Accepts a path-like string, an object with `to_path`, or an HTTP(S) URI.
    def initialize: (string | _ToPath | ::URI::HTTP ) -> void
    def transcribe: (string, Params) -> void
                  | (string, Params) { (String) -> void } -> void

    # Model properties.
    def model_n_vocab: () -> Integer
    def model_n_audio_ctx: () -> Integer
    def model_n_audio_state: () -> Integer
    def model_n_text_head: () -> Integer
    def model_n_text_layer: () -> Integer
    def model_n_mels: () -> Integer
    def model_ftype: () -> Integer
    def model_type: () -> String

    # Result accessors; the Integer argument is the segment index.
    def full_n_segments: () -> Integer
    def full_lang_id: () -> Integer
    def full_get_segment_t0: (Integer) -> Integer
    def full_get_segment_t1: (Integer) -> Integer
    def full_get_segment_speaker_turn_next: (Integer) -> bool
    def full_get_segment_text: (Integer) -> String
    def full_get_segment_no_speech_prob: (Integer) -> Float

    def full: (Params, Array[Float], ?Integer) -> void
            | (Params, _Samples, ?Integer) -> void
    def full_parallel: (Params, Array[Float], ?Integer) -> void
                     | (Params, _Samples, ?Integer) -> void
                     | (Params, _Samples, ?Integer?, Integer) -> void
    def each_segment: { (Segment) -> void } -> void
                    | () -> Enumerator[Segment]
    def model: () -> Model
  end

  class Params
    def initialize: () -> void
    def language=: (String) -> String # TODO: Enumerate lang names
    def language: () -> String

    # Boolean switches.
    def translate=: (boolish) -> boolish
    def translate: () -> bool
    def no_context=: (boolish) -> boolish
    def no_context: () -> bool
    def single_segment=: (boolish) -> boolish
    def single_segment: () -> bool
    def print_special=: (boolish) -> boolish
    def print_special: () -> bool
    def print_progress=: (boolish) -> boolish
    def print_progress: () -> bool
    def print_realtime=: (boolish) -> boolish
    def print_realtime: () -> bool
    def print_timestamps=: (boolish) -> boolish
    def print_timestamps: () -> bool
    def suppress_blank=: (boolish) -> boolish
    def suppress_blank: () -> bool
    def suppress_nst=: (boolish) -> boolish
    def suppress_nst: () -> bool
    def token_timestamps=: (boolish) -> boolish
    def token_timestamps: () -> bool
    def split_on_word=: (boolish) -> boolish
    def split_on_word: () -> bool
    def initial_prompt=: (_ToS) -> _ToS
    def initial_prompt: () -> String
    def diarize=: (boolish) -> boolish
    def diarize: () -> bool

    # Integer settings.
    def offset=: (Integer) -> Integer
    def offset: () -> Integer
    def duration=: (Integer) -> Integer
    def duration: () -> Integer
    def max_text_tokens=: (Integer) -> Integer
    def max_text_tokens: () -> Integer

    # Float settings.
    def temperature=: (Float) -> Float
    def temperature: () -> Float
    def max_initial_ts=: (Float) -> Float
    def max_initial_ts: () -> Float
    def length_penalty=: (Float) -> Float
    def length_penalty: () -> Float
    def temperature_inc=: (Float) -> Float
    def temperature_inc: () -> Float
    def entropy_thold=: (Float) -> Float
    def entropy_thold: () -> Float
    def logprob_thold=: (Float) -> Float
    def logprob_thold: () -> Float
    def no_speech_thold=: (Float) -> Float
    def no_speech_thold: () -> Float

    # Callback setters and their user data.
    def new_segment_callback=: (new_segment_callback) -> new_segment_callback
    def new_segment_callback_user_data=: (Object) -> Object
    def progress_callback=: (progress_callback) -> progress_callback
    def progress_callback_user_data=: (Object) -> Object
    def abort_callback=: (abort_callback) -> abort_callback
    def abort_callback_user_data=: (Object) -> Object

    # Block-style hooks.
    def on_new_segment: { (Segment) -> void } -> void
    def on_progress: { (Integer) -> void } -> void
    def abort_on: { (Object) -> boolish } -> void
  end

  class Model
    def self.pre_converted_models: () -> Hash[String, Model::URI]
    def initialize: () -> void
    def n_vocab: () -> Integer
    def n_audio_ctx: () -> Integer
    def n_audio_state: () -> Integer
    def n_audio_head: () -> Integer
    def n_audio_layer: () -> Integer
    def n_text_ctx: () -> Integer
    def n_text_state: () -> Integer
    def n_text_head: () -> Integer
    def n_text_layer: () -> Integer
    def n_mels: () -> Integer
    def ftype: () -> Integer
    def type: () -> String

    # Model file addressed by URI; `to_path` returns a String path.
    class URI
      def initialize: (string | ::URI::HTTP) -> void
      def to_path: () -> String
      def clear_cache: () -> void
    end
  end

  class Segment
    def initialize: () -> void
    def start_time: () -> Integer
    def end_time: () -> Integer
    def speaker_next_turn?: () -> bool
    def text: () -> String
    def no_speech_prob: () -> Float
  end

  class Error < StandardError
    attr_reader code: Integer

    def initialize: (Integer) -> void
  end
end
|
bindings/ruby/tests/test_model.rb
CHANGED
|
@@ -68,4 +68,42 @@ class TestModel < TestBase
|
|
| 68 |
assert_path_exist path
|
| 69 |
assert_equal 147964211, File.size(path)
|
| 70 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
end
|
|
|
|
| 68 |
assert_path_exist path
|
| 69 |
assert_equal 147964211, File.size(path)
|
| 70 |
end
|
| 71 |
+
|
| 72 |
+
# Whisper::Context.new accepts the model location as a URI given in a String.
def test_uri_string
  model = Whisper::Context.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin").model

  # Expected properties of the loaded model, checked in this order.
  {
    n_vocab: 51864,
    n_audio_ctx: 1500,
    n_audio_state: 512,
    n_audio_head: 8,
    n_audio_layer: 6,
    n_text_ctx: 448,
    n_text_state: 512,
    n_text_head: 8,
    n_text_layer: 6,
    n_mels: 80,
    ftype: 1,
    type: "base"
  }.each do |attribute, expected|
    assert_equal expected, model.public_send(attribute)
  end
end
|
| 90 |
+
|
| 91 |
+
# Whisper::Context.new accepts the model location as a URI object.
def test_uri
  model = Whisper::Context.new(URI("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin")).model

  # Expected properties of the loaded model, checked in this order.
  {
    n_vocab: 51864,
    n_audio_ctx: 1500,
    n_audio_state: 512,
    n_audio_head: 8,
    n_audio_layer: 6,
    n_text_ctx: 448,
    n_text_state: 512,
    n_text_head: 8,
    n_text_layer: 6,
    n_mels: 80,
    ftype: 1,
    type: "base"
  }.each do |attribute, expected|
    assert_equal expected, model.public_send(attribute)
  end
end
|
| 109 |
end
|