Spaces:
Running
Running
| extern "C" { | |
| // Ruby-side handles shared by the functions below (presumably assigned in the | |
| // extension's init routine, which is not visible in this chunk). | |
| // mWhisper receives the "log_callback"/"user_data" hidden ivars; eError is | |
| // instantiated via id_new when whisper_full / whisper_full_parallel fail. | |
| VALUE mWhisper; | |
| VALUE cContext; | |
| VALUE cParams; | |
| VALUE eError; | |
| // Cached method IDs used with rb_respond_to / rb_funcall further down. | |
| static ID id_to_s; | |
| static ID id_call; | |
| static ID id___method__; | |
| static ID id_to_enum; | |
| static ID id_length; | |
| static ID id_next; | |
| static ID id_new; | |
| // Set to true by ruby_whisper_s_finalize_log_callback; while true, the native | |
| // log hook installed by ruby_whisper_s_log_set returns without calling Ruby. | |
| static bool is_log_callback_finalized = false; | |
| /* | |
| * call-seq: | |
| * lang_max_id -> Integer | |
| */ | |
| static VALUE ruby_whisper_s_lang_max_id(VALUE self) { | |
| return INT2NUM(whisper_lang_max_id()); | |
| } | |
| /* | |
| * call-seq: | |
| * lang_id(lang_name) -> Integer | |
| */ | |
| static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) { | |
| const char * lang_str = StringValueCStr(lang); | |
| const int id = whisper_lang_id(lang_str); | |
| if (-1 == id) { | |
| rb_raise(rb_eArgError, "language not found: %s", lang_str); | |
| } | |
| return INT2NUM(id); | |
| } | |
| /* | |
| * call-seq: | |
| * lang_str(lang_id) -> String | |
| */ | |
| static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) { | |
| const int lang_id = NUM2INT(id); | |
| const char * str = whisper_lang_str(lang_id); | |
| if (nullptr == str) { | |
| rb_raise(rb_eIndexError, "id %d outside of language id", lang_id); | |
| } | |
| return rb_str_new2(str); | |
| } | |
| /* | |
| * call-seq: | |
| * lang_str(lang_id) -> String | |
| */ | |
| static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) { | |
| const int lang_id = NUM2INT(id); | |
| const char * str_full = whisper_lang_str_full(lang_id); | |
| if (nullptr == str_full) { | |
| rb_raise(rb_eIndexError, "id %d outside of language id", lang_id); | |
| } | |
| return rb_str_new2(str_full); | |
| } | |
| static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) { | |
| is_log_callback_finalized = true; | |
| return Qnil; | |
| } | |
| /* | |
| * call-seq: | |
| * log_set ->(level, buffer, user_data) { ... }, user_data -> nil | |
| */ | |
| static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) { | |
| VALUE old_callback = rb_iv_get(self, "log_callback"); | |
| if (!NIL_P(old_callback)) { | |
| rb_undefine_finalizer(old_callback); | |
| } | |
| rb_iv_set(self, "log_callback", log_callback); | |
| rb_iv_set(self, "user_data", user_data); | |
| VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback")); | |
| rb_define_finalizer(log_callback, finalize_log_callback); | |
| whisper_log_set([](ggml_log_level level, const char * buffer, void * user_data) { | |
| if (is_log_callback_finalized) { | |
| return; | |
| } | |
| VALUE log_callback = rb_iv_get(mWhisper, "log_callback"); | |
| VALUE udata = rb_iv_get(mWhisper, "user_data"); | |
| rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata); | |
| }, nullptr); | |
| return Qnil; | |
| } | |
| static void ruby_whisper_free(ruby_whisper *rw) { | |
| if (rw->context) { | |
| whisper_free(rw->context); | |
| rw->context = NULL; | |
| } | |
| } | |
| static void ruby_whisper_params_free(ruby_whisper_params *rwp) { | |
| } | |
| // GC mark hook handed to Data_Wrap_Struct by ruby_whisper_allocate. | |
| // Intentionally empty: the only member of ruby_whisper touched in this chunk | |
| // is the native context pointer, which holds nothing for rb_gc_mark. | |
| void rb_whisper_mark(ruby_whisper *rw) { | |
| // call rb_gc_mark on any ruby references in rw | |
| } | |
| void rb_whisper_free(ruby_whisper *rw) { | |
| ruby_whisper_free(rw); | |
| free(rw); | |
| } | |
| void rb_whisper_callbcack_container_mark(ruby_whisper_callback_container *rwc) { | |
| rb_gc_mark(rwc->user_data); | |
| rb_gc_mark(rwc->callback); | |
| rb_gc_mark(rwc->callbacks); | |
| } | |
| void rb_whisper_params_mark(ruby_whisper_params *rwp) { | |
| rb_whisper_callbcack_container_mark(rwp->new_segment_callback_container); | |
| rb_whisper_callbcack_container_mark(rwp->progress_callback_container); | |
| rb_whisper_callbcack_container_mark(rwp->abort_callback_container); | |
| } | |
| void rb_whisper_params_free(ruby_whisper_params *rwp) { | |
| // How to free user_data and callback only when not referred to by others? | |
| ruby_whisper_params_free(rwp); | |
| free(rwp); | |
| } | |
| static VALUE ruby_whisper_allocate(VALUE klass) { | |
| ruby_whisper *rw; | |
| rw = ALLOC(ruby_whisper); | |
| rw->context = NULL; | |
| return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw); | |
| } | |
| static ruby_whisper_callback_container * rb_whisper_callback_container_allocate() { | |
| ruby_whisper_callback_container *container; | |
| container = ALLOC(ruby_whisper_callback_container); | |
| container->context = nullptr; | |
| container->user_data = Qnil; | |
| container->callback = Qnil; | |
| container->callbacks = rb_ary_new(); | |
| return container; | |
| } | |
| static VALUE ruby_whisper_params_allocate(VALUE klass) { | |
| ruby_whisper_params *rwp; | |
| rwp = ALLOC(ruby_whisper_params); | |
| rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); | |
| rwp->new_segment_callback_container = rb_whisper_callback_container_allocate(); | |
| rwp->progress_callback_container = rb_whisper_callback_container_allocate(); | |
| rwp->abort_callback_container = rb_whisper_callback_container_allocate(); | |
| return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp); | |
| } | |
| /* | |
| * call-seq: | |
| * new("path/to/model.bin") -> Whisper::Context | |
| */ | |
| static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) { | |
| ruby_whisper *rw; | |
| VALUE whisper_model_file_path; | |
| // TODO: we can support init from buffer here too maybe another ruby object to expose | |
| rb_scan_args(argc, argv, "01", &whisper_model_file_path); | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| if (!rb_respond_to(whisper_model_file_path, id_to_s)) { | |
| rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context"); | |
| } | |
| rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params()); | |
| if (rw->context == nullptr) { | |
| rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context"); | |
| } | |
| return self; | |
| } | |
| // High level API | |
| static VALUE rb_whisper_segment_initialize(VALUE context, int index); | |
| /* | |
| * transcribe a single file | |
| * can emit to a block results | |
| * | |
| * params = Whisper::Params.new | |
| * params.duration = 60_000 | |
| * whisper.transcribe "path/to/audio.wav", params do |text| | |
| * puts text | |
| * end | |
| * | |
| * call-seq: | |
| * transcribe(path_to_audio, params) {|text| ...} | |
| **/ | |
| static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) { | |
| ruby_whisper *rw; | |
| ruby_whisper_params *rwp; | |
| VALUE wave_file_path, blk, params; | |
| rb_scan_args(argc, argv, "02&", &wave_file_path, ¶ms, &blk); | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| Data_Get_Struct(params, ruby_whisper_params, rwp); | |
| if (!rb_respond_to(wave_file_path, id_to_s)) { | |
| rb_raise(rb_eRuntimeError, "Expected file path to wave file"); | |
| } | |
| std::string fname_inp = StringValueCStr(wave_file_path); | |
| std::vector<float> pcmf32; // mono-channel F32 PCM | |
| std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM | |
| // WAV input - this is directly from main.cpp example | |
| { | |
| drwav wav; | |
| std::vector<uint8_t> wav_data; // used for pipe input from stdin | |
| if (fname_inp == "-") { | |
| { | |
| uint8_t buf[1024]; | |
| while (true) { | |
| const size_t n = fread(buf, 1, sizeof(buf), stdin); | |
| if (n == 0) { | |
| break; | |
| } | |
| wav_data.insert(wav_data.end(), buf, buf + n); | |
| } | |
| } | |
| if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) { | |
| fprintf(stderr, "error: failed to open WAV file from stdin\n"); | |
| return self; | |
| } | |
| fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); | |
| } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) { | |
| fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); | |
| return self; | |
| } | |
| if (wav.channels != 1 && wav.channels != 2) { | |
| fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str()); | |
| return self; | |
| } | |
| if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) { | |
| fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str()); | |
| return self; | |
| } | |
| if (wav.sampleRate != WHISPER_SAMPLE_RATE) { | |
| fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000); | |
| return self; | |
| } | |
| if (wav.bitsPerSample != 16) { | |
| fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str()); | |
| return self; | |
| } | |
| const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8); | |
| std::vector<int16_t> pcm16; | |
| pcm16.resize(n*wav.channels); | |
| drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); | |
| drwav_uninit(&wav); | |
| // convert to mono, float | |
| pcmf32.resize(n); | |
| if (wav.channels == 1) { | |
| for (uint64_t i = 0; i < n; i++) { | |
| pcmf32[i] = float(pcm16[i])/32768.0f; | |
| } | |
| } else { | |
| for (uint64_t i = 0; i < n; i++) { | |
| pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f; | |
| } | |
| } | |
| if (rwp->diarize) { | |
| // convert to stereo, float | |
| pcmf32s.resize(2); | |
| pcmf32s[0].resize(n); | |
| pcmf32s[1].resize(n); | |
| for (uint64_t i = 0; i < n; i++) { | |
| pcmf32s[0][i] = float(pcm16[2*i])/32768.0f; | |
| pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f; | |
| } | |
| } | |
| } | |
| { | |
| static bool is_aborted = false; // NOTE: this should be atomic to avoid data race | |
| rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) { | |
| bool is_aborted = *(bool*)user_data; | |
| return !is_aborted; | |
| }; | |
| rwp->params.encoder_begin_callback_user_data = &is_aborted; | |
| } | |
| if (!NIL_P(rwp->new_segment_callback_container->callback) || 0 != RARRAY_LEN(rwp->new_segment_callback_container->callbacks)) { | |
| rwp->params.new_segment_callback = [](struct whisper_context * ctx, struct whisper_state * state, int n_new, void * user_data) { | |
| const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; | |
| // Currently, doesn't support state because | |
| // those require to resolve GC-related problems. | |
| if (!NIL_P(container->callback)) { | |
| rb_funcall(container->callback, id_call, 4, *container->context, Qnil, INT2NUM(n_new), container->user_data); | |
| } | |
| const long callbacks_len = RARRAY_LEN(container->callbacks); | |
| if (0 == callbacks_len) { | |
| return; | |
| } | |
| const int n_segments = whisper_full_n_segments_from_state(state); | |
| for (int i = n_new; i > 0; i--) { | |
| int i_segment = n_segments - i; | |
| VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment); | |
| for (int j = 0; j < callbacks_len; j++) { | |
| VALUE cb = rb_ary_entry(container->callbacks, j); | |
| rb_funcall(cb, id_call, 1, segment); | |
| } | |
| } | |
| }; | |
| rwp->new_segment_callback_container->context = &self; | |
| rwp->params.new_segment_callback_user_data = rwp->new_segment_callback_container; | |
| } | |
| if (!NIL_P(rwp->progress_callback_container->callback) || 0 != RARRAY_LEN(rwp->progress_callback_container->callbacks)) { | |
| rwp->params.progress_callback = [](struct whisper_context *ctx, struct whisper_state * /*state*/, int progress_cur, void *user_data) { | |
| const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; | |
| const VALUE progress = INT2NUM(progress_cur); | |
| // Currently, doesn't support state because | |
| // those require to resolve GC-related problems. | |
| if (!NIL_P(container->callback)) { | |
| rb_funcall(container->callback, id_call, 4, *container->context, Qnil, progress, container->user_data); | |
| } | |
| const long callbacks_len = RARRAY_LEN(container->callbacks); | |
| if (0 == callbacks_len) { | |
| return; | |
| } | |
| for (int j = 0; j < callbacks_len; j++) { | |
| VALUE cb = rb_ary_entry(container->callbacks, j); | |
| rb_funcall(cb, id_call, 1, progress); | |
| } | |
| }; | |
| rwp->progress_callback_container->context = &self; | |
| rwp->params.progress_callback_user_data = rwp->progress_callback_container; | |
| } | |
| if (!NIL_P(rwp->abort_callback_container->callback) || 0 != RARRAY_LEN(rwp->abort_callback_container->callbacks)) { | |
| rwp->params.abort_callback = [](void * user_data) { | |
| const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data; | |
| if (!NIL_P(container->callback)) { | |
| VALUE result = rb_funcall(container->callback, id_call, 1, container->user_data); | |
| if (!NIL_P(result) && Qfalse != result) { | |
| return true; | |
| } | |
| } | |
| const long callbacks_len = RARRAY_LEN(container->callbacks); | |
| if (0 == callbacks_len) { | |
| return false; | |
| } | |
| for (int j = 0; j < callbacks_len; j++) { | |
| VALUE cb = rb_ary_entry(container->callbacks, j); | |
| VALUE result = rb_funcall(cb, id_call, 1, container->user_data); | |
| if (!NIL_P(result) && Qfalse != result) { | |
| return true; | |
| } | |
| } | |
| return false; | |
| }; | |
| rwp->abort_callback_container->context = &self; | |
| rwp->params.abort_callback_user_data = rwp->abort_callback_container; | |
| } | |
| if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) { | |
| fprintf(stderr, "failed to process audio\n"); | |
| return self; | |
| } | |
| const int n_segments = whisper_full_n_segments(rw->context); | |
| VALUE output = rb_str_new2(""); | |
| for (int i = 0; i < n_segments; ++i) { | |
| const char * text = whisper_full_get_segment_text(rw->context, i); | |
| output = rb_str_concat(output, rb_str_new2(text)); | |
| } | |
| VALUE idCall = id_call; | |
| if (blk != Qnil) { | |
| rb_funcall(blk, idCall, 1, output); | |
| } | |
| return self; | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_vocab -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_vocab(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_vocab(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_audio_ctx -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_audio_ctx(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_ctx(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_audio_state -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_audio_state(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_state(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_audio_head -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_audio_head(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_head(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_audio_layer -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_audio_layer(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_layer(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_text_ctx -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_text_ctx(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_ctx(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_text_state -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_text_state(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_state(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_text_head -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_text_head(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_head(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_text_layer -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_text_layer(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_layer(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_n_mels -> Integer | |
| */ | |
| VALUE ruby_whisper_model_n_mels(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_mels(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_ftype -> Integer | |
| */ | |
| VALUE ruby_whisper_model_ftype(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_ftype(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * model_type -> String | |
| */ | |
| VALUE ruby_whisper_model_type(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return rb_str_new2(whisper_model_type_readable(rw->context)); | |
| } | |
| /* | |
| * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text | |
| * Not thread safe for same context | |
| * Uses the specified decoding strategy to obtain the text. | |
| * | |
| * call-seq: | |
| * full(params, samples, n_samples) -> nil | |
| * full(params, samples) -> nil | |
| * | |
| * The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data. | |
| */ | |
| VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self) { | |
| if (argc < 2 || argc > 3) { | |
| rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc); | |
| } | |
| ruby_whisper *rw; | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| VALUE params = argv[0]; | |
| Data_Get_Struct(params, ruby_whisper_params, rwp); | |
| VALUE samples = argv[1]; | |
| int n_samples; | |
| rb_memory_view_t view; | |
| const bool memory_view_available_p = rb_memory_view_available_p(samples); | |
| if (argc == 3) { | |
| n_samples = NUM2INT(argv[2]); | |
| if (TYPE(samples) == T_ARRAY) { | |
| if (RARRAY_LEN(samples) < n_samples) { | |
| rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples); | |
| } | |
| } | |
| // Should check when samples.respond_to?(:length)? | |
| } else { | |
| if (TYPE(samples) == T_ARRAY) { | |
| n_samples = RARRAY_LEN(samples); | |
| } else if (memory_view_available_p) { | |
| if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) { | |
| view.obj = Qnil; | |
| rb_raise(rb_eArgError, "unable to get a memory view"); | |
| } | |
| n_samples = view.byte_size / view.item_size; | |
| } else if (rb_respond_to(samples, id_length)) { | |
| n_samples = NUM2INT(rb_funcall(samples, id_length, 0)); | |
| } else { | |
| rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given"); | |
| } | |
| } | |
| float * c_samples = (float *)malloc(n_samples * sizeof(float)); | |
| if (memory_view_available_p) { | |
| c_samples = (float *)view.data; | |
| } else { | |
| if (TYPE(samples) == T_ARRAY) { | |
| for (int i = 0; i < n_samples; i++) { | |
| c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i)); | |
| } | |
| } else { | |
| // TODO: use rb_block_call | |
| VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each")); | |
| for (int i = 0; i < n_samples; i++) { | |
| // TODO: check if iter is exhausted and raise ArgumentError appropriately | |
| VALUE sample = rb_funcall(iter, id_next, 0); | |
| c_samples[i] = RFLOAT_VALUE(sample); | |
| } | |
| } | |
| } | |
| const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples); | |
| if (0 == result) { | |
| return Qnil; | |
| } else { | |
| rb_exc_raise(rb_funcall(eError, id_new, 1, result)); | |
| } | |
| } | |
| /* | |
| * Split the input audio in chunks and process each chunk separately using whisper_full_with_state() | |
| * Result is stored in the default state of the context | |
| * Not thread safe if executed in parallel on the same context. | |
| * It seems this approach can offer some speedup in some cases. | |
| * However, the transcription accuracy can be worse at the beginning and end of each chunk. | |
| * | |
| * call-seq: | |
| * full_parallel(params, samples) -> nil | |
| * full_parallel(params, samples, n_samples) -> nil | |
| * full_parallel(params, samples, n_samples, n_processors) -> nil | |
| * full_parallel(params, samples, nil, n_processors) -> nil | |
| */ | |
| static VALUE ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self) { | |
| if (argc < 2 || argc > 4) { | |
| rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc); | |
| } | |
| ruby_whisper *rw; | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| VALUE params = argv[0]; | |
| Data_Get_Struct(params, ruby_whisper_params, rwp); | |
| VALUE samples = argv[1]; | |
| int n_samples; | |
| int n_processors; | |
| rb_memory_view_t view; | |
| const bool memory_view_available_p = rb_memory_view_available_p(samples); | |
| switch (argc) { | |
| case 2: | |
| n_processors = 1; | |
| break; | |
| case 3: | |
| n_processors = 1; | |
| break; | |
| case 4: | |
| n_processors = NUM2INT(argv[3]); | |
| break; | |
| } | |
| if (argc >= 3 && !NIL_P(argv[2])) { | |
| n_samples = NUM2INT(argv[2]); | |
| if (TYPE(samples) == T_ARRAY) { | |
| if (RARRAY_LEN(samples) < n_samples) { | |
| rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples); | |
| } | |
| } | |
| // Should check when samples.respond_to?(:length)? | |
| } else if (memory_view_available_p) { | |
| if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) { | |
| view.obj = Qnil; | |
| rb_raise(rb_eArgError, "unable to get a memory view"); | |
| } | |
| n_samples = view.byte_size / view.item_size; | |
| } else { | |
| if (TYPE(samples) == T_ARRAY) { | |
| n_samples = RARRAY_LEN(samples); | |
| } else if (rb_respond_to(samples, id_length)) { | |
| n_samples = NUM2INT(rb_funcall(samples, id_length, 0)); | |
| } else { | |
| rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given"); | |
| } | |
| } | |
| float * c_samples = (float *)malloc(n_samples * sizeof(float)); | |
| if (memory_view_available_p) { | |
| c_samples = (float *)view.data; | |
| } else { | |
| if (TYPE(samples) == T_ARRAY) { | |
| for (int i = 0; i < n_samples; i++) { | |
| c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i)); | |
| } | |
| } else { | |
| // FIXME: use rb_block_call | |
| VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each")); | |
| for (int i = 0; i < n_samples; i++) { | |
| // TODO: check if iter is exhausted and raise ArgumentError | |
| VALUE sample = rb_funcall(iter, id_next, 0); | |
| c_samples[i] = RFLOAT_VALUE(sample); | |
| } | |
| } | |
| } | |
| const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors); | |
| if (0 == result) { | |
| return Qnil; | |
| } else { | |
| rb_exc_raise(rb_funcall(eError, id_new, 1, result)); | |
| } | |
| } | |
| /* | |
| * Number of segments. | |
| * | |
| * call-seq: | |
| * full_n_segments -> Integer | |
| */ | |
| static VALUE ruby_whisper_full_n_segments(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_full_n_segments(rw->context)); | |
| } | |
| /* | |
| * Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full. | |
| * | |
| * call-seq: | |
| * full_lang_id -> Integer | |
| */ | |
| static VALUE ruby_whisper_full_lang_id(VALUE self) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| return INT2NUM(whisper_full_lang_id(rw->context)); | |
| } | |
| static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment) { | |
| const int c_i_segment = NUM2INT(i_segment); | |
| if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) { | |
| rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment); | |
| } | |
| return c_i_segment; | |
| } | |
| /* | |
| * Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). | |
| * | |
| * full_get_segment_t0(3) # => 1668 (16680 ms) | |
| * | |
| * call-seq: | |
| * full_get_segment_t0(segment_index) -> Integer | |
| */ | |
| static VALUE ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment); | |
| const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment); | |
| return INT2NUM(t0); | |
| } | |
| /* | |
| * End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). | |
| * | |
| * full_get_segment_t1(3) # => 1668 (16680 ms) | |
| * | |
| * call-seq: | |
| * full_get_segment_t1(segment_index) -> Integer | |
| */ | |
| static VALUE ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment); | |
| const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment); | |
| return INT2NUM(t1); | |
| } | |
| /* | |
| * Whether the next segment indexed by +segment_index+ is predicated as a speaker turn. | |
| * | |
| * full_get_segment_speacker_turn_next(3) # => true | |
| * | |
| * call-seq: | |
| * full_get_segment_speacker_turn_next(segment_index) -> bool | |
| */ | |
| static VALUE ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment); | |
| const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment); | |
| return speaker_turn_next ? Qtrue : Qfalse; | |
| } | |
| /* | |
| * Text of a segment indexed by +segment_index+. | |
| * | |
| * full_get_segment_text(3) # => "ask not what your country can do for you, ..." | |
| * | |
| * call-seq: | |
| * full_get_segment_text(segment_index) -> String | |
| */ | |
| static VALUE ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment) { | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment); | |
| const char * text = whisper_full_get_segment_text(rw->context, c_i_segment); | |
| return rb_str_new2(text); | |
| } | |
| /* | |
| * params.language = "auto" | "en", etc... | |
| * | |
| * call-seq: | |
| * language = lang_name -> lang_name | |
| */ | |
| static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| if (value == Qfalse || value == Qnil) { | |
| rwp->params.language = "auto"; | |
| } else { | |
| rwp->params.language = StringValueCStr(value); | |
| } | |
| return value; | |
| } | |
| /* | |
| * call-seq: | |
| * language -> String | |
| */ | |
| static VALUE ruby_whisper_params_get_language(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| if (rwp->params.language) { | |
| return rb_str_new2(rwp->params.language); | |
| } else { | |
| return rb_str_new2("auto"); | |
| } | |
| } | |
| /* | |
| * Writer for the +translate+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * translate = do_translate -> do_translate | |
| */ | |
| static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, translate, value) | |
| } | |
| /* | |
| * Reader for the +translate+ parameter, implemented by the BOOL_PARAMS_GETTER | |
| * macro (defined outside this chunk; presumably returns true or false from | |
| * the params struct -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * translate -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_translate(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, translate) | |
| } | |
| /* | |
| * Writer for the +no_context+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * no_context = dont_use_context -> dont_use_context | |
| */ | |
| static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, no_context, value) | |
| } | |
| /* | |
| * If true, does not use past transcription (if any) as initial prompt for the decoder. | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +no_context+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * no_context -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_no_context(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, no_context) | |
| } | |
| /* | |
| * Writer for the +single_segment+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * single_segment = force_single -> force_single | |
| */ | |
| static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, single_segment, value) | |
| } | |
| /* | |
| * If true, forces single segment output (useful for streaming). | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +single_segment+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * single_segment -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_single_segment(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, single_segment) | |
| } | |
| /* | |
| * Writer for the +print_special+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * print_special = force_print -> force_print | |
| */ | |
| static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, print_special, value) | |
| } | |
| /* | |
| * If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.). | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +print_special+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * print_special -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_print_special(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, print_special) | |
| } | |
| /* | |
| * Writer for the +print_progress+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * print_progress = force_print -> force_print | |
| */ | |
| static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, print_progress, value) | |
| } | |
| /* | |
| * If true, prints progress information. | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +print_progress+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * print_progress -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_print_progress(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, print_progress) | |
| } | |
| /* | |
| * Writer for the +print_realtime+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * print_realtime = force_print -> force_print | |
| */ | |
| static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, print_realtime, value) | |
| } | |
| /* | |
| * If true, prints results from within whisper.cpp. (avoid it, use callback instead) | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +print_realtime+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * print_realtime -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_print_realtime(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, print_realtime) | |
| } | |
| /* | |
| * Writer for the +print_timestamps+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * print_timestamps = force_print -> force_print | |
| */ | |
| static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, print_timestamps, value) | |
| } | |
| /* | |
| * If true, prints timestamps for each text segment when printing realtime. | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +print_timestamps+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * print_timestamps -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, print_timestamps) | |
| } | |
| /* | |
| * Writer for the +suppress_blank+ parameter. The body is the BOOL_PARAMS_SETTER | |
| * macro, defined outside this chunk (presumably it stores the truthiness of | |
| * +value+ in the params struct and returns +value+ -- confirm against the macro). | |
| * | |
| * call-seq: | |
| * suppress_blank = force_suppress -> force_suppress | |
| */ | |
| static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, suppress_blank, value) | |
| } | |
| /* | |
| * If true, suppresses blank outputs. | |
| * | |
| * Implemented by the BOOL_PARAMS_GETTER macro for the +suppress_blank+ field | |
| * (macro defined outside this chunk). | |
| * | |
| * call-seq: | |
| * suppress_blank -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, suppress_blank) | |
| } | |
| /* | |
| * Writer for the +suppress_non_speech_tokens+ parameter. The body is the | |
| * BOOL_PARAMS_SETTER macro, defined outside this chunk (presumably it stores the | |
| * truthiness of +value+ in the params struct and returns +value+ -- confirm | |
| * against the macro). | |
| * | |
| * call-seq: | |
| * suppress_non_speech_tokens = force_suppress -> force_suppress | |
| */ | |
| static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value) | |
| } | |
| /* | |
| * If true, suppresses non-speech-tokens. | |
| * | |
| * call-seq: | |
| * suppress_non_speech_tokens -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens) | |
| } | |
| /* | |
| * If true, enables token-level timestamps. | |
| * | |
| * call-seq: | |
| * token_timestamps -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, token_timestamps) | |
| } | |
| /* | |
| * call-seq: | |
| * token_timestamps = force_timestamps -> force_timestamps | |
| */ | |
| static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, token_timestamps, value) | |
| } | |
| /* | |
| * If true, split on word rather than on token (when used with max_len). | |
| * | |
| * call-seq: | |
| * translate -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_split_on_word(VALUE self) { | |
| BOOL_PARAMS_GETTER(self, split_on_word) | |
| } | |
| /* | |
| * call-seq: | |
| * split_on_word = force_split -> force_split | |
| */ | |
| static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) { | |
| BOOL_PARAMS_SETTER(self, split_on_word, value) | |
| } | |
| /* | |
| * Tokens to provide to the whisper decoder as initial prompt | |
| * these are prepended to any existing text context from a previous call | |
| * use whisper_tokenize() to convert text to tokens. | |
| * Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224). | |
| * | |
| * call-seq: | |
| * initial_prompt -> String | |
| */ | |
| static VALUE ruby_whisper_params_get_initial_prompt(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return rwp->params.initial_prompt == nullptr ? Qnil : rb_str_new2(rwp->params.initial_prompt); | |
| } | |
| /* | |
| * call-seq: | |
| * initial_prompt = prompt -> prompt | |
| */ | |
| static VALUE ruby_whisper_params_set_initial_prompt(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.initial_prompt = StringValueCStr(value); | |
| return value; | |
| } | |
| /* | |
| * If true, enables diarization. | |
| * | |
| * call-seq: | |
| * diarize -> bool | |
| */ | |
| static VALUE ruby_whisper_params_get_diarize(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| if (rwp->diarize) { | |
| return Qtrue; | |
| } else { | |
| return Qfalse; | |
| } | |
| } | |
| /* | |
| * call-seq: | |
| * diarize = force_diarize -> force_diarize | |
| */ | |
| static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| if (value == Qfalse || value == Qnil) { | |
| rwp->diarize = false; | |
| } else { | |
| rwp->diarize = true; | |
| } \ | |
| return value; | |
| } | |
| /* | |
| * Start offset in ms. | |
| * | |
| * call-seq: | |
| * offset -> Integer | |
| */ | |
| static VALUE ruby_whisper_params_get_offset(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return INT2NUM(rwp->params.offset_ms); | |
| } | |
| /* | |
| * call-seq: | |
| * offset = offset_ms -> offset_ms | |
| */ | |
| static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.offset_ms = NUM2INT(value); | |
| return value; | |
| } | |
| /* | |
| * Audio duration to process in ms. | |
| * | |
| * call-seq: | |
| * duration -> Integer | |
| */ | |
| static VALUE ruby_whisper_params_get_duration(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return INT2NUM(rwp->params.duration_ms); | |
| } | |
| /* | |
| * call-seq: | |
| * duration = duration_ms -> duration_ms | |
| */ | |
| static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.duration_ms = NUM2INT(value); | |
| return value; | |
| } | |
| /* | |
| * Max tokens to use from past text as prompt for the decoder. | |
| * | |
| * call-seq: | |
| * max_text_tokens -> Integer | |
| */ | |
| static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return INT2NUM(rwp->params.n_max_text_ctx); | |
| } | |
| /* | |
| * call-seq: | |
| * max_text_tokens = n_tokens -> n_tokens | |
| */ | |
| static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.n_max_text_ctx = NUM2INT(value); | |
| return value; | |
| } | |
| /* | |
| * call-seq: | |
| * temperature -> Float | |
| */ | |
| static VALUE ruby_whisper_params_get_temperature(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.temperature); | |
| } | |
| /* | |
| * call-seq: | |
| * temperature = temp -> temp | |
| */ | |
| static VALUE ruby_whisper_params_set_temperature(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.temperature = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97 | |
| * | |
| * call-seq: | |
| * max_initial_ts -> Flaot | |
| */ | |
| static VALUE ruby_whisper_params_get_max_initial_ts(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.max_initial_ts); | |
| } | |
| /* | |
| * call-seq: | |
| * max_initial_ts = timestamp -> timestamp | |
| */ | |
| static VALUE ruby_whisper_params_set_max_initial_ts(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.max_initial_ts = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * call-seq: | |
| * length_penalty -> Float | |
| */ | |
| static VALUE ruby_whisper_params_get_length_penalty(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.length_penalty); | |
| } | |
| /* | |
| * call-seq: | |
| * length_penalty = penalty -> penalty | |
| */ | |
| static VALUE ruby_whisper_params_set_length_penalty(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.length_penalty = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * call-seq: | |
| * temperature_inc -> Float | |
| */ | |
| static VALUE ruby_whisper_params_get_temperature_inc(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.temperature_inc); | |
| } | |
| /* | |
| * call-seq: | |
| * temperature_inc = inc -> inc | |
| */ | |
| static VALUE ruby_whisper_params_set_temperature_inc(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.temperature_inc = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * Similar to OpenAI's "compression_ratio_threshold" | |
| * | |
| * call-seq: | |
| * entropy_thold -> Float | |
| */ | |
| static VALUE ruby_whisper_params_get_entropy_thold(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.entropy_thold); | |
| } | |
| /* | |
| * call-seq: | |
| * entropy_thold = threshold -> threshold | |
| */ | |
| static VALUE ruby_whisper_params_set_entropy_thold(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.entropy_thold = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * call-seq: | |
| * logprob_thold -> Float | |
| */ | |
| static VALUE ruby_whisper_params_get_logprob_thold(VALUE self) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| return DBL2NUM(rwp->params.logprob_thold); | |
| } | |
| /* | |
| * call-seq: | |
| * logprob_thold = threshold -> threshold | |
| */ | |
| static VALUE ruby_whisper_params_set_logprob_thold(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->params.logprob_thold = RFLOAT_VALUE(value); | |
| return value; | |
| } | |
| /* | |
| * Sets new segment callback, called for every newly generated text segment. | |
| * | |
| * params.new_segment_callback = ->(context, _, n_new, user_data) { | |
| * # ... | |
| * } | |
| * | |
| * call-seq: | |
| * new_segment_callback = callback -> callback | |
| */ | |
| static VALUE ruby_whisper_params_set_new_segment_callback(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->new_segment_callback_container->callback = value; | |
| return value; | |
| } | |
| /* | |
| * Sets user data passed to the last argument of new segment callback. | |
| * | |
| * call-seq: | |
| * new_segment_callback_user_data = user_data -> use_data | |
| */ | |
| static VALUE ruby_whisper_params_set_new_segment_callback_user_data(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->new_segment_callback_container->user_data = value; | |
| return value; | |
| } | |
| /* | |
| * Sets progress callback, called on each progress update. | |
| * | |
| * params.new_segment_callback = ->(context, _, n_new, user_data) { | |
| * # ... | |
| * } | |
| * | |
| * call-seq: | |
| * progress_callback = callback -> callback | |
| */ | |
| static VALUE ruby_whisper_params_set_progress_callback(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->progress_callback_container->callback = value; | |
| return value; | |
| } | |
| /* | |
| * Sets user data passed to the last argument of progress callback. | |
| * | |
| * call-seq: | |
| * progress_callback_user_data = user_data -> use_data | |
| */ | |
| static VALUE ruby_whisper_params_set_progress_callback_user_data(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->progress_callback_container->user_data = value; | |
| return value; | |
| } | |
| /* | |
| * Sets abort callback, called to check if the process should be aborted. | |
| * | |
| * params.abort_callback = ->(user_data) { | |
| * # ... | |
| * } | |
| * | |
| * call-seq: | |
| * abort_callback = callback -> callback | |
| */ | |
| static VALUE ruby_whisper_params_set_abort_callback(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->abort_callback_container->callback = value; | |
| return value; | |
| } | |
| /* | |
| * Sets user data passed to the last argument of abort callback. | |
| * | |
| * call-seq: | |
| * abort_callback_user_data = user_data -> use_data | |
| */ | |
| static VALUE ruby_whisper_params_set_abort_callback_user_data(VALUE self, VALUE value) { | |
| ruby_whisper_params *rwp; | |
| Data_Get_Struct(self, ruby_whisper_params, rwp); | |
| rwp->abort_callback_container->user_data = value; | |
| return value; | |
| } | |
| // High level API | |
| typedef struct { | |
| VALUE context; | |
| int index; | |
| } ruby_whisper_segment; | |
| typedef struct { | |
| VALUE context; | |
| } ruby_whisper_model; | |
| VALUE cSegment; | |
| VALUE cModel; | |
| static void rb_whisper_segment_mark(ruby_whisper_segment *rws) { | |
| rb_gc_mark(rws->context); | |
| } | |
| static VALUE ruby_whisper_segment_allocate(VALUE klass) { | |
| ruby_whisper_segment *rws; | |
| rws = ALLOC(ruby_whisper_segment); | |
| return Data_Wrap_Struct(klass, rb_whisper_segment_mark, RUBY_DEFAULT_FREE, rws); | |
| } | |
| static VALUE rb_whisper_segment_initialize(VALUE context, int index) { | |
| ruby_whisper_segment *rws; | |
| const VALUE segment = ruby_whisper_segment_allocate(cSegment); | |
| Data_Get_Struct(segment, ruby_whisper_segment, rws); | |
| rws->context = context; | |
| rws->index = index; | |
| return segment; | |
| }; | |
| /* | |
| * Yields each Whisper::Segment: | |
| * | |
| * whisper.transcribe("path/to/audio.wav", params) | |
| * whisper.each_segment do |segment| | |
| * puts segment.text | |
| * end | |
| * | |
| * Returns an Enumerator if no block given: | |
| * | |
| * whisper.transcribe("path/to/audio.wav", params) | |
| * enum = whisper.each_segment | |
| * enum.to_a # => [#<Whisper::Segment>, ...] | |
| * | |
| * call-seq: | |
| * each_segment {|segment| ... } | |
| * each_segment -> Enumerator | |
| */ | |
| static VALUE ruby_whisper_each_segment(VALUE self) { | |
| if (!rb_block_given_p()) { | |
| const VALUE method_name = rb_funcall(self, id___method__, 0); | |
| return rb_funcall(self, id_to_enum, 1, method_name); | |
| } | |
| ruby_whisper *rw; | |
| Data_Get_Struct(self, ruby_whisper, rw); | |
| const int n_segments = whisper_full_n_segments(rw->context); | |
| for (int i = 0; i < n_segments; ++i) { | |
| rb_yield(rb_whisper_segment_initialize(self, i)); | |
| } | |
| return self; | |
| } | |
| /* | |
| * Hook called on new segment. Yields each Whisper::Segment. | |
| * | |
| * whisper.on_new_segment do |segment| | |
| * # ... | |
| * end | |
| * | |
| * call-seq: | |
| * on_new_segment {|segment| ... } | |
| */ | |
| static VALUE ruby_whisper_params_on_new_segment(VALUE self) { | |
| ruby_whisper_params *rws; | |
| Data_Get_Struct(self, ruby_whisper_params, rws); | |
| const VALUE blk = rb_block_proc(); | |
| rb_ary_push(rws->new_segment_callback_container->callbacks, blk); | |
| return Qnil; | |
| } | |
| /* | |
| * Hook called on progress update. Yields each progress Integer between 0 and 100. | |
| * | |
| * whisper.on_progress do |progress| | |
| * # ... | |
| * end | |
| * | |
| * call-seq: | |
| * on_progress {|progress| ... } | |
| */ | |
| static VALUE ruby_whisper_params_on_progress(VALUE self) { | |
| ruby_whisper_params *rws; | |
| Data_Get_Struct(self, ruby_whisper_params, rws); | |
| const VALUE blk = rb_block_proc(); | |
| rb_ary_push(rws->progress_callback_container->callbacks, blk); | |
| return Qnil; | |
| } | |
| /* | |
| * Call block to determine whether abort or not. Return +true+ when you want to abort. | |
| * | |
| * params.abort_on do | |
| * if some_condition | |
| * true # abort | |
| * else | |
| * false # continue | |
| * end | |
| * end | |
| * | |
| * call-seq: | |
| * abort_on { ... } | |
| */ | |
| static VALUE ruby_whisper_params_abort_on(VALUE self) { | |
| ruby_whisper_params *rws; | |
| Data_Get_Struct(self, ruby_whisper_params, rws); | |
| const VALUE blk = rb_block_proc(); | |
| rb_ary_push(rws->abort_callback_container->callbacks, blk); | |
| return Qnil; | |
| } | |
| /* | |
| * Start time in milliseconds. | |
| * | |
| * call-seq: | |
| * start_time -> Integer | |
| */ | |
| static VALUE ruby_whisper_segment_get_start_time(VALUE self) { | |
| ruby_whisper_segment *rws; | |
| Data_Get_Struct(self, ruby_whisper_segment, rws); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rws->context, ruby_whisper, rw); | |
| const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index); | |
| // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it | |
| return INT2NUM(t0 * 10); | |
| } | |
| /* | |
| * End time in milliseconds. | |
| * | |
| * call-seq: | |
| * end_time -> Integer | |
| */ | |
| static VALUE ruby_whisper_segment_get_end_time(VALUE self) { | |
| ruby_whisper_segment *rws; | |
| Data_Get_Struct(self, ruby_whisper_segment, rws); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rws->context, ruby_whisper, rw); | |
| const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index); | |
| // able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it | |
| return INT2NUM(t1 * 10); | |
| } | |
| /* | |
| * Whether the next segment is predicted as a speaker turn. | |
| * | |
| * call-seq: | |
| * speaker_turn_next? -> bool | |
| */ | |
| static VALUE ruby_whisper_segment_get_speaker_turn_next(VALUE self) { | |
| ruby_whisper_segment *rws; | |
| Data_Get_Struct(self, ruby_whisper_segment, rws); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rws->context, ruby_whisper, rw); | |
| return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse; | |
| } | |
| /* | |
| * call-seq: | |
| * text -> String | |
| */ | |
| static VALUE ruby_whisper_segment_get_text(VALUE self) { | |
| ruby_whisper_segment *rws; | |
| Data_Get_Struct(self, ruby_whisper_segment, rws); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rws->context, ruby_whisper, rw); | |
| const char * text = whisper_full_get_segment_text(rw->context, rws->index); | |
| return rb_str_new2(text); | |
| } | |
| static void rb_whisper_model_mark(ruby_whisper_model *rwm) { | |
| rb_gc_mark(rwm->context); | |
| } | |
| static VALUE ruby_whisper_model_allocate(VALUE klass) { | |
| ruby_whisper_model *rwm; | |
| rwm = ALLOC(ruby_whisper_model); | |
| return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm); | |
| } | |
| static VALUE rb_whisper_model_initialize(VALUE context) { | |
| ruby_whisper_model *rwm; | |
| const VALUE model = ruby_whisper_model_allocate(cModel); | |
| Data_Get_Struct(model, ruby_whisper_model, rwm); | |
| rwm->context = context; | |
| return model; | |
| }; | |
| /* | |
| * call-seq: | |
| * model -> Whisper::Model | |
| */ | |
| static VALUE ruby_whisper_get_model(VALUE self) { | |
| return rb_whisper_model_initialize(self); | |
| } | |
| /* | |
| * call-seq: | |
| * n_vocab -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_vocab(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_vocab(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_audio_ctx -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_audio_ctx(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_ctx(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_audio_state -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_audio_state(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_state(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_audio_head -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_audio_head(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_head(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_audio_layer -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_audio_layer(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_audio_layer(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_text_ctx -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_text_ctx(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_ctx(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_text_state -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_text_state(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_state(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_text_head -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_text_head(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_head(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_text_layer -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_text_layer(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_text_layer(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * n_mels -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_n_mels(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_n_mels(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * ftype -> Integer | |
| */ | |
| static VALUE ruby_whisper_c_model_ftype(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return INT2NUM(whisper_model_ftype(rw->context)); | |
| } | |
| /* | |
| * call-seq: | |
| * type -> String | |
| */ | |
| static VALUE ruby_whisper_c_model_type(VALUE self) { | |
| ruby_whisper_model *rwm; | |
| Data_Get_Struct(self, ruby_whisper_model, rwm); | |
| ruby_whisper *rw; | |
| Data_Get_Struct(rwm->context, ruby_whisper, rw); | |
| return rb_str_new2(whisper_model_type_readable(rw->context)); | |
| } | |
| static VALUE ruby_whisper_error_initialize(VALUE self, VALUE code) { | |
| const int c_code = NUM2INT(code); | |
| char *raw_message; | |
| switch (c_code) { | |
| case -2: | |
| raw_message = "failed to compute log mel spectrogram"; | |
| break; | |
| case -3: | |
| raw_message = "failed to auto-detect language"; | |
| break; | |
| case -4: | |
| raw_message = "too many decoders requested"; | |
| break; | |
| case -5: | |
| raw_message = "audio_ctx is larger than the maximum allowed"; | |
| break; | |
| case -6: | |
| raw_message = "failed to encode"; | |
| break; | |
| case -7: | |
| raw_message = "whisper_kv_cache_init() failed for self-attention cache"; | |
| break; | |
| case -8: | |
| raw_message = "failed to decode"; | |
| break; | |
| case -9: | |
| raw_message = "failed to decode"; | |
| break; | |
| default: | |
| raw_message = "unknown error"; | |
| break; | |
| } | |
| const VALUE message = rb_str_new2(raw_message); | |
| rb_call_super(1, &message); | |
| rb_iv_set(self, "@code", code); | |
| return self; | |
| } | |
| void Init_whisper() { | |
| id_to_s = rb_intern("to_s"); | |
| id_call = rb_intern("call"); | |
| id___method__ = rb_intern("__method__"); | |
| id_to_enum = rb_intern("to_enum"); | |
| id_length = rb_intern("length"); | |
| id_next = rb_intern("next"); | |
| id_new = rb_intern("new"); | |
| mWhisper = rb_define_module("Whisper"); | |
| cContext = rb_define_class_under(mWhisper, "Context", rb_cObject); | |
| cParams = rb_define_class_under(mWhisper, "Params", rb_cObject); | |
| eError = rb_define_class_under(mWhisper, "Error", rb_eStandardError); | |
| rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE)); | |
| rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO)); | |
| rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN)); | |
| rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR)); | |
| rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG)); | |
| rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT)); | |
| rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0); | |
| rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1); | |
| rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1); | |
| rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1); | |
| rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2); | |
| rb_define_singleton_method(mWhisper, "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1); | |
| rb_define_alloc_func(cContext, ruby_whisper_allocate); | |
| rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1); | |
| rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1); | |
| rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0); | |
| rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0); | |
| rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0); | |
| rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0); | |
| rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0); | |
| rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0); | |
| rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0); | |
| rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0); | |
| rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0); | |
| rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0); | |
| rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0); | |
| rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0); | |
| rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0); | |
| rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0); | |
| rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1); | |
| rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1); | |
| rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1); | |
| rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1); | |
| rb_define_method(cContext, "full", ruby_whisper_full, -1); | |
| rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1); | |
| rb_define_alloc_func(cParams, ruby_whisper_params_allocate); | |
| rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1); | |
| rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0); | |
| rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1); | |
| rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0); | |
| rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1); | |
| rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0); | |
| rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1); | |
| rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0); | |
| rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0); | |
| rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1); | |
| rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0); | |
| rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1); | |
| rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0); | |
| rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1); | |
| rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0); | |
| rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1); | |
| rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0); | |
| rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1); | |
| rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0); | |
| rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1); | |
| rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0); | |
| rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1); | |
| rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0); | |
| rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1); | |
| rb_define_method(cParams, "initial_prompt", ruby_whisper_params_get_initial_prompt, 0); | |
| rb_define_method(cParams, "initial_prompt=", ruby_whisper_params_set_initial_prompt, 1); | |
| rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0); | |
| rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1); | |
| rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0); | |
| rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1); | |
| rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0); | |
| rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1); | |
| rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0); | |
| rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1); | |
| rb_define_method(cParams, "temperature", ruby_whisper_params_get_temperature, 0); | |
| rb_define_method(cParams, "temperature=", ruby_whisper_params_set_temperature, 1); | |
| rb_define_method(cParams, "max_initial_ts", ruby_whisper_params_get_max_initial_ts, 0); | |
| rb_define_method(cParams, "max_initial_ts=", ruby_whisper_params_set_max_initial_ts, 1); | |
| rb_define_method(cParams, "length_penalty", ruby_whisper_params_get_length_penalty, 0); | |
| rb_define_method(cParams, "length_penalty=", ruby_whisper_params_set_length_penalty, 1); | |
| rb_define_method(cParams, "temperature_inc", ruby_whisper_params_get_temperature_inc, 0); | |
| rb_define_method(cParams, "temperature_inc=", ruby_whisper_params_set_temperature_inc, 1); | |
| rb_define_method(cParams, "entropy_thold", ruby_whisper_params_get_entropy_thold, 0); | |
| rb_define_method(cParams, "entropy_thold=", ruby_whisper_params_set_entropy_thold, 1); | |
| rb_define_method(cParams, "logprob_thold", ruby_whisper_params_get_logprob_thold, 0); | |
| rb_define_method(cParams, "logprob_thold=", ruby_whisper_params_set_logprob_thold, 1); | |
| rb_define_method(cParams, "new_segment_callback=", ruby_whisper_params_set_new_segment_callback, 1); | |
| rb_define_method(cParams, "new_segment_callback_user_data=", ruby_whisper_params_set_new_segment_callback_user_data, 1); | |
| rb_define_method(cParams, "progress_callback=", ruby_whisper_params_set_progress_callback, 1); | |
| rb_define_method(cParams, "progress_callback_user_data=", ruby_whisper_params_set_progress_callback_user_data, 1); | |
| rb_define_method(cParams, "abort_callback=", ruby_whisper_params_set_abort_callback, 1); | |
| rb_define_method(cParams, "abort_callback_user_data=", ruby_whisper_params_set_abort_callback_user_data, 1); | |
| rb_define_attr(eError, "code", true, false); | |
| rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1); | |
| // High level | |
| cSegment = rb_define_class_under(mWhisper, "Segment", rb_cObject); | |
| rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate); | |
| rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0); | |
| rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0); | |
| rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0); | |
| rb_define_method(cParams, "abort_on", ruby_whisper_params_abort_on, 0); | |
| rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0); | |
| rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0); | |
| rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0); | |
| rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0); | |
| cModel = rb_define_class_under(mWhisper, "Model", rb_cObject); | |
| rb_define_alloc_func(cModel, ruby_whisper_model_allocate); | |
| rb_define_method(cContext, "model", ruby_whisper_get_model, 0); | |
| rb_define_method(cModel, "n_vocab", ruby_whisper_c_model_n_vocab, 0); | |
| rb_define_method(cModel, "n_audio_ctx", ruby_whisper_c_model_n_audio_ctx, 0); | |
| rb_define_method(cModel, "n_audio_state", ruby_whisper_c_model_n_audio_state, 0); | |
| rb_define_method(cModel, "n_audio_head", ruby_whisper_c_model_n_audio_head, 0); | |
| rb_define_method(cModel, "n_audio_layer", ruby_whisper_c_model_n_audio_layer, 0); | |
| rb_define_method(cModel, "n_text_ctx", ruby_whisper_c_model_n_text_ctx, 0); | |
| rb_define_method(cModel, "n_text_state", ruby_whisper_c_model_n_text_state, 0); | |
| rb_define_method(cModel, "n_text_head", ruby_whisper_c_model_n_text_head, 0); | |
| rb_define_method(cModel, "n_text_layer", ruby_whisper_c_model_n_text_layer, 0); | |
| rb_define_method(cModel, "n_mels", ruby_whisper_c_model_n_mels, 0); | |
| rb_define_method(cModel, "ftype", ruby_whisper_c_model_ftype, 0); | |
| rb_define_method(cModel, "type", ruby_whisper_c_model_type, 0); | |
| } | |
| } | |