talk-llama : sync llama.cpp
examples/talk-llama/llama.cpp
CHANGED
The diff for this file is too large to render. See raw diff.
examples/talk-llama/llama.h
CHANGED
@@ -67,6 +67,7 @@ extern "C" {
         LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
         LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
         LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
+        LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram
     };
 
     // pre-tokenization types
@@ -87,6 +88,10 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
         LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
         LLAMA_VOCAB_PRE_TYPE_PORO = 15,
+        LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
+        LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
+        LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
+        LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
     };
 
     // note: these values should be synchronized with ggml_rope
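The new UGM vocab type and the CHATGLM3/CHATGLM4/VIKING/JAIS pre-tokenizer entries only extend these enums, so existing callers keep compiling. A minimal sketch of checking the reported vocab type at run time (a loaded `model` is assumed; this code is not part of the diff):

    #include "llama.h"

    static bool is_unigram_tokenizer(const struct llama_model * model) {
        // LLAMA_VOCAB_TYPE_UGM is the new T5-style Unigram tokenizer entry
        return llama_vocab_type(model) == LLAMA_VOCAB_TYPE_UGM;
    }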
@@ -177,6 +182,12 @@ extern "C" {
         LLAMA_POOLING_TYPE_LAST = 3,
     };
 
+    enum llama_attention_type {
+        LLAMA_ATTENTION_TYPE_UNSPECIFIED = -1,
+        LLAMA_ATTENTION_TYPE_CAUSAL = 0,
+        LLAMA_ATTENTION_TYPE_NON_CAUSAL = 1,
+    };
+
     enum llama_split_mode {
         LLAMA_SPLIT_MODE_NONE = 0, // single GPU
         LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
@@ -294,6 +305,7 @@ extern "C" {
 
         enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
         enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
+        enum llama_attention_type attention_type; // attention type to use for embeddings
 
         // ref: https://github.com/ggerganov/llama.cpp/pull/2054
         float rope_freq_base; // RoPE base frequency, 0 = from model
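The attention_type field pairs with the new llama_attention_type enum above: embedding workloads can now request non-causal attention explicitly instead of relying on the model default. A hedged sketch of filling the context params for an embedding run (the other field values are illustrative, not prescribed by this diff):

    #include "llama.h"

    static struct llama_context_params embedding_ctx_params(void) {
        struct llama_context_params cparams = llama_context_default_params();
        cparams.embeddings     = true;                             // request embedding output
        cparams.pooling_type   = LLAMA_POOLING_TYPE_MEAN;          // pool per sequence
        cparams.attention_type = LLAMA_ATTENTION_TYPE_NON_CAUSAL;  // new field from this sync
        return cparams;
    }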
@@ -482,6 +494,13 @@ extern "C" {
     // Get a llama model tensor
     LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
 
+    // Returns true if the model contains an encoder that requires llama_encode() call
+    LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+
+    // For encoder-decoder models, this function returns id of the token that must be provided
+    // to the decoder to start generating output sequence. For other models, it returns -1.
+    LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+
     // Returns 0 on success
     LLAMA_API uint32_t llama_model_quantize(
             const char * fname_inp,
@@ -767,6 +786,14 @@ extern "C" {
     // Frees a batch of tokens allocated with llama_batch_init()
     LLAMA_API void llama_batch_free(struct llama_batch batch);
 
+    // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
+    // Stores the encoder output internally for later use by the decoder cross-attention layers.
+    //   0 - success
+    // < 0 - error
+    LLAMA_API int32_t llama_encode(
+            struct llama_context * ctx,
+            struct llama_batch batch);
+
     // Positive return values does not mean a fatal error, but rather a warning.
     //   0 - success
     //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
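llama_model_has_encoder(), llama_model_decoder_start_token() and llama_encode() together cover the encoder pass of encoder-decoder models such as T5. A minimal sketch of the intended call order (sampling, batching details and error handling omitted; `ctx`, `model` and the tokenized `prompt` are assumed to exist):

    std::vector<llama_token> prompt = { /* tokenized input */ };

    if (llama_model_has_encoder(model)) {
        // run the encoder once over the full prompt
        llama_batch batch = llama_batch_get_one(prompt.data(), (int32_t) prompt.size(), 0, 0);
        if (llama_encode(ctx, batch) < 0) {
            // encoding failed
        }

        // seed the decoder with the model's start token, falling back to BOS when unknown
        llama_token decoder_start = llama_model_decoder_start_token(model);
        if (decoder_start == -1) {
            decoder_start = llama_token_bos(model);
        }
        // feed decoder_start through llama_decode() and sample as usual
    }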
@@ -857,6 +884,7 @@ extern "C" {
     LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification
     LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
+    LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding
 
     // Returns -1 if unknown, 1 for true or 0 for false.
     LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
@@ -878,6 +906,7 @@ extern "C" {
     /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
     /// @return Returns the number of tokens on success, no more than n_tokens_max
     /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
     /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
     ///                      as plaintext. Does not insert a leading space.
     LLAMA_API int32_t llama_tokenize(
@@ -892,15 +921,31 @@ extern "C" {
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
     // Does not write null terminator to the buffer.
-    // User …
+    // User can skip up to 'lstrip' leading spaces before copying (useful when encoding/decoding multiple tokens with 'add_space_prefix')
     // @param special If true, special tokens are rendered in the output.
     LLAMA_API int32_t llama_token_to_piece(
         const struct llama_model * model,
         llama_token token,
         char * buf,
         int32_t length,
+        int32_t lstrip,
         bool special);
 
+    /// @details Convert the provided tokens into text (inverse of llama_tokenize()).
+    /// @param text The char pointer must be large enough to hold the resulting text.
+    /// @return Returns the number of chars/bytes on success, no more than text_len_max.
+    /// @return Returns a negative number on failure - the number of chars/bytes that would have been returned.
+    /// @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so.
+    /// @param unparse_special If true, special tokens are rendered in the output.
+    LLAMA_API int32_t llama_detokenize(
+        const struct llama_model * model,
+        const llama_token * tokens,
+        int32_t n_tokens,
+        char * text,
+        int32_t text_len_max,
+        bool remove_special,
+        bool unparse_special);
+
     /// Apply chat template. Inspired by hf apply_chat_template() on python.
     /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
     /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
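llama_detokenize() is the inverse of llama_tokenize(), and llama_token_to_piece() gains the extra lstrip argument (existing callers such as talk-llama.cpp below simply pass 0). A hedged round-trip sketch using the signatures above (buffer sizes are arbitrary and error handling is omitted):

    std::string text = "Hello world";

    std::vector<llama_token> toks(64);
    const int32_t n_toks = llama_tokenize(model, text.c_str(), (int32_t) text.size(),
                                          toks.data(), (int32_t) toks.size(),
                                          /*add_special*/   true,
                                          /*parse_special*/ false);
    toks.resize(n_toks);

    std::vector<char> buf(256);
    const int32_t n_chars = llama_detokenize(model, toks.data(), (int32_t) toks.size(),
                                             buf.data(), (int32_t) buf.size(),
                                             /*remove_special*/  true,
                                             /*unparse_special*/ false);
    const std::string round_trip(buf.data(), n_chars);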
@@ -924,6 +969,12 @@ extern "C" {
     // Grammar
     //
 
+    /// Initialize a llama_grammar.
+    ///
+    /// @param rules The rule elements of the grammar to initialize.
+    /// @param n_rules The number of rules.
+    /// @param start_rule_index The index of the root rule (the starting point of the grammar).
+    /// @return The initialized llama_grammar or nullptr if initialization failed.
     LLAMA_API struct llama_grammar * llama_grammar_init(
             const llama_grammar_element ** rules,
             size_t n_rules,
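The added comments only document the existing llama_grammar_init() signature. For context, a hedged sketch of building a trivial one-rule grammar by hand (the rule content and follow-up calls are illustrative, not part of this diff):

    // root ::= [a-z]
    const llama_grammar_element rule0[] = {
        { LLAMA_GRETYPE_CHAR,           'a' },
        { LLAMA_GRETYPE_CHAR_RNG_UPPER, 'z' },
        { LLAMA_GRETYPE_END,            0   },
    };
    const llama_grammar_element * rules[] = { rule0 };

    struct llama_grammar * grammar = llama_grammar_init(rules, /*n_rules*/ 1, /*start_rule_index*/ 0);
    if (grammar != nullptr) {
        // constrain sampling with it, then release it
        llama_grammar_free(grammar);
    }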
examples/talk-llama/talk-llama.cpp
CHANGED
@@ -35,10 +35,10 @@ static std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const
 
 static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), 0, false);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), 0, false);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);
examples/talk-llama/unicode-data.cpp
CHANGED
@@ -7030,4 +7030,3 @@ const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
 {0x02FA1C, 0x02FA1C, 0x009F3B},
 {0x02FA1D, 0x02FA1D, 0x02A600},
 };
-
examples/talk-llama/unicode.cpp
CHANGED
@@ -23,7 +23,7 @@ static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
     return result;
 }
 
-static uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
+uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
     assert(offset < utf8.size());
     if (!(utf8[offset + 0] & 0x80)) {
         auto result = utf8[offset + 0];
@@ -232,8 +232,7 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
     };
 
     auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
-        static const codepoint_flags undef(codepoint_flags::UNDEFINED);
-        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
+        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
     };
 
     size_t _prev_end = offset_ini;
@@ -295,9 +294,9 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
                 continue;
             }
             // regex: <space>?[^\s\p{L}\p{N}]+
-            if (!(flags2.is_whitespace …
+            if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
                 pos += (cpt == ' ');
-                while (!(flags2.is_whitespace …
+                while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
                     flags2 = _get_flags(++pos);
                 }
                 _add_token(pos);
@@ -351,8 +350,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
     };
 
     auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
-        static const codepoint_flags undef(codepoint_flags::UNDEFINED);
-        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : undef;
+        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
     };
 
     size_t _prev_end = offset_ini;
@@ -394,8 +392,8 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
             }
         }
 
-            // regex: [^\r\n\p{L}\p{N}]?\p{L}+
-            if (!(cpt == '\r' || cpt == '\n' || …
+            // regex: [^\r\n\p{L}\p{N}]?\p{L}+
+            if (!(cpt == '\r' || cpt == '\n' || flags.is_number)) {
                 if (flags.is_letter || _get_flags(pos+1).is_letter) { // one or more letters
                     pos++;
                     while (_get_flags(pos).is_letter) {
@@ -421,9 +419,9 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
 
             // regex: <space>?[^\s\p{L}\p{N}]+[\r\n]*
             auto flags2 = (cpt == ' ' ? _get_flags(pos+1) : flags);
-            if (!(flags2.is_whitespace …
+            if (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags.as_uint()) {
                 pos += (cpt == ' ');
-                while (!(flags2.is_whitespace …
+                while (!(flags2.is_whitespace | flags2.is_letter | flags2.is_number) && flags2.as_uint()) {
                     flags2 = _get_flags(++pos);
                 }
                 uint32_t cpt2 = _get_cpt(pos);
examples/talk-llama/unicode.h
CHANGED
@@ -48,6 +48,7 @@ struct codepoint_flags {
 
 
 std::string unicode_cpt_to_utf8(uint32_t cp);
+uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
 std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
 
 std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
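With the declaration exported in unicode.h, other translation units can step through a UTF-8 string one codepoint at a time. A small sketch (the helper name and header path come from this diff; the rest is illustrative):

    #include "unicode.h"

    #include <cstdio>
    #include <string>

    static void dump_codepoints(const std::string & utf8) {
        size_t offset = 0;
        while (offset < utf8.size()) {
            const uint32_t cpt = unicode_cpt_from_utf8(utf8, offset); // advances offset past the codepoint
            std::printf("U+%04X\n", (unsigned) cpt);
        }
    }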
src/whisper.cpp
CHANGED
@@ -2949,7 +2949,7 @@ struct whisper_global_cache {
 // Mel spectrogram
 
 void whisper_mel_init(whisper_mel & mel, ggml_backend_t backend, int n_len, int n_len_org, int n_mel) {
-    WHISPER_LOG_INFO("%s: n_len = %d, n_len_org = %d, n_mel = %d\n", __func__, n_len, n_len_org, n_mel);
+    //WHISPER_LOG_INFO("%s: n_len = %d, n_len_org = %d, n_mel = %d\n", __func__, n_len, n_len_org, n_mel);
     mel.n_len_org = n_len_org;
     assert(!mel.ctx);
     mel.ctx = ggml_init({ggml_tensor_overhead(), nullptr, true});