bobqianic commited on
Commit
5ad04c9
·
unverified ·
1 Parent(s): aa86ade

whisper : Replace WHISPER_PRINT_DEBUG with WHISPER_LOG_DEBUG (#1681)

Browse files
Files changed (1) hide show
  1. whisper.cpp +33 -36
whisper.cpp CHANGED
@@ -122,9 +122,18 @@ WHISPER_ATTRIBUTE_FORMAT(2, 3)
122
  static void whisper_log_internal (ggml_log_level level, const char * format, ...);
123
  static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data);
124
 
125
- #define WHISPER_LOG_INFO(...) whisper_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
126
- #define WHISPER_LOG_WARN(...) whisper_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
127
  #define WHISPER_LOG_ERROR(...) whisper_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  #define WHISPER_ASSERT(x) \
130
  do { \
@@ -134,18 +143,6 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
134
  } \
135
  } while (0)
136
 
137
- // define this to enable verbose trace logging - useful for debugging purposes
138
- //#define WHISPER_DEBUG
139
-
140
- #if defined(WHISPER_DEBUG)
141
- #define WHISPER_PRINT_DEBUG(...) \
142
- do { \
143
- fprintf(stderr, __VA_ARGS__); \
144
- } while (0)
145
- #else
146
- #define WHISPER_PRINT_DEBUG(...)
147
- #endif
148
-
149
  //#define WHISPER_USE_FLASH_ATTN
150
  //#define WHISPER_USE_FLASH_FF
151
  #define WHISPER_MAX_DECODERS 8
@@ -2166,7 +2163,7 @@ static struct ggml_cgraph * whisper_build_graph_decoder(
2166
  const int32_t n_kv = ggml_allocr_is_measure(alloc) ? n_ctx : kv_self.n;
2167
  const int32_t kv_head = ggml_allocr_is_measure(alloc) ? n_ctx - n_tokens : kv_self.head;
2168
 
2169
- //WHISPER_PRINT_DEBUG("%s: n_past = %d, n_tokens = %d, n_audio_ctx = %d, n_ctx = %d\n", __func__, n_past, n_tokens, n_audio_ctx, n_ctx);
2170
 
2171
  struct ggml_init_params params = {
2172
  /*.mem_size =*/ wstate.alloc_decode.meta.size(),
@@ -4944,7 +4941,7 @@ static void whisper_sequence_score(
4944
  const auto p = kv.second/(double)cnt;
4945
  entropy -= p*log(p);
4946
 
4947
- //WHISPER_PRINT_DEBUG("entropy: %d %f %f, count %d\n", kv.first, p, log(p), kv.second);
4948
  }
4949
 
4950
  sequence.entropy = entropy;
@@ -5010,7 +5007,7 @@ int whisper_full_with_state(
5010
  // basically don't process anything that is less than 1.0s
5011
  // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
5012
  if (seek_end < seek_start + (params.speed_up ? 50 : 100)) {
5013
- WHISPER_PRINT_DEBUG("%s: input is too short - %d ms < 1000 ms\n", __func__, (seek_end - seek_start)*10);
5014
  return 0;
5015
  }
5016
 
@@ -5199,7 +5196,7 @@ int whisper_full_with_state(
5199
 
5200
  n_decoders_cur = std::max(1, n_decoders_cur);
5201
 
5202
- WHISPER_PRINT_DEBUG("\n%s: strategy = %d, decoding with %d decoders, temperature = %.2f\n", __func__, params.strategy, n_decoders_cur, t_cur);
5203
 
5204
  // TAGS: WHISPER_DECODER_INIT
5205
  for (int j = 0; j < n_decoders_cur; ++j) {
@@ -5243,11 +5240,11 @@ int whisper_full_with_state(
5243
  prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end());
5244
 
5245
  // print the prompt
5246
- WHISPER_PRINT_DEBUG("\n\n");
5247
  for (int i = 0; i < (int) prompt.size(); i++) {
5248
- WHISPER_PRINT_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
5249
  }
5250
- WHISPER_PRINT_DEBUG("\n\n");
5251
 
5252
  whisper_kv_cache_clear(state->kv_self);
5253
 
@@ -5395,7 +5392,7 @@ int whisper_full_with_state(
5395
 
5396
  whisper_kv_cache_seq_cp(state->kv_self, cur.decoder_idx, WHISPER_MAX_DECODERS + j, -1, -1);
5397
 
5398
- WHISPER_PRINT_DEBUG("%s: beam search: decoder %d: from decoder %d: token = %10s, plog = %8.5f, sum_logprobs = %8.5f\n",
5399
  __func__, j, cur.decoder_idx, ctx->vocab.id_to_token.at(decoder.sequence.tokens.back().id).c_str(), decoder.sequence.tokens.back().plog, decoder.sequence.sum_logprobs_all);
5400
  }
5401
 
@@ -5438,7 +5435,7 @@ int whisper_full_with_state(
5438
 
5439
  // do not allow to go back in time
5440
  if (has_ts && seek_delta > seek_delta_new && result_len < i) {
5441
- WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to seek_delta (%d > %d)\n", __func__, j, seek_delta, seek_delta_new);
5442
  failed = true; // TODO: maybe this is not a failure ?
5443
  continue;
5444
  }
@@ -5453,7 +5450,7 @@ int whisper_full_with_state(
5453
  #ifdef WHISPER_DEBUG
5454
  {
5455
  const auto tt = token.pt > 0.10 ? ctx->vocab.id_to_token.at(token.tid) : "[?]";
5456
- WHISPER_PRINT_DEBUG("%s: id = %3d, decoder = %d, token = %6d, p = %6.3f, ts = %10s, %6.3f, result_len = %4d '%s'\n",
5457
  __func__, i, j, token.id, token.p, tt.c_str(), token.pt, result_len, ctx->vocab.id_to_token.at(token.id).c_str());
5458
  }
5459
  #endif
@@ -5467,7 +5464,7 @@ int whisper_full_with_state(
5467
  if (seek + seek_delta + 100 >= seek_end) {
5468
  result_len = i + 1;
5469
  } else {
5470
- WHISPER_PRINT_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);
5471
  failed = true;
5472
  continue;
5473
  }
@@ -5478,7 +5475,7 @@ int whisper_full_with_state(
5478
  seek_delta = 100*WHISPER_CHUNK_SIZE;
5479
  }
5480
 
5481
- WHISPER_PRINT_DEBUG("%s: decoder %d completed\n", __func__, j);
5482
  completed = true;
5483
  continue;
5484
  }
@@ -5494,7 +5491,7 @@ int whisper_full_with_state(
5494
  // sometimes, the decoding can get stuck in a repetition loop
5495
  // this is an attempt to mitigate such cases - we flag the decoding as failed and use a fallback strategy
5496
  if (i == n_max - 1 && (result_len == 0 || seek_delta < 100*WHISPER_CHUNK_SIZE/2)) {
5497
- WHISPER_PRINT_DEBUG("%s: decoder %d: failed due to repetition loop\n", __func__, j);
5498
  failed = true;
5499
  continue;
5500
  }
@@ -5536,7 +5533,7 @@ int whisper_full_with_state(
5536
  continue;
5537
  }
5538
 
5539
- //WHISPER_PRINT_DEBUG("%s: decoder %d: token %d, seek_delta %d\n", __func__, j, decoder.sequence.tokens.back().id, decoder.seek_delta);
5540
 
5541
  decoder.i_batch = batch.n_tokens;
5542
 
@@ -5616,11 +5613,11 @@ int whisper_full_with_state(
5616
  decoder.sequence.tokens.resize(decoder.sequence.result_len);
5617
  whisper_sequence_score(params, decoder.sequence);
5618
 
5619
- WHISPER_PRINT_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
5620
  __func__, j, decoder.sequence.score, decoder.sequence.result_len, decoder.sequence.avg_logprobs, decoder.sequence.entropy);
5621
 
5622
  if (decoder.sequence.result_len > 32 && decoder.sequence.entropy < params.entropy_thold) {
5623
- WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
5624
  __func__, j, decoder.sequence.entropy, params.entropy_thold);
5625
 
5626
  decoder.failed = true;
@@ -5635,7 +5632,7 @@ int whisper_full_with_state(
5635
  }
5636
  }
5637
 
5638
- WHISPER_PRINT_DEBUG("%s: best decoder = %d\n", __func__, best_decoder_id);
5639
  }
5640
 
5641
  bool success = true;
@@ -5647,7 +5644,7 @@ int whisper_full_with_state(
5647
  const auto & decoder = state->decoders[best_decoder_id];
5648
 
5649
  if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
5650
- WHISPER_PRINT_DEBUG("%s: failed due to avg_logprobs %8.5f < %8.5f\n", __func__, decoder.sequence.avg_logprobs, params.logprob_thold);
5651
  success = false;
5652
  state->n_fail_p++;
5653
  }
@@ -5655,13 +5652,13 @@ int whisper_full_with_state(
5655
 
5656
  if (success) {
5657
  //for (auto & token : ctx->decoders[best_decoder_id].sequence.tokens) {
5658
- // WHISPER_PRINT_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
5659
  //}
5660
 
5661
  break;
5662
  }
5663
 
5664
- WHISPER_PRINT_DEBUG("\n%s: failed to decode with temperature = %.2f\n", __func__, t_cur);
5665
  }
5666
 
5667
  // output results through a user-provided callback
@@ -5673,7 +5670,7 @@ int whisper_full_with_state(
5673
 
5674
  const auto & tokens_cur = best_decoder.sequence.tokens;
5675
 
5676
- //WHISPER_PRINT_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
5677
 
5678
  // update prompt_past
5679
  prompt_past.clear();
@@ -5793,7 +5790,7 @@ int whisper_full_with_state(
5793
  // update audio window
5794
  seek += seek_delta;
5795
 
5796
- WHISPER_PRINT_DEBUG("seek = %d, seek_delta = %d\n", seek, seek_delta);
5797
  }
5798
  }
5799
 
 
122
  static void whisper_log_internal (ggml_log_level level, const char * format, ...);
123
  static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data);
124
 
 
 
125
  #define WHISPER_LOG_ERROR(...) whisper_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
126
+ #define WHISPER_LOG_WARN(...) whisper_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
127
+ #define WHISPER_LOG_INFO(...) whisper_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
128
+
129
+ // define this to enable verbose trace logging - useful for debugging purposes
130
+ // #define WHISPER_DEBUG
131
+
132
+ #if defined(WHISPER_DEBUG)
133
+ #define WHISPER_LOG_DEBUG(...) whisper_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
134
+ #else
135
+ #define WHISPER_LOG_DEBUG(...)
136
+ #endif
137
 
138
  #define WHISPER_ASSERT(x) \
139
  do { \
 
143
  } \
144
  } while (0)
145
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  //#define WHISPER_USE_FLASH_ATTN
147
  //#define WHISPER_USE_FLASH_FF
148
  #define WHISPER_MAX_DECODERS 8
 
2163
  const int32_t n_kv = ggml_allocr_is_measure(alloc) ? n_ctx : kv_self.n;
2164
  const int32_t kv_head = ggml_allocr_is_measure(alloc) ? n_ctx - n_tokens : kv_self.head;
2165
 
2166
+ //WHISPER_LOG_DEBUG("%s: n_past = %d, n_tokens = %d, n_audio_ctx = %d, n_ctx = %d\n", __func__, n_past, n_tokens, n_audio_ctx, n_ctx);
2167
 
2168
  struct ggml_init_params params = {
2169
  /*.mem_size =*/ wstate.alloc_decode.meta.size(),
 
4941
  const auto p = kv.second/(double)cnt;
4942
  entropy -= p*log(p);
4943
 
4944
+ //WHISPER_LOG_DEBUG("entropy: %d %f %f, count %d\n", kv.first, p, log(p), kv.second);
4945
  }
4946
 
4947
  sequence.entropy = entropy;
 
5007
  // basically don't process anything that is less than 1.0s
5008
  // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
5009
  if (seek_end < seek_start + (params.speed_up ? 50 : 100)) {
5010
+ WHISPER_LOG_DEBUG("%s: input is too short - %d ms < 1000 ms\n", __func__, (seek_end - seek_start)*10);
5011
  return 0;
5012
  }
5013
 
 
5196
 
5197
  n_decoders_cur = std::max(1, n_decoders_cur);
5198
 
5199
+ WHISPER_LOG_DEBUG("\n%s: strategy = %d, decoding with %d decoders, temperature = %.2f\n", __func__, params.strategy, n_decoders_cur, t_cur);
5200
 
5201
  // TAGS: WHISPER_DECODER_INIT
5202
  for (int j = 0; j < n_decoders_cur; ++j) {
 
5240
  prompt.insert(prompt.end(), prompt_init.begin(), prompt_init.end());
5241
 
5242
  // print the prompt
5243
+ WHISPER_LOG_DEBUG("\n\n");
5244
  for (int i = 0; i < (int) prompt.size(); i++) {
5245
+ WHISPER_LOG_DEBUG("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token.at(prompt[i]).c_str());
5246
  }
5247
+ WHISPER_LOG_DEBUG("\n\n");
5248
 
5249
  whisper_kv_cache_clear(state->kv_self);
5250
 
 
5392
 
5393
  whisper_kv_cache_seq_cp(state->kv_self, cur.decoder_idx, WHISPER_MAX_DECODERS + j, -1, -1);
5394
 
5395
+ WHISPER_LOG_DEBUG("%s: beam search: decoder %d: from decoder %d: token = %10s, plog = %8.5f, sum_logprobs = %8.5f\n",
5396
  __func__, j, cur.decoder_idx, ctx->vocab.id_to_token.at(decoder.sequence.tokens.back().id).c_str(), decoder.sequence.tokens.back().plog, decoder.sequence.sum_logprobs_all);
5397
  }
5398
 
 
5435
 
5436
  // do not allow to go back in time
5437
  if (has_ts && seek_delta > seek_delta_new && result_len < i) {
5438
+ WHISPER_LOG_DEBUG("%s: decoder %d: failed due to seek_delta (%d > %d)\n", __func__, j, seek_delta, seek_delta_new);
5439
  failed = true; // TODO: maybe this is not a failure ?
5440
  continue;
5441
  }
 
5450
  #ifdef WHISPER_DEBUG
5451
  {
5452
  const auto tt = token.pt > 0.10 ? ctx->vocab.id_to_token.at(token.tid) : "[?]";
5453
+ WHISPER_LOG_DEBUG("%s: id = %3d, decoder = %d, token = %6d, p = %6.3f, ts = %10s, %6.3f, result_len = %4d '%s'\n",
5454
  __func__, i, j, token.id, token.p, tt.c_str(), token.pt, result_len, ctx->vocab.id_to_token.at(token.id).c_str());
5455
  }
5456
  #endif
 
5464
  if (seek + seek_delta + 100 >= seek_end) {
5465
  result_len = i + 1;
5466
  } else {
5467
+ WHISPER_LOG_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);
5468
  failed = true;
5469
  continue;
5470
  }
 
5475
  seek_delta = 100*WHISPER_CHUNK_SIZE;
5476
  }
5477
 
5478
+ WHISPER_LOG_DEBUG("%s: decoder %d completed\n", __func__, j);
5479
  completed = true;
5480
  continue;
5481
  }
 
5491
  // sometimes, the decoding can get stuck in a repetition loop
5492
  // this is an attempt to mitigate such cases - we flag the decoding as failed and use a fallback strategy
5493
  if (i == n_max - 1 && (result_len == 0 || seek_delta < 100*WHISPER_CHUNK_SIZE/2)) {
5494
+ WHISPER_LOG_DEBUG("%s: decoder %d: failed due to repetition loop\n", __func__, j);
5495
  failed = true;
5496
  continue;
5497
  }
 
5533
  continue;
5534
  }
5535
 
5536
+ //WHISPER_LOG_DEBUG("%s: decoder %d: token %d, seek_delta %d\n", __func__, j, decoder.sequence.tokens.back().id, decoder.seek_delta);
5537
 
5538
  decoder.i_batch = batch.n_tokens;
5539
 
 
5613
  decoder.sequence.tokens.resize(decoder.sequence.result_len);
5614
  whisper_sequence_score(params, decoder.sequence);
5615
 
5616
+ WHISPER_LOG_DEBUG("%s: decoder %2d: score = %8.5f, result_len = %3d, avg_logprobs = %8.5f, entropy = %8.5f\n",
5617
  __func__, j, decoder.sequence.score, decoder.sequence.result_len, decoder.sequence.avg_logprobs, decoder.sequence.entropy);
5618
 
5619
  if (decoder.sequence.result_len > 32 && decoder.sequence.entropy < params.entropy_thold) {
5620
+ WHISPER_LOG_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
5621
  __func__, j, decoder.sequence.entropy, params.entropy_thold);
5622
 
5623
  decoder.failed = true;
 
5632
  }
5633
  }
5634
 
5635
+ WHISPER_LOG_DEBUG("%s: best decoder = %d\n", __func__, best_decoder_id);
5636
  }
5637
 
5638
  bool success = true;
 
5644
  const auto & decoder = state->decoders[best_decoder_id];
5645
 
5646
  if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
5647
+ WHISPER_LOG_DEBUG("%s: failed due to avg_logprobs %8.5f < %8.5f\n", __func__, decoder.sequence.avg_logprobs, params.logprob_thold);
5648
  success = false;
5649
  state->n_fail_p++;
5650
  }
 
5652
 
5653
  if (success) {
5654
  //for (auto & token : ctx->decoders[best_decoder_id].sequence.tokens) {
5655
+ // WHISPER_LOG_DEBUG("%s: token = %d, p = %6.3f, pt = %6.3f, ts = %s, str = %s\n", __func__, token.id, token.p, token.pt, ctx->vocab.id_to_token.at(token.tid).c_str(), ctx->vocab.id_to_token.at(token.id).c_str());
5656
  //}
5657
 
5658
  break;
5659
  }
5660
 
5661
+ WHISPER_LOG_DEBUG("\n%s: failed to decode with temperature = %.2f\n", __func__, t_cur);
5662
  }
5663
 
5664
  // output results through a user-provided callback
 
5670
 
5671
  const auto & tokens_cur = best_decoder.sequence.tokens;
5672
 
5673
+ //WHISPER_LOG_DEBUG("prompt_init.size() = %d, prompt.size() = %d, result_len = %d, seek_delta = %d\n", prompt_init.size(), prompt.size(), result_len, seek_delta);
5674
 
5675
  // update prompt_past
5676
  prompt_past.clear();
 
5790
  // update audio window
5791
  seek += seek_delta;
5792
 
5793
+ WHISPER_LOG_DEBUG("seek = %d, seek_delta = %d\n", seek, seek_delta);
5794
  }
5795
  }
5796