ggerganov committed on
Commit
7ffa531
·
unverified ·
1 Parent(s): 061b71e

cmake : enable and fix -Wall -Wextra -Wpedantic C++ warnings

Browse files
CMakeLists.txt CHANGED
@@ -132,6 +132,12 @@ if (WHISPER_ALL_WARNINGS)
132
  -Wstrict-prototypes \
133
  -Wpointer-arith \
134
  ")
 
 
 
 
 
 
135
  else()
136
  # todo : msvc
137
  endif()
 
132
  -Wstrict-prototypes \
133
  -Wpointer-arith \
134
  ")
135
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
136
+ -Wall \
137
+ -Wextra \
138
+ -Wpedantic \
139
+ -Wcast-qual \
140
+ ")
141
  else()
142
  # todo : msvc
143
  endif()
examples/bench/bench.cpp CHANGED
@@ -33,7 +33,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
33
  return true;
34
  }
35
 
36
- void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
37
  fprintf(stderr, "\n");
38
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
39
  fprintf(stderr, "\n");
 
33
  return true;
34
  }
35
 
36
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
37
  fprintf(stderr, "\n");
38
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
39
  fprintf(stderr, "\n");
examples/command/command.cpp CHANGED
@@ -81,7 +81,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
81
  return true;
82
  }
83
 
84
- void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
85
  fprintf(stderr, "\n");
86
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
87
  fprintf(stderr, "\n");
@@ -387,7 +387,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
387
  float energy_all = 0.0f;
388
  float energy_last = 0.0f;
389
 
390
- for (size_t i = 0; i < n_samples; i++) {
391
  energy_all += fabsf(pcmf32[i]);
392
 
393
  if (i >= n_samples - n_samples_last) {
@@ -594,7 +594,7 @@ int main(int argc, char ** argv) {
594
  whisper_token tokens[1024];
595
  allowed_tokens.emplace_back();
596
 
597
- for (int l = 0; l < cmd.size(); ++l) {
598
  // NOTE: very important to add the whitespace !
599
  // the reason is that the first decoded token starts with a whitespace too!
600
  std::string ss = std::string(" ") + cmd.substr(0, l + 1);
@@ -843,15 +843,15 @@ int main(int argc, char ** argv) {
843
 
844
  // best command
845
  {
 
 
846
  fprintf(stdout, "\n");
847
  fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
848
  "\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
849
- (int) std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t_start).count());
850
  fprintf(stdout, "\n");
851
  }
852
 
853
- const auto t_end = std::chrono::high_resolution_clock::now();
854
-
855
  audio.clear();
856
  }
857
  }
 
81
  return true;
82
  }
83
 
84
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
85
  fprintf(stderr, "\n");
86
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
87
  fprintf(stderr, "\n");
 
387
  float energy_all = 0.0f;
388
  float energy_last = 0.0f;
389
 
390
+ for (int i = 0; i < n_samples; i++) {
391
  energy_all += fabsf(pcmf32[i]);
392
 
393
  if (i >= n_samples - n_samples_last) {
 
594
  whisper_token tokens[1024];
595
  allowed_tokens.emplace_back();
596
 
597
+ for (int l = 0; l < (int) cmd.size(); ++l) {
598
  // NOTE: very important to add the whitespace !
599
  // the reason is that the first decoded token starts with a whitespace too!
600
  std::string ss = std::string(" ") + cmd.substr(0, l + 1);
 
843
 
844
  // best command
845
  {
846
+ const auto t_end = std::chrono::high_resolution_clock::now();
847
+
848
  fprintf(stdout, "\n");
849
  fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
850
  "\033[1m", allowed_commands[probs_id[0].second].c_str(), "\033[0m", probs_id[0].first,
851
+ (int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
852
  fprintf(stdout, "\n");
853
  }
854
 
 
 
855
  audio.clear();
856
  }
857
  }
examples/main/main.cpp CHANGED
@@ -129,7 +129,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
129
  return true;
130
  }
131
 
132
- void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
133
  fprintf(stderr, "\n");
134
  fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
135
  fprintf(stderr, "\n");
@@ -328,7 +328,7 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
328
  // karaoke video generation
329
  // outputs a bash script that uses ffmpeg to generate a video with the subtitles
330
  // TODO: font parameter adjustments
331
- bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec) {
332
  std::ofstream fout(fname);
333
 
334
  fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
@@ -377,7 +377,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
377
  txt_ul = "\\ \\ ";
378
 
379
  {
380
- int ncnt = 0;
381
  for (int k = 0; k < n; ++k) {
382
  const auto & token2 = tokens[k];
383
 
@@ -401,8 +400,6 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
401
  txt_ul += "\\ ";
402
  }
403
  }
404
-
405
- ncnt += txt.size();
406
  }
407
 
408
  ::replace_all(txt_bg, "'", "\u2019");
@@ -637,7 +634,7 @@ int main(int argc, char ** argv) {
637
  {
638
  static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
639
 
640
- wparams.encoder_begin_callback = [](struct whisper_context * ctx, void * user_data) {
641
  bool is_aborted = *(bool*)user_data;
642
  return !is_aborted;
643
  };
 
129
  return true;
130
  }
131
 
132
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
133
  fprintf(stderr, "\n");
134
  fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
135
  fprintf(stderr, "\n");
 
328
  // karaoke video generation
329
  // outputs a bash script that uses ffmpeg to generate a video with the subtitles
330
  // TODO: font parameter adjustments
331
+ bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & /*params*/, float t_sec) {
332
  std::ofstream fout(fname);
333
 
334
  fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
 
377
  txt_ul = "\\ \\ ";
378
 
379
  {
 
380
  for (int k = 0; k < n; ++k) {
381
  const auto & token2 = tokens[k];
382
 
 
400
  txt_ul += "\\ ";
401
  }
402
  }
 
 
403
  }
404
 
405
  ::replace_all(txt_bg, "'", "\u2019");
 
634
  {
635
  static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
636
 
637
+ wparams.encoder_begin_callback = [](struct whisper_context * /*ctx*/, void * user_data) {
638
  bool is_aborted = *(bool*)user_data;
639
  return !is_aborted;
640
  };
examples/stream/stream.cpp CHANGED
@@ -90,7 +90,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
90
  return true;
91
  }
92
 
93
- void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
94
  fprintf(stderr, "\n");
95
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
96
  fprintf(stderr, "\n");
@@ -391,7 +391,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
391
  float energy_all = 0.0f;
392
  float energy_last = 0.0f;
393
 
394
- for (size_t i = 0; i < n_samples; i++) {
395
  energy_all += fabsf(pcmf32[i]);
396
 
397
  if (i >= n_samples - n_samples_last) {
 
90
  return true;
91
  }
92
 
93
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
94
  fprintf(stderr, "\n");
95
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
96
  fprintf(stderr, "\n");
 
391
  float energy_all = 0.0f;
392
  float energy_last = 0.0f;
393
 
394
+ for (int i = 0; i < n_samples; i++) {
395
  energy_all += fabsf(pcmf32[i]);
396
 
397
  if (i >= n_samples - n_samples_last) {
examples/talk/gpt-2.cpp CHANGED
@@ -78,7 +78,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
78
  const float * logits,
79
  int top_k,
80
  double top_p,
81
- double temp,
82
  std::mt19937 & rng) {
83
  int n_logits = vocab.id_to_token.size();
84
 
@@ -268,7 +268,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
268
  fin.read((char *) &len, sizeof(len));
269
 
270
  word.resize(len);
271
- fin.read((char *) word.data(), len);
272
 
273
  vocab.token_to_id[word] = i;
274
  vocab.id_to_token[i] = word;
@@ -884,7 +884,7 @@ std::string gpt2_gen_text(gpt2_context * ctx, const char * text, int max_tokens)
884
 
885
  std::string result;
886
 
887
- for (int i = embd.size(); i < embd_inp.size() + n_predict; i++) {
888
  // predict
889
  if (embd.size() > 0) {
890
  if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
 
78
  const float * logits,
79
  int top_k,
80
  double top_p,
81
+ double /*temp*/,
82
  std::mt19937 & rng) {
83
  int n_logits = vocab.id_to_token.size();
84
 
 
268
  fin.read((char *) &len, sizeof(len));
269
 
270
  word.resize(len);
271
+ fin.read((char *) &word[0], len);
272
 
273
  vocab.token_to_id[word] = i;
274
  vocab.id_to_token[i] = word;
 
884
 
885
  std::string result;
886
 
887
+ for (int i = embd.size(); i < (int) embd_inp.size() + n_predict; i++) {
888
  // predict
889
  if (embd.size() > 0) {
890
  if (!gpt2_eval(ctx->model, ctx->n_threads, n_past, embd, embd_w, mem_per_token)) {
examples/talk/talk.cpp CHANGED
@@ -79,7 +79,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
79
  return true;
80
  }
81
 
82
- void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
83
  fprintf(stderr, "\n");
84
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
85
  fprintf(stderr, "\n");
@@ -397,7 +397,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
397
  float energy_all = 0.0f;
398
  float energy_last = 0.0f;
399
 
400
- for (size_t i = 0; i < n_samples; i++) {
401
  energy_all += fabsf(pcmf32[i]);
402
 
403
  if (i >= n_samples - n_samples_last) {
@@ -541,7 +541,6 @@ int main(int argc, char ** argv) {
541
  bool force_speak = false;
542
 
543
  float prob0 = 0.0f;
544
- float prob = 0.0f;
545
 
546
  std::vector<float> pcmf32_cur;
547
  std::vector<float> pcmf32_prompt;
 
79
  return true;
80
  }
81
 
82
+ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
83
  fprintf(stderr, "\n");
84
  fprintf(stderr, "usage: %s [options]\n", argv[0]);
85
  fprintf(stderr, "\n");
 
397
  float energy_all = 0.0f;
398
  float energy_last = 0.0f;
399
 
400
+ for (int i = 0; i < n_samples; i++) {
401
  energy_all += fabsf(pcmf32[i]);
402
 
403
  if (i >= n_samples - n_samples_last) {
 
541
  bool force_speak = false;
542
 
543
  float prob0 = 0.0f;
 
544
 
545
  std::vector<float> pcmf32_cur;
546
  std::vector<float> pcmf32_prompt;
whisper.cpp CHANGED
@@ -621,7 +621,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
621
  const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
622
 
623
  size_t ctx_size = 0;
624
- size_t ctx_mem_size = 0;
625
 
626
  {
627
  const auto & hparams = model.hparams;
@@ -730,12 +729,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
730
  ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
731
  }
732
 
733
- ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
734
- ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
735
-
736
- ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
737
- ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
738
-
739
  ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
740
 
741
  fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
@@ -2043,7 +2036,7 @@ static void fft(const std::vector<float> & in, std::vector<float> & out) {
2043
  static bool log_mel_spectrogram(
2044
  const float * samples,
2045
  const int n_samples,
2046
- const int sample_rate,
2047
  const int fft_size,
2048
  const int fft_step,
2049
  const int n_mel,
 
621
  const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
622
 
623
  size_t ctx_size = 0;
 
624
 
625
  {
626
  const auto & hparams = model.hparams;
 
729
  ctx_size += n_text_layer*( n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
730
  }
731
 
 
 
 
 
 
 
732
  ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
733
 
734
  fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
 
2036
  static bool log_mel_spectrogram(
2037
  const float * samples,
2038
  const int n_samples,
2039
+ const int /*sample_rate*/,
2040
  const int fft_size,
2041
  const int fft_step,
2042
  const int n_mel,