Andy Maloney (committed by ggerganov)
Commit 142f526 · unverified · 1 Parent(s): 71de15e

minor : small code cleanups (#302)


* Small code cleanups

- fix indentation
- remove extra semicolons
- remove extra break after returns in case statements
- remove unnecessary call to .data() on string
- use empty() instead of checking size()
- no need to check for nullptr before free
- remove unnecessary initialization of string to ""

* minor : switch case always break

Co-authored-by: Georgi Gerganov <[email protected]>
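
The list above covers generic C/C++ cleanups rather than behavior changes. A minimal sketch of three of the patterns, using hypothetical variables (names, buf, text) rather than code from this commit:

    #include <cstdlib>
    #include <string>
    #include <vector>

    void cleanup_patterns(std::vector<std::string> & names, char * buf) {
        // use empty() instead of checking size()
        if (!names.empty()) {   // was: if (names.size() > 0)
            names.pop_back();
        }

        // no need to check for nullptr before free:
        // free(NULL) is a well-defined no-op in C and C++
        free(buf);              // was: if (buf != nullptr) { free(buf); }

        // no initialization to "" needed: a default-constructed
        // std::string is already empty
        std::string text;       // was: std::string text = "";
        (void) text;
    }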

Files changed (2)
  1. ggml.c +8 -8
  2. whisper.cpp +7 -7
ggml.c CHANGED
@@ -1134,10 +1134,10 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
         y2 = _mm256_loadu_ps(y + i + 16);
         y3 = _mm256_loadu_ps(y + i + 24);
 
-        y0 = _mm256_mul_ps(y0, v4);
-        y1 = _mm256_mul_ps(y1, v4);
-        y2 = _mm256_mul_ps(y2, v4);
-        y3 = _mm256_mul_ps(y3, v4);
+        y0 = _mm256_mul_ps(y0, v4);
+        y1 = _mm256_mul_ps(y1, v4);
+        y2 = _mm256_mul_ps(y2, v4);
+        y3 = _mm256_mul_ps(y3, v4);
 
         _mm256_storeu_ps(y + i + 0, y0);
         _mm256_storeu_ps(y + i + 8, y1);
@@ -1475,7 +1475,7 @@ bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
     return
         tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
         tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];;
+        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
 bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
@@ -6624,7 +6624,7 @@ void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tenso
            {
                GGML_ASSERT(false);
            } break;
-    };
+    }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -6870,7 +6870,7 @@ void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tenso
            {
                GGML_ASSERT(false);
            } break;
-    };
+    }
 }
 
 void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
@@ -7339,7 +7339,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    {
                        assert(false);
                    } break;
-            };
+            }
        }
 
        if (cgraph->work != NULL && work_size > cgraph->work_size) {
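
The "};" -> "}" edits above drop a stray semicolon after each switch statement: the extra ";" is an empty statement that compilers such as clang can flag (-Wextra-semi-stmt). The follow-up commit ("minor : switch case always break") keeps a break even after an unreachable case body. A small self-contained sketch of that style, with a hypothetical enum and functions standing in for the real ggml ops:

    #include <cassert>
    #include <cstdio>

    enum op { OP_ADD, OP_COUNT };

    static void do_add() { std::printf("add\n"); }

    static void dispatch(op o) {
        switch (o) {
            case OP_ADD:
                {
                    do_add();
                } break;           // braced case body, always followed by a break
            case OP_COUNT:
                {
                    assert(false); // unreachable, but the case still breaks
                } break;
        }                          // was "};" -- the trailing ";" added nothing
    }

    int main() {
        dispatch(OP_ADD);
        return 0;
    }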
whisper.cpp CHANGED
@@ -1024,7 +1024,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
             fin.read( &tmp[0], tmp.size() ); // read to buffer
             name.assign(&tmp[0], tmp.size());
 
-            if (model.tensors.find(name.data()) == model.tensors.end()) {
+            if (model.tensors.find(name) == model.tensors.end()) {
                 fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
                 return false;
             }
@@ -2187,7 +2187,7 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
     // find the longest tokens that form the words:
     std::vector<whisper_vocab::id> tokens;
     for (const auto & word : words) {
-        if (word.size() == 0) continue;
+        if (word.empty()) continue;
 
         int i = 0;
         int n = word.size();
@@ -2868,7 +2868,7 @@ int whisper_full(
         prompt.clear();
 
         // if we have already generated some text, use it as a prompt to condition the next generation
-        if (prompt_past.size() > 0) {
+        if (!prompt_past.empty()) {
             int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));
 
             prompt = { whisper_token_prev(ctx) };
@@ -2979,7 +2979,7 @@ int whisper_full(
         if (failed) {
             // when we fail to sample timestamp token, retry by clearing the past prompt
             // if it fails again, then we advance the window by 1 second
-            if (prompt_past.size() > 0) {
+            if (!prompt_past.empty()) {
                 prompt_past.clear();
             } else {
                 fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
@@ -2996,11 +2996,11 @@ int whisper_full(
         }
 
         // store the text from this iteration
-        if (tokens_cur.size() > 0) {
+        if (!tokens_cur.empty()) {
             int i0 = 0;
             auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx));
 
-            std::string text = "";
+            std::string text;
 
             for (int i = 0; i < (int) tokens_cur.size(); i++) {
                 //printf("%s: %18s %6.3f %18s %6.3f\n", __func__,
@@ -3207,7 +3207,7 @@ int whisper_full_parallel(
             results_i[j].t1 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
 
             // make sure that segments are not overlapping
-            if (ctx->result_all.size() > 0) {
+            if (!ctx->result_all.empty()) {
                 results_i[j].t0 = std::max(results_i[j].t0, ctx->result_all.back().t1);
             }
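
The find(name.data()) -> find(name) change in whisper_model_load above avoids an unnecessary conversion: std::map<std::string, T>::find takes a const std::string &, so passing name.data() (a const char *) forces construction of a temporary string before the lookup, while passing name compares against the existing object directly. A small sketch with a hypothetical map standing in for model.tensors:

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
        // hypothetical stand-in for model.tensors
        std::map<std::string, int> tensors = { { "encoder.conv1.weight", 0 } };
        std::string name = "encoder.conv1.weight";

        // find(name.data()) would pass a const char*, constructing a
        // temporary std::string (allocation + copy) just for the lookup;
        // find(name) uses the existing string as-is.
        if (tensors.find(name) == tensors.end()) {
            std::fprintf(stderr, "unknown tensor '%s'\n", name.c_str());
            return 1;
        }
        std::printf("found tensor '%s'\n", name.c_str());
        return 0;
    }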