minor : small code cleanups (#302)
* Small code cleanups (each pattern is illustrated in a short sketch after the commit message)
- fix indentation
- remove extra semicolons
- remove extra break after returns in case statements
- remove unnecessary call to .data() on string
- use empty() instead of checking size()
- no need to check for nullptr before free
- remove unnecessary initialization of string to ""
* minor : switch case always break
Co-authored-by: Georgi Gerganov <[email protected]>
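
The patterns above in a minimal before/after sketch (illustrative only, not code from this PR; the names `items` and `buf` are invented):

// Illustrative sketch of the cleanup patterns from the commit message.
#include <cstdlib>
#include <string>
#include <vector>

void cleanups(std::vector<int> & items, char * buf) {
    // use empty() instead of checking size()
    // before: if (items.size() > 0) { ... }
    if (!items.empty()) {
        items.pop_back();
    }

    // no need to check for nullptr before free:
    // free(NULL) is defined as a no-op by the C standard
    // before: if (buf != nullptr) { free(buf); }
    free(buf);

    // remove unnecessary initialization of string to ""
    // before: std::string text = "";
    std::string text; // a default-constructed std::string is already empty
    (void) text;      // silence unused-variable warnings in this sketch
}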
- ggml.c +8 -8
- whisper.cpp +7 -7
ggml.c
CHANGED
@@ -1134,10 +1134,10 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
         y2 = _mm256_loadu_ps(y + i + 16);
         y3 = _mm256_loadu_ps(y + i + 24);
 
-            y0 = _mm256_mul_ps(y0, v4);
-            y1 = _mm256_mul_ps(y1, v4);
-            y2 = _mm256_mul_ps(y2, v4);
-            y3 = _mm256_mul_ps(y3, v4);
+        y0 = _mm256_mul_ps(y0, v4);
+        y1 = _mm256_mul_ps(y1, v4);
+        y2 = _mm256_mul_ps(y2, v4);
+        y3 = _mm256_mul_ps(y3, v4);
 
         _mm256_storeu_ps(y + i + 0, y0);
         _mm256_storeu_ps(y + i + 8, y1);
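For context, ggml_vec_scale_f32 multiplies 32 floats per iteration by a scalar using AVX. A standalone sketch of the same technique (assuming an AVX-capable CPU and -mavx; the 8-wide loop with a scalar tail is my simplification of ggml's unrolled version):

#include <immintrin.h>

// Scale y[0..n) by v, 8 floats at a time with AVX; scalar tail for n % 8.
void vec_scale_f32(const int n, float * y, const float v) {
    const __m256 v8 = _mm256_set1_ps(v);    // broadcast scale factor to 8 lanes

    int i = 0;
    for (; i + 8 <= n; i += 8) {
        __m256 y0 = _mm256_loadu_ps(y + i); // unaligned load of 8 floats
        y0 = _mm256_mul_ps(y0, v8);
        _mm256_storeu_ps(y + i, y0);
    }
    for (; i < n; i++) {                    // leftover elements
        y[i] *= v;
    }
}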
@@ -1475,7 +1475,7 @@ bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
     return
         tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
         tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];;
+        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
 bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
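My reading of the predicate above, as a toy sketch (not ggml code): ne[] holds element counts, nb[] holds byte strides, and a "padded 1d" tensor is tightly packed along dim 0 while nb[1] may carry row padding:

#include <cstddef>
#include <cstdint>

// Toy stand-ins for the ggml fields the check relies on.
struct toy_tensor {
    int64_t ne[4]; // number of elements per dimension
    size_t  nb[4]; // stride in bytes per dimension
};

bool toy_is_padded_1d(const toy_tensor & t, size_t type_size) {
    // dim 0 tightly packed; dims 2 and 3 chain off nb[1], so any
    // padding can only live in the row stride nb[1]
    return
        t.nb[0] == type_size       &&
        t.nb[2] == t.nb[1]*t.ne[1] &&
        t.nb[3] == t.nb[2]*t.ne[2];
}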
@@ -6624,7 +6624,7 @@ void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 GGML_ASSERT(false);
             } break;
-    };
+    }
 }
 
 ////////////////////////////////////////////////////////////////////////////////
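The character dropped here (and in the two hunks below) is a stray semicolon after the switch's closing brace; it is a legal empty statement in C, just dead noise. In miniature:

// A ';' after a switch block compiles but does nothing.
void dispatch(int op) {
    switch (op) {
        default:
            {
                // ... handle op ...
            } break; // brace-then-break style used throughout ggml.c
    } // before this cleanup: };
}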
@@ -6870,7 +6870,7 @@ void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
             {
                 GGML_ASSERT(false);
             } break;
-    };
+    }
 }
 
 void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
@@ -7339,7 +7339,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
                 {
                     assert(false);
                 } break;
-        };
+        }
     }
 
     if (cgraph->work != NULL && work_size > cgraph->work_size) {
whisper.cpp
CHANGED
@@ -1024,7 +1024,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
             fin.read( &tmp[0], tmp.size() ); // read to buffer
             name.assign(&tmp[0], tmp.size());
 
-            if (model.tensors.find(name.data()) == model.tensors.end()) {
+            if (model.tensors.find(name) == model.tensors.end()) {
                 fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data());
                 return false;
             }
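Dropping .data() is not only cosmetic: std::map<std::string, T>::find takes the key by const reference, so passing name.data() (a const char *) forces construction of a temporary std::string on every lookup. A minimal sketch:

#include <cstdio>
#include <map>
#include <string>

int main() {
    std::map<std::string, int> tensors = { { "encoder.conv1.weight", 0 } };
    const std::string name = "encoder.conv1.weight";

    // before: tensors.find(name.data()) constructed a temporary std::string
    if (tensors.find(name) == tensors.end()) {
        fprintf(stderr, "unknown tensor '%s'\n", name.c_str());
        return 1;
    }
    printf("found '%s'\n", name.c_str());
    return 0;
}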
@@ -2187,7 +2187,7 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, const std::string & text) {
     // find the longest tokens that form the words:
     std::vector<whisper_vocab::id> tokens;
     for (const auto & word : words) {
-        if (word.size() == 0) continue;
+        if (word.empty()) continue;
 
         int i = 0;
         int n = word.size();
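The loop this hunk sits in greedily matches the longest known token at each position of the word. A hedged reconstruction of the idea (not whisper.cpp's exact code; token_to_id is a stand-in for the vocab map):

#include <map>
#include <string>
#include <vector>

std::vector<int> tokenize_word(const std::string & word,
                               const std::map<std::string, int> & token_to_id) {
    std::vector<int> tokens;
    int i = 0;
    const int n = (int) word.size();
    while (i < n) {
        bool found = false;
        for (int j = n; j > i && !found; j--) {   // longest candidate first
            auto it = token_to_id.find(word.substr(i, j - i));
            if (it != token_to_id.end()) {
                tokens.push_back(it->second);
                i = j;
                found = true;
            }
        }
        if (!found) {
            i++; // no token matches here; skip one character
        }
    }
    return tokens;
}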
@@ -2868,7 +2868,7 @@ int whisper_full(
         prompt.clear();
 
         // if we have already generated some text, use it as a prompt to condition the next generation
-        if (prompt_past.size() > 0) {
+        if (!prompt_past.empty()) {
             int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));
 
             prompt = { whisper_token_prev(ctx) };
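For context, the branch above builds the conditioning prompt from the tail of previously generated tokens, capped at half the text context. A hedged sketch of that step (parameter names follow the hunk; appending the tail is my reconstruction):

#include <algorithm>
#include <vector>

std::vector<int> build_prompt(const std::vector<int> & prompt_past,
                              int n_max_text_ctx, int n_text_ctx,
                              int token_prev) {
    std::vector<int> prompt;
    if (!prompt_past.empty()) {
        // keep at most n_take of the most recent past tokens
        const int n_take = std::min(std::min(n_max_text_ctx, n_text_ctx/2),
                                    (int) prompt_past.size());
        prompt.push_back(token_prev); // marker: "previous text follows"
        prompt.insert(prompt.end(),
                      prompt_past.end() - n_take, prompt_past.end());
    }
    return prompt;
}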
@@ -2979,7 +2979,7 @@ int whisper_full(
         if (failed) {
             // when we fail to sample timestamp token, retry by clearing the past prompt
             // if it fails again, then we advance the window by 1 second
-            if (prompt_past.size() > 0) {
+            if (!prompt_past.empty()) {
                 prompt_past.clear();
             } else {
                 fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
@@ -2996,11 +2996,11 @@ int whisper_full(
         }
 
         // store the text from this iteration
-        if (tokens_cur.size() > 0) {
+        if (!tokens_cur.empty()) {
             int i0 = 0;
             auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx));
 
-            std::string text = "";
+            std::string text;
 
             for (int i = 0; i < (int) tokens_cur.size(); i++) {
                 //printf("%s: %18s %6.3f %18s %6.3f\n", __func__,
@@ -3207,7 +3207,7 @@ int whisper_full_parallel(
                 results_i[j].t1 += 100*((i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
 
                 // make sure that segments are not overlapping
-                if (ctx->result_all.size() > 0) {
+                if (!ctx->result_all.empty()) {
                     results_i[j].t0 = std::max(results_i[j].t0, ctx->result_all.back().t1);
                 }