Spaces:

natasa365
/

whisper.cpp

Sleeping

ggerganov commited on May 22, 2024

Commit

215ce5c

1 Parent(s): ef68527

cuda : fix rope + add tests (llama/7452)

* cuda : fix rope pos data

ggml-ci

* ggml : drop mode & 1 == 1 support for ggml_rope

ggml-ci

* ggml : support freq_factors for f16 rope (CPU)

ggml-ci

* tests : add rope tests using frequency factors

ggml-ci

Files changed (3) hide show

ggml-cuda/rope.cu +2 -2
ggml.c +18 -2
ggml.h +1 -1

ggml-cuda/rope.cu CHANGED Viewed

@@ -283,9 +283,9 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
-    if (is_neox) {
-        pos = (const int32_t *) src1_d;
         if (src2 != nullptr) {
             freq_factors = (const float *) src2->data;
         }

     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
+    pos = (const int32_t *) src1_d;
+    if (is_neox) {
         if (src2 != nullptr) {
             freq_factors = (const float *) src2->data;
         }

ggml.c CHANGED Viewed

@@ -6245,6 +6245,8 @@ static struct ggml_tensor * ggml_rope_impl(
         float                 xpos_base,
         bool                  xpos_down,
         bool                  inplace) {
     GGML_ASSERT(ggml_is_vector(b));
     GGML_ASSERT(b->type == GGML_TYPE_I32);
     GGML_ASSERT(a->ne[2] == b->ne[0]);
@@ -14413,7 +14415,7 @@ static void ggml_compute_forward_rope_f32(
             freq_factors = (const float *) src2->data;
         }
     } else {
-        GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for mode 1");
     }
     // backward process uses inverse rotation by cos and sin.
@@ -14529,6 +14531,7 @@ static void ggml_compute_forward_rope_f32(
     }
 }
 static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst,
@@ -14536,6 +14539,7 @@ static void ggml_compute_forward_rope_f16(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -14588,6 +14592,17 @@ static void ggml_compute_forward_rope_f16(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
     // backward process uses inverse rotation by cos and sin.
     // cos and sin build a rotation matrix, where the inverse is the transpose.
     // this essentially just switches the sign of sin.
@@ -14660,10 +14675,11 @@ static void ggml_compute_forward_rope_f16(
                             // simplified from `(ib * n_dims + ic) * inv_ndims`
                             float cur_rot = inv_ndims * ic - ib;
                             float cos_theta, sin_theta;
                             rope_yarn(
-                                theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                                 &cos_theta, &sin_theta
                             );
                             sin_theta *= sin_sign;

         float                 xpos_base,
         bool                  xpos_down,
         bool                  inplace) {
+    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
     GGML_ASSERT(ggml_is_vector(b));
     GGML_ASSERT(b->type == GGML_TYPE_I32);
     GGML_ASSERT(a->ne[2] == b->ne[0]);
             freq_factors = (const float *) src2->data;
         }
     } else {
+        GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for !is_neox");
     }
     // backward process uses inverse rotation by cos and sin.
     }
 }
+// TODO: deduplicate f16/f32 code
 static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst,
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * src2 = dst->src[2];
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
+    const float * freq_factors = NULL;
+    if (is_neox) {
+        if (src2 != NULL) {
+            GGML_ASSERT(src2->type == GGML_TYPE_F32);
+            GGML_ASSERT(src2->ne[0] >= n_dims / 2);
+            freq_factors = (const float *) src2->data;
+        }
+    } else {
+        GGML_ASSERT(src2 == NULL && "TODO: freq_factors not implemented for !is_neox");
+    }
     // backward process uses inverse rotation by cos and sin.
     // cos and sin build a rotation matrix, where the inverse is the transpose.
     // this essentially just switches the sign of sin.
                             // simplified from `(ib * n_dims + ic) * inv_ndims`
                             float cur_rot = inv_ndims * ic - ib;
+                            float freq_factor = freq_factors ? freq_factors[ic/2] : 1.0f;
                             float cos_theta, sin_theta;
                             rope_yarn(
+                                theta_base/freq_factor, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                                 &cos_theta, &sin_theta
                             );
                             sin_theta *= sin_sign;

ggml.h CHANGED Viewed

@@ -1460,7 +1460,7 @@ extern "C" {
             struct ggml_tensor  * b);
     // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (DEPRECATED)
     // if mode & 2 == 1, GPT-NeoX style
     // if mode & 4 == 1, ChatGLM style
     //

             struct ggml_tensor  * b);
     // rotary position embedding
+    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
     // if mode & 2 == 1, GPT-NeoX style
     // if mode & 4 == 1, ChatGLM style
     //