Chenguang Li (noemotiovon) committed
Commit 3ad7b0a · 1 Parent(s): e1c1e73

CANN: RoPE operator optimization (llama/10563)

* [cann] RoPE operator optimization

* [CANN] Code Formatting

---------

Co-authored-by: noemotiovon <[email protected]>
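
In short: `aclnn_cache_init` gains an `attn_factor` parameter and folds it into the cached sin/cos tables; the `attn_factor == 1` and F32-only restrictions on the CANN ROPE path are lifted (F16 inputs are handled by casting the cache); the previously undestroyed `acl_freq_factors_tensor` is now released; and an `ASCEND_310P` path composes RoPE from primitive ops. For orientation, below is a minimal CPU sketch of the rotation the scaled cache implements for the interleaved (non-neox) layout. It is illustrative only, not the CANN kernel, and `rope_pair_reference` is a name of our choosing.

```cpp
#include <cmath>

// Minimal CPU sketch (not the CANN kernel) of the rotation that the
// attn_factor-scaled sin/cos cache implements for the interleaved
// (non-neox) layout. rope_pair_reference is an illustrative name.
void rope_pair_reference(float* x, int n_dims, int pos, float freq_base,
                         float freq_scale, float attn_factor) {
    // Same definition as in ggml_cann_rope: theta_scale = freq_base^(-2/n_dims).
    const float theta_scale = powf(freq_base, -2.0f / n_dims);
    float theta = pos * freq_scale;  // angle for the first dimension pair
    for (int i = 0; i < n_dims; i += 2) {
        // attn_factor is pre-multiplied into the cached sin/cos values,
        // matching the new aclnn_muls calls on the device-side cache.
        const float c = attn_factor * cosf(theta);
        const float s = attn_factor * sinf(theta);
        const float x0 = x[i];
        const float x1 = x[i + 1];
        x[i]     = x0 * c - x1 * s;
        x[i + 1] = x0 * s + x1 * c;
        theta *= theta_scale;  // advance to the next (lower) frequency
    }
}
```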

ggml/src/ggml-cann/aclnn_ops.cpp CHANGED
@@ -2965,7 +2965,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                              aclTensor* acl_cos_repeat_tensor,
                              aclTensor* acl_sin_repeat_tensor,
                              float theta_scale, float freq_scale,
-                             bool is_neox) {
+                             float attn_factor, bool is_neox) {
     // int sin/cos cache, cache has different repeat method depond on
     // @param.is_neox
 
@@ -3017,6 +3017,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
             ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS);
         aclnn_div_tensor(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor,
                          nullptr, true);
+        ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
     }
 
     // position
@@ -3047,16 +3048,6 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
               acl_theta_tensor);
 
-    // // power[] * position[] * freq_scale / freq_factors[]
-    // ggml_cann_pool_alloc theta_final_allocator(ctx.pool(),
-    //                                            theta_length *
-    //                                            sizeof(float_t));
-    // aclTensor* acl_theat_final_tensor = aclnn_zero(
-    //     ctx, theta_final_allocator.get(), sizeof(float_t) * theta_length,
-    //     theta_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t));
-    // aclnn_inplace_addcdiv(ctx, acl_theat_final_tensor, acl_theta_tensor,
-    //                       acl_freq_factors_tensor, freq_scale);
-
     // permute: [0,1,2,3]->[0,2,1,3]
     int64_t permute_ne[] = {arange_length, 1, position_length, 1};
     size_t permute_nb[GGML_MAX_DIMS];
@@ -3092,6 +3083,12 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
                             GGML_MAX_DIMS, ACL_FORMAT_ND);
     aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
 
+    // attn_factor
+    if (attn_factor != 1) {
+        aclnn_muls(ctx, acl_sin_tensor, attn_factor, nullptr, true);
+        aclnn_muls(ctx, acl_cos_tensor, attn_factor, nullptr, true);
+    }
+
     // repeat
     if (is_neox) {
         int64_t repeatsArray[] = {1, 1, 1, 2};
@@ -3155,15 +3152,11 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     memcpy(&beta_fast, (int32_t*)dst->op_params + 9, sizeof(float));
     memcpy(&beta_slow, (int32_t*)dst->op_params + 10, sizeof(float));
 
-    // TODO: attn_factor != 1
-    GGML_ASSERT(attn_factor == 1);
     // TODO: n_dims <= ne0
     GGML_ASSERT(n_dims == ne0);
     GGML_ASSERT(n_dims % 2 == 0);
     // TODO: ext_factor != 0
     GGML_ASSERT(ext_factor == 0);
-    // TODO: type == GGML_TYPE_F16
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
 
     const float theta_scale = powf(freq_base, -2.0f / n_dims);
 
@@ -3194,7 +3187,217 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
-                     theta_scale, freq_scale, is_neox);
+                     theta_scale, freq_scale, attn_factor, is_neox);
+
+    aclTensor* acl_src = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+#ifdef ASCEND_310P
+    // Special ROPE operation for 310P
+
+    // roll input
+    void* input_roll_buffer;
+    aclTensor* acl_minus_one_tensor;
+    void* minus_one_scale_buffer = nullptr;
+    ggml_cann_pool_alloc roll_allocator(ctx.pool(), ggml_nbytes(src0));
+    ggml_cann_pool_alloc minus_one_scale_allocator(
+        ctx.pool(), sizeof(float_t) * src0->ne[0]);
+    if (!is_neox) {
+        // roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...]
+        input_roll_buffer = roll_allocator.get();
+        int64_t input_roll_ne[4] = {2, src0->ne[1] * (src0->ne[0] / 2),
+                                    src0->ne[2], src0->ne[3]};
+        size_t input_roll_nb[GGML_MAX_DIMS];
+        input_roll_nb[0] = ggml_type_size(src0->type);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            input_roll_nb[i] = input_roll_nb[i - 1] * input_roll_ne[i - 1];
+        }
+        aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
+            input_roll_buffer, ggml_cann_type_mapping(src0->type),
+            ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
+            GGML_MAX_DIMS);
+        aclTensor* acl_input_tensor = ggml_cann_create_tensor(
+            src0->data, ggml_cann_type_mapping(src0->type),
+            ggml_type_size(src0->type), input_roll_ne, input_roll_nb,
+            GGML_MAX_DIMS);
+
+        int64_t shifts[] = {1};
+        int64_t dims[] = {3};
+        aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
+        ACL_CHECK(aclDestroyTensor(acl_input_roll_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_input_tensor));
+
+        // init [-1, 1, -1, 1, ...]
+        minus_one_scale_buffer = minus_one_scale_allocator.get();
+
+        int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1};
+        size_t minus_one_nb[GGML_MAX_DIMS];
+        minus_one_nb[0] = sizeof(float_t);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1];
+        }
+        acl_minus_one_tensor = aclnn_values(
+            ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0],
+            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1);
+        int64_t dim = 3;
+        int64_t* index = new int64_t[src0->ne[0]];
+        for (int i = 0; i < src0->ne[0]; i++) {
+            index[i] = i / 2 * 2;
+        }
+        int64_t index_num = src0->ne[0];
+        float value = -1;
+        aclnn_index_fill_tensor(ctx, acl_minus_one_tensor, dim, index,
+                                index_num, value);
+    } else {
+        // roll input: [q0,q1,q2,...] ->
+        // [q_half,q_half+1,...,q_end,q0,q1,...q_half-1]
+        input_roll_buffer = roll_allocator.get();
+        aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor(
+            input_roll_buffer, ggml_cann_type_mapping(src0->type),
+            ggml_type_size(src0->type), src0->ne, src0->nb, GGML_MAX_DIMS);
+        aclTensor* acl_input_tensor = ggml_cann_create_tensor(src0);
+
+        int64_t shifts[] = {src0->ne[0] / 2};
+        int64_t dims[] = {3};
+        aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims);
+
+        ACL_CHECK(aclDestroyTensor(acl_input_roll_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_input_tensor));
+        // init [-1, -1, -1, 1, 1,1,...]
+        minus_one_scale_buffer = minus_one_scale_allocator.get();
+        int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1};
+        size_t minus_one_nb[GGML_MAX_DIMS];
+        minus_one_nb[0] = sizeof(float_t);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1];
+        }
+        acl_minus_one_tensor = aclnn_values(
+            ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0],
+            minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1);
+        // -1 * first half
+        int64_t first_half_ne[4] = {src0->ne[0] / 2, 1, 1, 1};
+        size_t first_half_nb[GGML_MAX_DIMS];
+        first_half_nb[0] = sizeof(float_t);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1];
+        }
+        aclTensor* acl_first_half_tensor = ggml_cann_create_tensor(
+            minus_one_scale_buffer, ACL_FLOAT, sizeof(float_t), first_half_ne,
+            first_half_nb, GGML_MAX_DIMS);
+        bool inplace = true;
+        float scale = -1;
+        aclnn_muls(ctx, acl_first_half_tensor, scale, nullptr, inplace);
+        ACL_CHECK(aclDestroyTensor(acl_first_half_tensor));
+    }
+
+    // TODO: n_dims < ne0
+    GGML_ASSERT(n_dims == src0->ne[0]);
+
+    // input * scale
+    ggml_cann_pool_alloc roll_mul_scale_allocator(ctx.pool(),
+                                                  ggml_nbytes(src0));
+    void* input_roll_mul_scale_buffer = roll_mul_scale_allocator.get();
+    size_t input_nb[GGML_MAX_DIMS];
+    input_nb[0] = ggml_type_size(src0->type);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        input_nb[i] = input_nb[i - 1] * src0->ne[i - 1];
+    }
+    aclTensor* acl_input_roll_mul_scale_tensor = ggml_cann_create_tensor(
+        input_roll_mul_scale_buffer, ggml_cann_type_mapping(src0->type),
+        ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
+    aclTensor* acl_input_roll_reshape_tensor = ggml_cann_create_tensor(
+        input_roll_buffer, ggml_cann_type_mapping(src0->type),
+        ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS);
+
+    aclnn_mul(ctx, acl_input_roll_reshape_tensor, acl_minus_one_tensor,
+              acl_input_roll_mul_scale_tensor);
+
+    // output
+    void* output_fp32_buffer;
+    if (src0->type == GGML_TYPE_F32) {
+        aclnn_inplace_mul(ctx, acl_src, acl_cos_reshape_tensor);
+        aclnn_inplace_mul(ctx, acl_input_roll_mul_scale_tensor,
+                          acl_sin_reshape_tensor);
+        aclnn_add(ctx, acl_src, acl_input_roll_mul_scale_tensor, acl_dst);
+        // TODO: ne0 != n_dims in mode2
+    } else if (src0->type == GGML_TYPE_F16) {
+        size_t input_fp32_nb[GGML_MAX_DIMS];
+        input_fp32_nb[0] = sizeof(float_t);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            input_fp32_nb[i] = input_fp32_nb[i - 1] * dst->ne[i - 1];
+        }
+        ggml_cann_pool_alloc fp32_allocator1(
+            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+        void* input_fp32_buffer1 = fp32_allocator1.get();
+        aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor(
+            input_fp32_buffer1, ACL_FLOAT, sizeof(float_t), dst->ne,
+            input_fp32_nb, GGML_MAX_DIMS);
+        ggml_cann_pool_alloc fp32_allocator2(
+            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+        void* input_fp32_buffer2 = fp32_allocator2.get();
+        aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor(
+            input_fp32_buffer2, ACL_FLOAT, sizeof(float_t), dst->ne,
+            input_fp32_nb, GGML_MAX_DIMS);
+
+        ggml_cann_pool_alloc fp32_allocator(
+            ctx.pool(), ggml_nelements(dst) * sizeof(float_t));
+        output_fp32_buffer = fp32_allocator.get();
+        aclTensor* output_fp32_tensor = ggml_cann_create_tensor(
+            output_fp32_buffer, ACL_FLOAT, sizeof(float_t), dst->ne,
+            input_fp32_nb, GGML_MAX_DIMS);
+        aclnn_mul(ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
+        aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
+                  input_fp32_tensor2);
+        aclnn_add(ctx, input_fp32_tensor1, input_fp32_tensor2,
+                  output_fp32_tensor);
+        aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16);
+
+        ACL_CHECK(aclDestroyTensor(input_fp32_tensor1));
+        ACL_CHECK(aclDestroyTensor(input_fp32_tensor2));
+        ACL_CHECK(aclDestroyTensor(output_fp32_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_minus_one_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_input_roll_mul_scale_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_input_roll_reshape_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_src));
+    }
+    return;
+#endif
+
+    // src0 == GGML_TYPE_F16
+    // TODO: optimization this `if` code
+    if (src0->type == GGML_TYPE_F16) {
+        ggml_cann_pool_alloc sin_final_allocator(
+            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
+        ggml_cann_pool_alloc cos_final_allocator(
+            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
+        void* sin_final_buffer = sin_final_allocator.get();
+        void* cos_final_buffer = cos_final_allocator.get();
+
+        int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
+        size_t sin_final_nb[GGML_MAX_DIMS];
+        sin_final_nb[0] = ggml_type_size(src0->type);
+        for (int i = 1; i < GGML_MAX_DIMS; i++) {
+            sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1];
+        }
+        aclTensor* acl_sin_final_tensor = ggml_cann_create_tensor(
+            sin_final_buffer, ggml_cann_type_mapping(src0->type),
+            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
+            GGML_MAX_DIMS);
+        aclTensor* acl_cos_final_tensor = ggml_cann_create_tensor(
+            cos_final_buffer, ggml_cann_type_mapping(src0->type),
+            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
+            GGML_MAX_DIMS);
+
+        aclnn_cast(ctx, acl_sin_reshape_tensor, acl_sin_final_tensor,
+                   ggml_cann_type_mapping(src0->type));
+        aclnn_cast(ctx, acl_cos_reshape_tensor, acl_cos_final_tensor,
+                   ggml_cann_type_mapping(src0->type));
+        ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
+        ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
+        acl_sin_reshape_tensor = acl_sin_final_tensor;
+        acl_cos_reshape_tensor = acl_cos_final_tensor;
+    }
 
     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
@@ -3206,10 +3409,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         acl_mode = 1;
     }
 
-    aclTensor* acl_x = ggml_cann_create_tensor(src0);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
     ACL_CHECK(aclnnRotaryPositionEmbeddingGetWorkspaceSize(
-        acl_x, acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode,
+        acl_src, acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode,
         acl_dst, &workspaceSize, &executor));
     if (workspaceSize > 0) {
         ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
@@ -3219,7 +3420,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclnnRotaryPositionEmbedding(workspaceAddr, workspaceSize,
                                            executor, ctx.stream()));
 
-    ACL_CHECK(aclDestroyTensor(acl_x));
+    ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
     ACL_CHECK(aclDestroyTensor(acl_dst));
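
The `ASCEND_310P` branch above builds RoPE out of primitives (`aclnn_roll`, a ±1 mask from `aclnn_values` plus `aclnn_index_fill_tensor` or `aclnn_muls`, then elementwise mul/add) and returns before the `aclnnRotaryPositionEmbedding` call is reached. It relies on the identity dst = x * cos + roll(x) * sign * sin. A minimal CPU sketch for one row in the neox layout follows; the flat indexing and the function name are our own simplification.

```cpp
// CPU sketch of the roll + sign-mask decomposition used in the
// ASCEND_310P branch, shown for one row of the neox layout.
// Purely illustrative; flat indexing and names are our own.
void rope_neox_roll_reference(const float* x, const float* cos_t,
                              const float* sin_t, float* dst, int ne0) {
    const int half = ne0 / 2;
    for (int i = 0; i < ne0; i++) {
        // aclnn_roll with shift ne0/2: rolled[i] = x[(i - half + ne0) % ne0]
        const float rolled = (i < half) ? x[i + half] : x[i - half];
        // mask built by aclnn_values + aclnn_muls: [-1,...,-1, 1,...,1]
        const float sign = (i < half) ? -1.0f : 1.0f;
        dst[i] = x[i] * cos_t[i] + rolled * sign * sin_t[i];
    }
}
```

The `!is_neox` branch applies the same identity with a roll of 1 inside each adjacent pair and an alternating `[-1, 1, -1, 1, ...]` mask, which is what its `aclnn_index_fill_tensor` call constructs.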
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -1739,7 +1739,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_ROPE: {
             // TODO: with ops-test v == 1
             float * ext_factor = (float*)((int32_t*)op->op_params + 7);
-            float * attn_factor = (float*)((int32_t*)op->op_params + 8);
             // TODO: n_dims <= ne0
             if (op->src[0]->ne[0] != op->op_params[1]) {
                 return false;
@@ -1748,17 +1747,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             if (*ext_factor != 0) {
                 return false;
             }
-            // TODO: attn_factor != 1
-            if (*attn_factor != 1) {
-                return false;
-            }
-            //TODO: type == GGML_TYPE_F16
-            switch (op->src[0]->type) {
-                case GGML_TYPE_F32:
-                    return true;
-                default:
-                    return false;
-            }
+            return true;
         }
         case GGML_OP_UPSCALE: {
             // aclnnUpsampleNearest2dGetWorkspaceSize not support
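
After this change, `ggml_backend_cann_supports_op` accepts `GGML_OP_ROPE` whenever `ext_factor == 0` and the rotated dimension count equals `ne0`; `attn_factor` and F16 inputs no longer force a fallback to the CPU backend. For reference, ggml stores ROPE's float parameters bit-copied into the `int32_t` `op_params` array, which is why the guard reads them back through pointer casts. A hedged sketch of that access pattern (the struct and names are ours):

```cpp
#include <cstdint>
#include <cstring>

// Sketch of the op_params access pattern used in the guard above.
// Offsets match the code in this commit; the struct and names are ours.
struct rope_params_view {
    int32_t n_dims;       // op_params[1], rotated dimensions
    float   ext_factor;   // op_params[7], bit-copied float
    float   attn_factor;  // op_params[8], bit-copied float
};

static rope_params_view read_rope_params(const int32_t* op_params) {
    rope_params_view v;
    v.n_dims = op_params[1];
    std::memcpy(&v.ext_factor,  op_params + 7, sizeof(float));
    std::memcpy(&v.attn_factor, op_params + 8, sizeof(float));
    return v;
}
```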