Chenguang Li committed on
Commit
14d0d7c
·
1 Parent(s): f1f88b8

CANN: Add 310P operator support check (llama/12962)

Browse files
ggml/src/ggml-cann/aclnn_ops.cpp CHANGED
@@ -625,6 +625,10 @@ static void ggml_cann_avg_pool2d(ggml_backend_cann_context& ctx,
625
  bool count_include_pad = true;
626
  int64_t divisor_override = 0;
627
  int8_t cube_math_type = 0;
 
 
 
 
628
  GGML_CANN_CALL_ACLNN_OP(AvgPool2d, acl_src, kernel_size, strides, paddings_avg,
629
  ceil_mode, count_include_pad, divisor_override,
630
  cube_math_type, acl_dst);
@@ -2590,6 +2594,10 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
2590
  int64_t groups = 1;
2591
  int8_t cubeMathType = 0;
2592
 
 
 
 
 
2593
  GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
2594
  padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
2595
 
 
625
  bool count_include_pad = true;
626
  int64_t divisor_override = 0;
627
  int8_t cube_math_type = 0;
628
+ #ifdef ASCEND_310P
629
+ cube_math_type = 1;
630
+ #endif
631
+
632
  GGML_CANN_CALL_ACLNN_OP(AvgPool2d, acl_src, kernel_size, strides, paddings_avg,
633
  ceil_mode, count_include_pad, divisor_override,
634
  cube_math_type, acl_dst);
 
2594
  int64_t groups = 1;
2595
  int8_t cubeMathType = 0;
2596
 
2597
+ #ifdef ASCEND_310P
2598
+ cubeMathType = 1;
2599
+ #endif
2600
+
2601
  GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
2602
  padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
2603
 
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -2022,6 +2022,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
2022
  return true;
2023
  case GGML_TYPE_Q8_0:
2024
  case GGML_TYPE_Q4_0:
 
 
 
 
2025
  // only support contiguous for quantized types.
2026
  return ggml_is_contiguous(op->src[0]) &&
2027
  ggml_is_contiguous(op->src[1]);
@@ -2107,6 +2111,12 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
2107
  }
2108
  case GGML_OP_POOL_2D: {
2109
  const int32_t * opts = (const int32_t *) op->op_params;
 
 
 
 
 
 
2110
  const int k0 = opts[1];
2111
  const int k1 = opts[2];
2112
  const int p0 = opts[5];
 
2022
  return true;
2023
  case GGML_TYPE_Q8_0:
2024
  case GGML_TYPE_Q4_0:
2025
+ #ifdef ASCEND_310P
2026
+ // Q4 && Q8 per group is not supported on 310P device
2027
+ return false;
2028
+ #endif
2029
  // only support contiguous for quantized types.
2030
  return ggml_is_contiguous(op->src[0]) &&
2031
  ggml_is_contiguous(op->src[1]);
 
2111
  }
2112
  case GGML_OP_POOL_2D: {
2113
  const int32_t * opts = (const int32_t *) op->op_params;
2114
+ #ifdef ASCEND_310P
2115
+ enum ggml_op_pool opt = static_cast<ggml_op_pool>(opts[0]);
2116
+ if(opt == GGML_OP_POOL_MAX){
2117
+ return false;
2118
+ }
2119
+ #endif
2120
  const int k0 = opts[1];
2121
  const int k1 = opts[2];
2122
  const int p0 = opts[5];