hipudding committed on
Commit
7d5f3d4
·
1 Parent(s): 3847456

CANN: Fix failed test cases (llama/12708)

Browse files

* CANN: Fix memory waste in aclnn_tensor

* CANN: fix backend ops fail

* CANN: fix acl_tensor memory alloc.

* CANN: format

* CANN: remove trailing whitespace

ggml/src/ggml-cann/acl_tensor.cpp CHANGED
@@ -54,9 +54,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
54
  // added.
55
  int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
56
 
57
- int64_t acl_storage_len = 0;
58
  if (ne == nullptr) {
59
- acl_storage_len = ggml_nbytes(tensor);
60
  for (int i = 0; i < GGML_MAX_DIMS; i++) {
61
  acl_ne[i] = tensor->ne[i];
62
  // The step size of acl is in elements.
@@ -65,14 +63,18 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
65
  } else {
66
  // With bcast
67
  for (int i = 0; i < dims; i++) {
68
- acl_storage_len += (ne[i] - 1) * nb[i];
69
  acl_ne[i] = ne[i];
70
  acl_stride[i] = nb[i] / ggml_element_size(tensor);
71
  }
72
  }
73
 
74
- // Reverse ne and stride.
75
  int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
 
 
 
 
 
 
76
  std::reverse(acl_ne, acl_ne + final_dims);
77
  std::reverse(acl_stride, acl_stride + final_dims);
78
 
 
54
  // added.
55
  int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
56
 
 
57
  if (ne == nullptr) {
 
58
  for (int i = 0; i < GGML_MAX_DIMS; i++) {
59
  acl_ne[i] = tensor->ne[i];
60
  // The step size of acl is in elements.
 
63
  } else {
64
  // With bcast
65
  for (int i = 0; i < dims; i++) {
 
66
  acl_ne[i] = ne[i];
67
  acl_stride[i] = nb[i] / ggml_element_size(tensor);
68
  }
69
  }
70
 
 
71
  int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
72
+ int64_t acl_storage_len = 1;
73
+ for (int i = 0; i < final_dims; i++) {
74
+ acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
75
+ }
76
+
77
+ // Reverse ne and stride.
78
  std::reverse(acl_ne, acl_ne + final_dims);
79
  std::reverse(acl_stride, acl_stride + final_dims);
80
 
ggml/src/ggml-cann/acl_tensor.h CHANGED
@@ -101,14 +101,14 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
101
  tmp_stride[i] = nb[i] / type_size;
102
  }
103
 
104
- std::reverse(tmp_ne, tmp_ne + dims);
105
- std::reverse(tmp_stride, tmp_stride + dims);
106
-
107
- int64_t acl_storage_len = 0;
108
  for (int i = 0; i < dims; i++) {
109
- acl_storage_len += (ne[i] - 1) * nb[i];
110
  }
111
 
 
 
 
112
  aclTensor* acl_tensor =
113
  aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
114
  format, &acl_storage_len, 1, data_ptr);
 
101
  tmp_stride[i] = nb[i] / type_size;
102
  }
103
 
104
+ int64_t acl_storage_len = 1;
 
 
 
105
  for (int i = 0; i < dims; i++) {
106
+ acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i];
107
  }
108
 
109
+ std::reverse(tmp_ne, tmp_ne + dims);
110
+ std::reverse(tmp_stride, tmp_stride + dims);
111
+
112
  aclTensor* acl_tensor =
113
  aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
114
  format, &acl_storage_len, 1, data_ptr);
ggml/src/ggml-cann/aclnn_ops.cpp CHANGED
@@ -358,8 +358,6 @@ void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
358
 
359
  void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
360
  ggml_tensor* src = dst->src[0];
361
- GGML_ASSERT(src->type == GGML_TYPE_F32);
362
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
363
 
364
  float min;
365
  float max;
@@ -1090,8 +1088,6 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1090
  float eps;
1091
  memcpy(&eps, dst->op_params, sizeof(float));
1092
 
1093
- GGML_ASSERT(eps > 0.0f);
1094
-
1095
  uint64_t workspaceSize = 0;
1096
  aclOpExecutor* executor;
1097
  void* workspaceAddr = nullptr;
@@ -3152,7 +3148,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
3152
  // TODO: use ascendc
3153
  // Only test with LLAMA model.
3154
  ggml_tensor* src0 = dst->src[0]; // input
3155
- ggml_tensor* src2 = dst->src[2]; // freq_factors
3156
 
3157
  // param
3158
  float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
 
358
 
359
  void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
360
  ggml_tensor* src = dst->src[0];
 
 
361
 
362
  float min;
363
  float max;
 
1088
  float eps;
1089
  memcpy(&eps, dst->op_params, sizeof(float));
1090
 
 
 
1091
  uint64_t workspaceSize = 0;
1092
  aclOpExecutor* executor;
1093
  void* workspaceAddr = nullptr;
 
3148
  // TODO: use ascendc
3149
  // Only test with LLAMA model.
3150
  ggml_tensor* src0 = dst->src[0]; // input
3151
+ // ggml_tensor* src2 = dst->src[2]; // freq_factors, not used now.
3152
 
3153
  // param
3154
  float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
ggml/src/ggml-cann/aclnn_ops.h CHANGED
@@ -535,9 +535,6 @@ template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
535
  void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
536
  ggml_tensor* src = dst->src[0];
537
 
538
- GGML_ASSERT(src->type == GGML_TYPE_F32);
539
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
540
-
541
  aclTensor* acl_src = ggml_cann_create_tensor(src);
542
  aclTensor* acl_dst = ggml_cann_create_tensor(dst);
543
 
@@ -566,9 +563,6 @@ template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
566
  void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
567
  ggml_tensor* src = dst->src[0];
568
 
569
- GGML_ASSERT(src->type == GGML_TYPE_F32);
570
- GGML_ASSERT(dst->type == GGML_TYPE_F32);
571
-
572
  aclTensor* acl_src = ggml_cann_create_tensor(src);
573
  aclTensor* acl_dst = ggml_cann_create_tensor(dst);
574
 
 
535
  void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
536
  ggml_tensor* src = dst->src[0];
537
 
 
 
 
538
  aclTensor* acl_src = ggml_cann_create_tensor(src);
539
  aclTensor* acl_dst = ggml_cann_create_tensor(dst);
540
 
 
563
  void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
564
  ggml_tensor* src = dst->src[0];
565
 
 
 
 
566
  aclTensor* acl_src = ggml_cann_create_tensor(src);
567
  aclTensor* acl_dst = ggml_cann_create_tensor(dst);
568
 
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -1458,11 +1458,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
1458
  ACL_CHECK(aclrtSynchronizeDevice());
1459
  ACL_CHECK(aclrtResetDevice(cann_ctx->device));
1460
 
1461
- // finalize when last backend freed.
1462
- if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
1463
- ACL_CHECK(aclFinalize());
1464
- }
1465
-
1466
  delete cann_ctx;
1467
  delete backend;
1468
  }
@@ -1688,11 +1683,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1688
  }
1689
  case GGML_OP_MUL_MAT: {
1690
  switch (op->src[0]->type) {
1691
- case GGML_TYPE_Q8_0:
1692
  case GGML_TYPE_F16:
1693
  case GGML_TYPE_F32:
1694
- case GGML_TYPE_Q4_0:
1695
  return true;
 
 
 
 
 
1696
  default:
1697
  return false;
1698
  }
@@ -1738,13 +1736,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1738
  }
1739
  case GGML_OP_ROPE: {
1740
  // TODO: with ops-test v == 1
1741
- float * ext_factor = (float*)((int32_t*)op->op_params + 7);
 
1742
  // TODO: n_dims <= ne0
1743
  if (op->src[0]->ne[0] != op->op_params[1]) {
1744
  return false;
1745
  }
1746
  // TODO: ext_factor != 0
1747
- if (*ext_factor != 0) {
1748
  return false;
1749
  }
1750
 
@@ -1766,6 +1765,16 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1766
  }
1767
  return true;
1768
  }
 
 
 
 
 
 
 
 
 
 
1769
  case GGML_OP_DUP:
1770
  case GGML_OP_IM2COL:
1771
  case GGML_OP_CONCAT:
@@ -1785,7 +1794,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
1785
  case GGML_OP_CLAMP:
1786
  case GGML_OP_DIAG_MASK_INF:
1787
  case GGML_OP_SOFT_MAX:
1788
- case GGML_OP_POOL_2D:
1789
  case GGML_OP_SUM_ROWS:
1790
  case GGML_OP_ARGSORT:
1791
  case GGML_OP_ACC:
 
1458
  ACL_CHECK(aclrtSynchronizeDevice());
1459
  ACL_CHECK(aclrtResetDevice(cann_ctx->device));
1460
 
 
 
 
 
 
1461
  delete cann_ctx;
1462
  delete backend;
1463
  }
 
1683
  }
1684
  case GGML_OP_MUL_MAT: {
1685
  switch (op->src[0]->type) {
 
1686
  case GGML_TYPE_F16:
1687
  case GGML_TYPE_F32:
 
1688
  return true;
1689
+ case GGML_TYPE_Q8_0:
1690
+ case GGML_TYPE_Q4_0:
1691
+ // only support contiguous for quantized types.
1692
+ return ggml_is_contiguous(op->src[0]) &&
1693
+ ggml_is_contiguous(op->src[1]);
1694
  default:
1695
  return false;
1696
  }
 
1736
  }
1737
  case GGML_OP_ROPE: {
1738
  // TODO: with ops-test v == 1
1739
+ float ext_factor = 0.0f;
1740
+ memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float));
1741
  // TODO: n_dims <= ne0
1742
  if (op->src[0]->ne[0] != op->op_params[1]) {
1743
  return false;
1744
  }
1745
  // TODO: ext_factor != 0
1746
+ if (ext_factor != 0) {
1747
  return false;
1748
  }
1749
 
 
1765
  }
1766
  return true;
1767
  }
1768
+ case GGML_OP_POOL_2D: {
1769
+ const int32_t * opts = (const int32_t *) op->op_params;
1770
+ const int k0 = opts[1];
1771
+ const int k1 = opts[2];
1772
+ const int p0 = opts[5];
1773
+ const int p1 = opts[6];
1774
+ // value of paddingH should be at most half of kernelH
1775
+ // value of paddingW should be at most half of kernelW
1776
+ return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
1777
+ }
1778
  case GGML_OP_DUP:
1779
  case GGML_OP_IM2COL:
1780
  case GGML_OP_CONCAT:
 
1794
  case GGML_OP_CLAMP:
1795
  case GGML_OP_DIAG_MASK_INF:
1796
  case GGML_OP_SOFT_MAX:
 
1797
  case GGML_OP_SUM_ROWS:
1798
  case GGML_OP_ARGSORT:
1799
  case GGML_OP_ACC: