Spaces:
Sleeping
Sleeping
CANN: Fix failed test cases (llama/12708)
Browse files
* CANN: Fix memory waste in aclnn_tensor
* CANN: fix backend ops fail
* CANN: fix acl_tensor memory alloc.
* CANN: format
* CANN: remove trailing whitespace
ggml/src/ggml-cann/acl_tensor.cpp
CHANGED
|
@@ -54,9 +54,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
| 54 |
// added.
|
| 55 |
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
|
| 56 |
|
| 57 |
-
int64_t acl_storage_len = 0;
|
| 58 |
if (ne == nullptr) {
|
| 59 |
-
acl_storage_len = ggml_nbytes(tensor);
|
| 60 |
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
| 61 |
acl_ne[i] = tensor->ne[i];
|
| 62 |
// The step size of acl is in elements.
|
|
@@ -65,14 +63,18 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
|
|
| 65 |
} else {
|
| 66 |
// With bcast
|
| 67 |
for (int i = 0; i < dims; i++) {
|
| 68 |
-
acl_storage_len += (ne[i] - 1) * nb[i];
|
| 69 |
acl_ne[i] = ne[i];
|
| 70 |
acl_stride[i] = nb[i] / ggml_element_size(tensor);
|
| 71 |
}
|
| 72 |
}
|
| 73 |
|
| 74 |
-
// Reverse ne and stride.
|
| 75 |
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
std::reverse(acl_ne, acl_ne + final_dims);
|
| 77 |
std::reverse(acl_stride, acl_stride + final_dims);
|
| 78 |
|
|
|
|
| 54 |
// added.
|
| 55 |
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
|
| 56 |
|
|
|
|
| 57 |
if (ne == nullptr) {
|
|
|
|
| 58 |
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
| 59 |
acl_ne[i] = tensor->ne[i];
|
| 60 |
// The step size of acl is in elements.
|
|
|
|
| 63 |
} else {
|
| 64 |
// With bcast
|
| 65 |
for (int i = 0; i < dims; i++) {
|
|
|
|
| 66 |
acl_ne[i] = ne[i];
|
| 67 |
acl_stride[i] = nb[i] / ggml_element_size(tensor);
|
| 68 |
}
|
| 69 |
}
|
| 70 |
|
|
|
|
| 71 |
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
|
| 72 |
+
int64_t acl_storage_len = 1;
|
| 73 |
+
for (int i = 0; i < final_dims; i++) {
|
| 74 |
+
acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
// Reverse ne and stride.
|
| 78 |
std::reverse(acl_ne, acl_ne + final_dims);
|
| 79 |
std::reverse(acl_stride, acl_stride + final_dims);
|
| 80 |
|
ggml/src/ggml-cann/acl_tensor.h
CHANGED
|
@@ -101,14 +101,14 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
|
|
| 101 |
tmp_stride[i] = nb[i] / type_size;
|
| 102 |
}
|
| 103 |
|
| 104 |
-
|
| 105 |
-
std::reverse(tmp_stride, tmp_stride + dims);
|
| 106 |
-
|
| 107 |
-
int64_t acl_storage_len = 0;
|
| 108 |
for (int i = 0; i < dims; i++) {
|
| 109 |
-
acl_storage_len += (ne[i] - 1) * nb[i];
|
| 110 |
}
|
| 111 |
|
|
|
|
|
|
|
|
|
|
| 112 |
aclTensor* acl_tensor =
|
| 113 |
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
|
| 114 |
format, &acl_storage_len, 1, data_ptr);
|
|
|
|
| 101 |
tmp_stride[i] = nb[i] / type_size;
|
| 102 |
}
|
| 103 |
|
| 104 |
+
int64_t acl_storage_len = 1;
|
|
|
|
|
|
|
|
|
|
| 105 |
for (int i = 0; i < dims; i++) {
|
| 106 |
+
acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i];
|
| 107 |
}
|
| 108 |
|
| 109 |
+
std::reverse(tmp_ne, tmp_ne + dims);
|
| 110 |
+
std::reverse(tmp_stride, tmp_stride + dims);
|
| 111 |
+
|
| 112 |
aclTensor* acl_tensor =
|
| 113 |
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
|
| 114 |
format, &acl_storage_len, 1, data_ptr);
|
ggml/src/ggml-cann/aclnn_ops.cpp
CHANGED
|
@@ -358,8 +358,6 @@ void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
| 358 |
|
| 359 |
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 360 |
ggml_tensor* src = dst->src[0];
|
| 361 |
-
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
| 362 |
-
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
| 363 |
|
| 364 |
float min;
|
| 365 |
float max;
|
|
@@ -1090,8 +1088,6 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
| 1090 |
float eps;
|
| 1091 |
memcpy(&eps, dst->op_params, sizeof(float));
|
| 1092 |
|
| 1093 |
-
GGML_ASSERT(eps > 0.0f);
|
| 1094 |
-
|
| 1095 |
uint64_t workspaceSize = 0;
|
| 1096 |
aclOpExecutor* executor;
|
| 1097 |
void* workspaceAddr = nullptr;
|
|
@@ -3152,7 +3148,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|
| 3152 |
// TODO: use ascendc
|
| 3153 |
// Only test with LLAMA model.
|
| 3154 |
ggml_tensor* src0 = dst->src[0]; // input
|
| 3155 |
-
ggml_tensor* src2 = dst->src[2]; // freq_factors
|
| 3156 |
|
| 3157 |
// param
|
| 3158 |
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
|
|
|
| 358 |
|
| 359 |
void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 360 |
ggml_tensor* src = dst->src[0];
|
|
|
|
|
|
|
| 361 |
|
| 362 |
float min;
|
| 363 |
float max;
|
|
|
|
| 1088 |
float eps;
|
| 1089 |
memcpy(&eps, dst->op_params, sizeof(float));
|
| 1090 |
|
|
|
|
|
|
|
| 1091 |
uint64_t workspaceSize = 0;
|
| 1092 |
aclOpExecutor* executor;
|
| 1093 |
void* workspaceAddr = nullptr;
|
|
|
|
| 3148 |
// TODO: use ascendc
|
| 3149 |
// Only test with LLAMA model.
|
| 3150 |
ggml_tensor* src0 = dst->src[0]; // input
|
| 3151 |
+
// ggml_tensor* src2 = dst->src[2]; // freq_factors, not used now.
|
| 3152 |
|
| 3153 |
// param
|
| 3154 |
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
ggml/src/ggml-cann/aclnn_ops.h
CHANGED
|
@@ -535,9 +535,6 @@ template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
|
|
| 535 |
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 536 |
ggml_tensor* src = dst->src[0];
|
| 537 |
|
| 538 |
-
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
| 539 |
-
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
| 540 |
-
|
| 541 |
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
| 542 |
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
| 543 |
|
|
@@ -566,9 +563,6 @@ template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
|
|
| 566 |
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 567 |
ggml_tensor* src = dst->src[0];
|
| 568 |
|
| 569 |
-
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
| 570 |
-
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
| 571 |
-
|
| 572 |
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
| 573 |
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
| 574 |
|
|
|
|
| 535 |
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 536 |
ggml_tensor* src = dst->src[0];
|
| 537 |
|
|
|
|
|
|
|
|
|
|
| 538 |
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
| 539 |
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
| 540 |
|
|
|
|
| 563 |
void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
| 564 |
ggml_tensor* src = dst->src[0];
|
| 565 |
|
|
|
|
|
|
|
|
|
|
| 566 |
aclTensor* acl_src = ggml_cann_create_tensor(src);
|
| 567 |
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
|
| 568 |
|
ggml/src/ggml-cann/ggml-cann.cpp
CHANGED
|
@@ -1458,11 +1458,6 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
|
|
| 1458 |
ACL_CHECK(aclrtSynchronizeDevice());
|
| 1459 |
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
|
| 1460 |
|
| 1461 |
-
// finalize when last backend freed.
|
| 1462 |
-
if (cann_ctx->device == ggml_backend_cann_get_device_count() - 1) {
|
| 1463 |
-
ACL_CHECK(aclFinalize());
|
| 1464 |
-
}
|
| 1465 |
-
|
| 1466 |
delete cann_ctx;
|
| 1467 |
delete backend;
|
| 1468 |
}
|
|
@@ -1688,11 +1683,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
| 1688 |
}
|
| 1689 |
case GGML_OP_MUL_MAT: {
|
| 1690 |
switch (op->src[0]->type) {
|
| 1691 |
-
case GGML_TYPE_Q8_0:
|
| 1692 |
case GGML_TYPE_F16:
|
| 1693 |
case GGML_TYPE_F32:
|
| 1694 |
-
case GGML_TYPE_Q4_0:
|
| 1695 |
return true;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1696 |
default:
|
| 1697 |
return false;
|
| 1698 |
}
|
|
@@ -1738,13 +1736,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
| 1738 |
}
|
| 1739 |
case GGML_OP_ROPE: {
|
| 1740 |
// TODO: with ops-test v == 1
|
| 1741 |
-
float * ext_factor = (float*)((int32_t*)op->op_params + 7);
|
|
|
|
| 1742 |
// TODO: n_dims <= ne0
|
| 1743 |
if (op->src[0]->ne[0] != op->op_params[1]) {
|
| 1744 |
return false;
|
| 1745 |
}
|
| 1746 |
// TODO: ext_factor != 0
|
| 1747 |
-
if (*ext_factor != 0) {
|
| 1748 |
return false;
|
| 1749 |
}
|
| 1750 |
|
|
@@ -1766,6 +1765,16 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
| 1766 |
}
|
| 1767 |
return true;
|
| 1768 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1769 |
case GGML_OP_DUP:
|
| 1770 |
case GGML_OP_IM2COL:
|
| 1771 |
case GGML_OP_CONCAT:
|
|
@@ -1785,7 +1794,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|
| 1785 |
case GGML_OP_CLAMP:
|
| 1786 |
case GGML_OP_DIAG_MASK_INF:
|
| 1787 |
case GGML_OP_SOFT_MAX:
|
| 1788 |
-
case GGML_OP_POOL_2D:
|
| 1789 |
case GGML_OP_SUM_ROWS:
|
| 1790 |
case GGML_OP_ARGSORT:
|
| 1791 |
case GGML_OP_ACC:
|
|
|
|
| 1458 |
ACL_CHECK(aclrtSynchronizeDevice());
|
| 1459 |
ACL_CHECK(aclrtResetDevice(cann_ctx->device));
|
| 1460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1461 |
delete cann_ctx;
|
| 1462 |
delete backend;
|
| 1463 |
}
|
|
|
|
| 1683 |
}
|
| 1684 |
case GGML_OP_MUL_MAT: {
|
| 1685 |
switch (op->src[0]->type) {
|
|
|
|
| 1686 |
case GGML_TYPE_F16:
|
| 1687 |
case GGML_TYPE_F32:
|
|
|
|
| 1688 |
return true;
|
| 1689 |
+
case GGML_TYPE_Q8_0:
|
| 1690 |
+
case GGML_TYPE_Q4_0:
|
| 1691 |
+
// only support contiguous for quantized types.
|
| 1692 |
+
return ggml_is_contiguous(op->src[0]) &&
|
| 1693 |
+
ggml_is_contiguous(op->src[1]);
|
| 1694 |
default:
|
| 1695 |
return false;
|
| 1696 |
}
|
|
|
|
| 1736 |
}
|
| 1737 |
case GGML_OP_ROPE: {
|
| 1738 |
// TODO: with ops-test v == 1
|
| 1739 |
+
float ext_factor = 0.0f;
|
| 1740 |
+
memcpy(&ext_factor, (const float *) op->op_params + 7, sizeof(float));
|
| 1741 |
// TODO: n_dims <= ne0
|
| 1742 |
if (op->src[0]->ne[0] != op->op_params[1]) {
|
| 1743 |
return false;
|
| 1744 |
}
|
| 1745 |
// TODO: ext_factor != 0
|
| 1746 |
+
if (ext_factor != 0) {
|
| 1747 |
return false;
|
| 1748 |
}
|
| 1749 |
|
|
|
|
| 1765 |
}
|
| 1766 |
return true;
|
| 1767 |
}
|
| 1768 |
+
case GGML_OP_POOL_2D: {
|
| 1769 |
+
const int32_t * opts = (const int32_t *) op->op_params;
|
| 1770 |
+
const int k0 = opts[1];
|
| 1771 |
+
const int k1 = opts[2];
|
| 1772 |
+
const int p0 = opts[5];
|
| 1773 |
+
const int p1 = opts[6];
|
| 1774 |
+
// value of paddingH should be at most half of kernelH
|
| 1775 |
+
// value of paddingW should be at most half of kernelW
|
| 1776 |
+
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
|
| 1777 |
+
}
|
| 1778 |
case GGML_OP_DUP:
|
| 1779 |
case GGML_OP_IM2COL:
|
| 1780 |
case GGML_OP_CONCAT:
|
|
|
|
| 1794 |
case GGML_OP_CLAMP:
|
| 1795 |
case GGML_OP_DIAG_MASK_INF:
|
| 1796 |
case GGML_OP_SOFT_MAX:
|
|
|
|
| 1797 |
case GGML_OP_SUM_ROWS:
|
| 1798 |
case GGML_OP_ARGSORT:
|
| 1799 |
case GGML_OP_ACC:
|