Chenguang Li committed
Commit · 3b46fdc
1 Parent(s): 4b7a407
CANN: Support Opt CONV_TRANSPOSE_1D and ELU (llama/12786)
* [CANN] Support ELU and CONV_TRANSPOSE_1D
* [CANN]Modification review comments
* [CANN]Modification review comments
* [CANN]name adjustment
* [CANN]remove lambda used in template
* [CANN]Use std::func instead of template
* [CANN]Modify the code according to the review comments
---------
Signed-off-by: noemotiovon <[email protected]>
- ggml/src/ggml-cann/aclnn_ops.cpp +62 -0
- ggml/src/ggml-cann/aclnn_ops.h +111 -56
- ggml/src/ggml-cann/ggml-cann.cpp +16 -7
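
Two of the commit-message items above ("remove lambda used in template", "Use std::func instead of template") describe the main refactor: the unary-op helper now takes a std::function argument instead of a template parameter, so each call site can pass a plain lambda. A minimal, self-contained sketch of that dispatch pattern, using simplified placeholder types rather than the actual ggml/CANN declarations:

    #include <cstdio>
    #include <functional>

    // Stand-ins for ggml_backend_cann_context and aclTensor, for illustration only.
    struct Ctx {};
    struct Tensor { float v; };

    // std::function-based dispatch: the operation is an ordinary runtime argument,
    // so callers can hand in a lambda without any template machinery.
    static void unary_dispatch(std::function<void(Ctx&, Tensor*, Tensor*)> op,
                               Ctx& ctx, Tensor* src, Tensor* dst) {
        op(ctx, src, dst);
    }

    int main() {
        Ctx ctx;
        Tensor src{-1.0f}, dst{0.0f};
        // The caller supplies the concrete op as a lambda, mirroring how
        // GGML_CANN_CALL_UNARY_OP wraps GGML_CANN_CALL_ACLNN_OP in the patch below.
        unary_dispatch([](Ctx&, Tensor* s, Tensor* d) { d->v = -s->v; }, ctx, &src, &dst);
        std::printf("%f\n", dst.v); // prints 1.000000
        return 0;
    }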
ggml/src/ggml-cann/aclnn_ops.cpp CHANGED

@@ -57,6 +57,8 @@
 #include <aclnnop/aclnn_sub.h>
 #include <aclnnop/aclnn_mul.h>
 #include <aclnnop/aclnn_div.h>
+#include <aclnnop/aclnn_convolution.h>
+#include <aclnnop/aclnn_elu.h>
 #include <float.h>
 
 #include <cmath>

@@ -86,6 +88,20 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclT
     }
 }
 
+void ggml_cann_unary_op(
+    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
+    ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+    ggml_tensor* src = dst->src[0];
+
+    aclTensor* acl_src = ggml_cann_create_tensor(src);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    unary_op(ctx, acl_src, acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_src));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
 /**
  * @brief Repeats elements of a tensor along each dimension according to the
  * specified repeat array.

@@ -2585,3 +2601,49 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
+
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];
+
+    // stride
+    int64_t s0 = ((const int32_t*)(dst->op_params))[0];
+
+    aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
+
+    int64_t strideVal[1];
+    strideVal[0] = s0;
+    aclIntArray *stride = aclCreateIntArray(strideVal, 1);
+    int64_t paddingVal[] = {0};
+    aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
+    int64_t dilationVal[] = {1};
+    aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
+    bool transposed = true;
+    int64_t groups = 1;
+    int8_t cubeMathType = 0;
+
+    GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
+        padding, dilation, transposed, padding, groups, acl_dst, cubeMathType);
+
+    ACL_CHECK(aclDestroyTensor(acl_weight));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+
+    aclTensor* acl_input = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    float alphaValue = 1.0f;
+    aclScalar* alpha = nullptr;
+    alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+
+    GGML_CANN_CALL_ACLNN_OP(Elu, acl_input, alpha, alpha, alpha,
+        acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_input));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
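
For reference, the CANN call above fixes padding to 0 and dilation to 1 and reads only the stride s0 from op_params, so the expected output length follows the standard transposed-convolution formula. A small illustrative helper (hypothetical, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // With padding = 0 and dilation = 1, as configured in ggml_cann_conv_transpose_1d,
    // an input of length L_in and a kernel of size K produce
    // L_out = (L_in - 1) * s0 + K.
    static int64_t conv_transpose_1d_out_len(int64_t L_in, int64_t K, int64_t s0) {
        return (L_in - 1) * s0 + K;
    }

    int main() {
        // Example: L_in = 8, K = 3, s0 = 2  ->  (8 - 1) * 2 + 3 = 17.
        std::printf("%lld\n", (long long) conv_transpose_1d_out_len(8, 3, 2));
        return 0;
    }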
ggml/src/ggml-cann/aclnn_ops.h CHANGED

@@ -1,15 +1,4 @@
-#ifndef CANN_ACLNN_OPS
-#define CANN_ACLNN_OPS
-
 /**
- * @file acl_tensor
- * @brief This file contains related functions of ggml_tensor and acl_tensor.
- * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
- * functions.
- * @author hipudding <[email protected]>
- * @author wangshuai09 <[email protected]>
- * @date July 15, 2024
- *
  * Copyright (c) 2023-2024 The ggml authors
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy

@@ -31,6 +20,9 @@
  * IN THE SOFTWARE.
  */
 
+#ifndef CANN_ACLNN_OPS
+#define CANN_ACLNN_OPS
+
 #include <aclnnop/aclnn_abs.h>
 #include <aclnnop/aclnn_neg.h>
 #include <aclnnop/aclnn_exp.h>

@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * operation is executed using the CANN backend for optimized performance.
  *
  * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the indices of the maximum values will
- *            dst->op is `GGML_OP_ARGMAX`.
+ * @param dst The destination tensor where the indices of the maximum values will
+ *            be stored. dst->op is `GGML_OP_ARGMAX`.
  */
 void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 

@@ -600,40 +592,8 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
                aclTensor* acl_dst);
 
 /**
- * @brief
- *
- * This macro submit an asynchronous task on the specified stream.
- * The task uses memory allocated by the allocator. It is guaranteed
- * that the memory will not be accessed by other tasks until this task
- * completes, due to the sequential execution order within the same stream.
- *
- * @param OP_NAME aclnn operator name.
- * @param args Additional arguments required by the task.
- *
- * @note
- * Memory from the allocator will be "freed" immediately and can be
- * reallocated to other pointers. However, it won't be accessed by any
- * other task before this asynchronous task ends, because all tasks in the
- * same stream are executed in queue order.
- */
-#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
-    do { \
-        uint64_t workspaceSize = 0; \
-        aclOpExecutor * executor; \
-        void * workspaceAddr = nullptr; \
-        \
-        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
-        \
-        if (workspaceSize > 0) { \
-            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
-            workspaceAddr = workspace_allocator.get(); \
-        } \
-        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
-    } while (0)
-
-
-/**
- * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
+ *        output tensor.
  *
  * This function checks whether broadcasting is needed between `src0` and `src1`.
  * If broadcasting is required, it calculates the proper shapes and creates

@@ -647,14 +607,57 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
  * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
  */
-void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
-
+void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
+                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+
+/**
+ * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
+ *        tensor using the CANN backend.
+ *
+ * @details This function performs a 1D transposed convolution (also known as
+ *          deconvolution) operation on the input tensor. The computed result is stored
+ *          in the destination tensor `dst`. The operation is optimized using the CANN
+ *          backend for improved performance.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the transposed convolution result
+ *            will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
+ */
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
 /**
- * @brief
+ * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
+ *        using the CANN backend.
+ *
+ * @details This function performs an element-wise ELU activation on the input
+ *          tensor.
+ *          The result is written to the destination tensor `dst` in-place.
+ *          The ELU function is defined as:
+ *
+ *              \text{ELU}(x) =
+ *              \begin{cases}
+ *              x, & \text{if } x > 0 \\
+ *              \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
+ *              \end{cases}
  *
- *
- *
+ * where α (alpha) is a hyperparameter, typically set to 1.0.
+ * This operation is optimized using the CANN backend for high-performance
+ * inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the ELU-activated result will be stored.
+ *            dst->op is expected to be `GGML_OP_ELU`.
+ */
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies a element-wise operation to two input tensors using the CANN
+ *        backend.
+ *
+ * This templated function takes a binary operator and applies it to two source
+ * tensors
+ * associated with the destination tensor. The function handles broadcasting as
+ * needed.
  *
  * @tparam binary_op A callable object (e.g., lambda or function pointer) representing
  *        the binary operation to be performed. It must take three arguments:

@@ -681,6 +684,38 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
+/**
+ * @brief Launches an asynchronous task using the memory allocator.
+ *
+ * This macro submit an asynchronous task on the specified stream.
+ * The task uses memory allocated by the allocator. It is guaranteed
+ * that the memory will not be accessed by other tasks until this task
+ * completes, due to the sequential execution order within the same stream.
+ *
+ * @param OP_NAME aclnn operator name.
+ * @param args Additional arguments required by the task.
+ *
+ * @note
+ * Memory from the allocator will be "freed" immediately and can be
+ * reallocated to other pointers. However, it won't be accessed by any
+ * other task before this asynchronous task ends, because all tasks in the
+ * same stream are executed in queue order.
+ */
+#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
+    do { \
+        uint64_t workspaceSize = 0; \
+        aclOpExecutor * executor; \
+        void * workspaceAddr = nullptr; \
+        \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+        \
+        if (workspaceSize > 0) { \
+            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
+            workspaceAddr = workspace_allocator.get(); \
+        } \
+        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
+    } while (0)
+
 /**
  * @brief Applies a unary operation to an input tensor using the CANN backend.
  *

@@ -690,7 +725,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
  * @tparam unary_op A callable with the signature:
  *         void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
  *         where the first aclTensor is the source and the second is the destination.
- *
  * @param ctx The CANN backend context for managing resources and execution.
  * @param dst The destination tensor. Its src[0] is treated as the input tensor.
  */

@@ -702,10 +736,30 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
     unary_op(ctx, acl_src, acl_dst);
+
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
+/**
+ * @brief Applies a unary operation to a ggml tensor using the CANN backend.
+ *
+ * @details This function performs a unary operation on the input tensor using
+ * a user-provided lambda or callable object `unary_op`, which accepts the CANN
+ * context and two ACL tensors (source and destination). Internally, this function
+ * creates ACL representations of the ggml tensors and invokes the unary operation.
+ * The result is stored in the destination tensor `dst`. This utility abstracts the
+ * common boilerplate of tensor conversion and cleanup when implementing unary ops.
+ *
+ * @param unary_op A callable that performs the unary operation using CANN APIs.
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the result will be stored.
+ *            The source tensor is retrieved from `dst->src[0]`.
+ */
+void ggml_cann_unary_op(
+    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
+    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
  *

@@ -725,11 +779,12 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
  */
 #define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
     do { \
-        auto lambda = [](
+        auto lambda = [](ggml_backend_cann_context& ctx, \
+            aclTensor* acl_src, \
+            aclTensor* acl_dst) { \
             GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
         }; \
-        ggml_cann_unary_op
+        ggml_cann_unary_op(lambda, ctx, dst); \
     } \
     while (0)
-
 #endif  // CANN_ACLNN_OPS
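
To make the two macros above concrete, this is roughly what GGML_CANN_CALL_UNARY_OP(Tanh) expands to after this change. It is a manual expansion for illustration only (the inner do/while of GGML_CANN_CALL_ACLNN_OP is flattened), not code from the patch:

    do {
        auto lambda = [](ggml_backend_cann_context& ctx,
                         aclTensor* acl_src,
                         aclTensor* acl_dst) {
            // Body of GGML_CANN_CALL_ACLNN_OP(Tanh, acl_src, acl_dst):
            uint64_t workspaceSize = 0;
            aclOpExecutor * executor;
            void * workspaceAddr = nullptr;

            // Query the workspace size, allocate it from the context pool if needed,
            // then launch the operator asynchronously on the context's stream.
            ACL_CHECK(aclnnTanhGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
            if (workspaceSize > 0) {
                ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
                workspaceAddr = workspace_allocator.get();
            }
            ACL_CHECK(aclnnTanh(workspaceAddr, workspaceSize, executor, ctx.stream()));
        };
        ggml_cann_unary_op(lambda, ctx, dst);
    } while (0);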
ggml/src/ggml-cann/ggml-cann.cpp CHANGED

@@ -1330,12 +1330,13 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             GGML_CANN_CALL_UNARY_OP(Silu);
             break;
         case GGML_UNARY_OP_GELU_QUICK: {
-
-
-
-
-        }
-
+            auto lambda = [](ggml_backend_cann_context& ctx,
+                aclTensor* acl_src,
+                aclTensor* acl_dst) {
+                GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
+            };
+            ggml_cann_unary_op(lambda, ctx, dst);
+        } break;
         case GGML_UNARY_OP_TANH:
             GGML_CANN_CALL_UNARY_OP(Tanh);
             break;

@@ -1354,6 +1355,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_UNARY_OP_EXP:
             GGML_CANN_CALL_UNARY_OP(Exp);
             break;
+        case GGML_UNARY_OP_ELU:
+            ggml_cann_elu(ctx, dst);
+            break;
         default:
             return false;
     }

@@ -1448,7 +1452,10 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             break;
         case GGML_OP_SIN:
             ggml_cann_unary_op<aclnn_sin>(ctx, dst);
-
+            break;
+        case GGML_OP_CONV_TRANSPOSE_1D:
+            ggml_cann_conv_transpose_1d(ctx, dst);
+            break;
         default:
             return false;
     }

@@ -1710,6 +1717,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             case GGML_UNARY_OP_GELU_QUICK:
             case GGML_UNARY_OP_TANH:
             case GGML_UNARY_OP_EXP:
+            case GGML_UNARY_OP_ELU:
                 return true;
             default:
                 return false;

@@ -1845,6 +1853,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_ARGMAX:
         case GGML_OP_COS:
         case GGML_OP_SIN:
+        case GGML_OP_CONV_TRANSPOSE_1D:
             return true;
         default:
             return false;
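
The dispatch above routes GGML_UNARY_OP_ELU to ggml_cann_elu with alpha fixed at 1.0f. As a quick reference for what the backend is expected to compute, here is a standalone CPU sketch of the same ELU definition given in the aclnn_ops.h doc comment (illustrative only, unrelated to the CANN code path):

    #include <cmath>
    #include <cstdio>

    // ELU(x) = x                    if x > 0
    //        = alpha * (e^x - 1)    if x <= 0, with alpha = 1.0f as used by ggml_cann_elu.
    static float elu_ref(float x, float alpha) {
        return x > 0.0f ? x : alpha * (std::exp(x) - 1.0f);
    }

    int main() {
        const float xs[] = {-2.0f, -0.5f, 0.0f, 1.5f};
        for (float x : xs) {
            std::printf("elu(% .1f) = % .6f\n", x, elu_ref(x, 1.0f));
        }
        return 0;
    }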