qnixsynapse committed
Commit fa23a38 · 1 Parent(s): 8c60d6a

SYCL: Refactor ggml_sycl_compute_forward (llama/11121)

* SYCL: refactor ggml_sycl_compute_forward

* SYCL: add back GGML_USED(dst) to ggml_sycl_cpy

* SYCL: add function name to noop debug

* SYCL: Some device info print refactoring and add details of XMX availability
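
The core change is mechanical: each SYCL backend op no longer receives its source tensors as separate arguments, it receives only the destination tensor and reads its inputs from dst->src[]. A minimal sketch of the pattern, using the hypothetical names ggml_sycl_foo / ggml_sycl_op_foo / GGML_OP_FOO (the real ops in the diff below follow the same shape):

// Before: ops matched the now-removed ggml_sycl_func_t typedef.
//   void ggml_sycl_foo(ggml_backend_sycl_context & ctx,
//                      const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
// After: ops take only dst and pull their inputs from dst->src[].
void ggml_sycl_foo(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_foo);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

// ggml_sycl_compute_forward now calls each op directly in its switch instead of
// storing a function pointer and invoking func(ctx, src0, src1, dst) afterwards:
//   case GGML_OP_FOO:
//       ggml_sycl_foo(ctx, dst);
//       break;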

ggml/src/ggml-sycl/common.cpp CHANGED
@@ -51,6 +51,10 @@ void ggml_sycl_host_free(void* ptr) try {
     std::exit(1);
 }
 
+bool gpu_has_xmx(sycl::device &dev) {
+    return dev.has(sycl::aspect::ext_intel_matrix);
+}
+
 int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
     const int64_t max_range = std::numeric_limits<int>::max();
     int64_t sycl_down_blk_size = block_size;
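
For context, gpu_has_xmx() is a thin wrapper over a SYCL device-aspect query. A standalone sketch of the same check (not part of this commit; assumes a oneAPI DPC++ toolchain recent enough to expose sycl::aspect::ext_intel_matrix, the extension aspect used above):

#include <sycl/sycl.hpp>
#include <iostream>

int main() {
    // Pick the default device and ask whether it advertises Intel XMX (matrix) units.
    sycl::device dev{sycl::default_selector_v};
    const bool has_xmx = dev.has(sycl::aspect::ext_intel_matrix);
    std::cout << dev.get_info<sycl::info::device::name>()
              << ": XMX " << (has_xmx ? "yes" : "no") << std::endl;
    return 0;
}
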
ggml/src/ggml-sycl/common.hpp CHANGED
@@ -662,6 +662,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
     }
 }
 
+bool gpu_has_xmx(sycl::device &dev);
 
 void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                           const ggml_tensor *src1, ggml_tensor *dst,
ggml/src/ggml-sycl/concat.cpp CHANGED
@@ -158,8 +158,9 @@ static void concat_f32_sycl_non_cont(
       });
 }
 
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                         const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     queue_ptr stream = ctx.stream();
 
     const int32_t dim = ((int32_t *)dst->op_params)[0];
ggml/src/ggml-sycl/concat.hpp CHANGED
@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                         const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_CONCAT_HPP
ggml/src/ggml-sycl/conv.cpp CHANGED
@@ -71,8 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl(
       });
 }
 
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                    const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
     const float * src1_d = (const float *)src1->data;
 
ggml/src/ggml-sycl/conv.hpp CHANGED
@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                    const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_CONV_HPP
ggml/src/ggml-sycl/element_wise.cpp CHANGED
@@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
 }
 
 
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
 
 
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
ggml/src/ggml-sycl/element_wise.hpp CHANGED
@@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) {
 }
 
 
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 #endif // GGML_SYCL_ELEMENTWISE_HPP
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -54,18 +54,12 @@ static ggml_sycl_device_info ggml_sycl_init() {
     GGML_ASSERT(info.device_count <= GGML_SYCL_MAX_DEVICES);
 
     int64_t total_vram = 0;
-#if defined(GGML_SYCL_FORCE_MMQ)
-    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
-#endif
-#if defined(SYCL_USE_XMX)
-    GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
-#endif
-    GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);
-
+    /* This is a bit misleading; reserved for later */
+    // #if defined(SYCL_USE_XMX)
+    //     GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
+    // #else
+    //     GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
+    // #endif
     for (int i = 0; i < info.device_count; ++i) {
         info.devices[i].vmm = 0;
         dpct::device_info prop;
@@ -109,11 +103,11 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
     name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
 
     auto global_mem_size = prop.get_global_mem_size()/1000000;
-
-    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
+    std::string xmx = gpu_has_xmx(device) ? "yes" : "no";
+    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|%14s|\n", id, device_type.c_str(),
                   name.c_str(), version.c_str(), prop.get_max_compute_units(),
                   prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
-                  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
+                  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str(), xmx.c_str());
 }
 
 void ggml_backend_sycl_print_sycl_devices() {
@@ -124,16 +118,16 @@ void ggml_backend_sycl_print_sycl_devices() {
 
     GGML_LOG_INFO(
         "| | | | "
-        " |Max | |Max |Global | |\n");
+        " |Max | |Max |Global | | XMX |\n");
     GGML_LOG_INFO(
         "| | | | "
-        " |compute|Max work|sub |mem | |\n");
+        " |compute|Max work|sub |mem | | or |\n");
     GGML_LOG_INFO(
         "|ID| Device Type| "
-        "Name|Version|units |group |group|size | Driver version|\n");
+        "Name|Version|units |group |group|size | Driver version| Tensor Cores |\n");
     GGML_LOG_INFO(
         "|--|-------------------|---------------------------------------|------"
-        "-|-------|--------|-----|-------|---------------------|\n");
+        "-|-------|--------|-----|-------|---------------------|--------------|\n");
 
     for (int id = 0; id < device_count; ++id) {
         sycl::device device = dpct::dev_mgr::instance().get_device(id);
@@ -164,14 +158,18 @@ static void ggml_check_sycl() try {
     static bool initialized = false;
 
     if (!initialized) {
-        GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
+        GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
         g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
-        GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
-
+        GGML_LOG_INFO("GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
+#if defined(GGML_SYCL_FORCE_MMQ)
+        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: yes\n");
+#else
+        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: no\n");
+#endif
 #if defined(GGML_SYCL_F16)
-        GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
+        GGML_LOG_INFO("GGML_SYCL_F16: yes\n");
 #else
-        GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
+        GGML_LOG_INFO("GGML_SYCL_F16: no\n");
 #endif
 
     /* NOT REMOVE, keep it for next optimize for XMX.
@@ -1189,7 +1187,6 @@ std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(q
 /// kernels
 
 typedef void (*cpy_kernel_t)(const char * cx, char * cdst);
-typedef void (*ggml_sycl_func_t)(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
 typedef void (*ggml_sycl_op_mul_mat_t)(
     ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
@@ -3171,33 +3168,33 @@ catch (sycl::exception const &exc) {
 }
 
 
-static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
@@ -3572,9 +3569,10 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
     }
 }
 
-static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                 const ggml_tensor *src1,
+static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
                                  ggml_tensor *dst) try {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
 
     const ggml_tensor *ids = dst->src[2];
@@ -3740,12 +3738,12 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale);
+static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
 }
 
-static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp);
+static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
 }
 
 static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -3787,7 +3785,6 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                   ggml_type_name(src0->type), ggml_type_name(src1->type));
         GGML_ABORT("fatal error");
     }
-
     GGML_UNUSED(dst);
 }
 catch (sycl::exception const &exc) {
@@ -3796,59 +3793,52 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     // TODO: why do we pass dst as src1 here?
-    ggml_sycl_cpy(ctx, src0, dst, nullptr);
-    GGML_UNUSED(src1);
+    ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr);
 }
 
-static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf);
+static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
 }
 
-static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max);
+static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_soft_max);
 }
 
-static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope);
+static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
 }
 
-static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d);
+static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
 }
 
-static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col);
+static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
 }
 
-static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum);
+static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
 }
 
-static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows);
+static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
 }
 
-static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort);
+static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
 }
 
-static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax);
-}
-
-static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_UNUSED(src0);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(ctx);
+static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
 }
 
 void ggml_sycl_set_main_device(const int main_device) try {
     if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
@@ -3871,191 +3861,189 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * tensor) {
+bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) {
     if (!g_sycl_loaded) return false;
 
-    ggml_sycl_func_t func;
+    if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {
+        ggml_sycl_set_peer_access(dst->src[1]->ne[1], ctx.device);
+    }
 
-    switch (tensor->op) {
+    switch (dst->op) {
         case GGML_OP_ARGMAX:
-            func = ggml_sycl_argmax;
+            ggml_sycl_argmax(ctx, dst);
             break;
         case GGML_OP_CONV_TRANSPOSE_1D:
-            func = ggml_sycl_op_conv_transpose_1d;
+            ggml_sycl_op_conv_transpose_1d(ctx, dst);
             break;
         case GGML_OP_REPEAT:
-            func = ggml_sycl_repeat;
+            ggml_sycl_repeat(ctx, dst);
            break;
        case GGML_OP_GET_ROWS:
-            func = ggml_sycl_get_rows;
+            ggml_sycl_get_rows(ctx, dst);
            break;
        case GGML_OP_DUP:
-            func = ggml_sycl_dup;
+            ggml_sycl_dup(ctx, dst);
            break;
        case GGML_OP_ADD:
        case GGML_OP_ADD1: // TODO: more efficient implementation
-            func = ggml_sycl_add;
+            ggml_sycl_add(ctx, dst);
            break;
        case GGML_OP_SUB:
-            func = ggml_sycl_sub;
+            ggml_sycl_sub(ctx, dst);
            break;
        case GGML_OP_ACC:
-            func = ggml_sycl_acc;
+            ggml_sycl_acc(ctx, dst);
            break;
        case GGML_OP_MUL:
-            func = ggml_sycl_mul;
+            ggml_sycl_mul(ctx, dst);
            break;
        case GGML_OP_LOG:
-            func = ggml_sycl_log;
+            ggml_sycl_log(ctx, dst);
            break;
        case GGML_OP_DIV:
-            func = ggml_sycl_div;
+            ggml_sycl_div(ctx, dst);
            break;
        case GGML_OP_UNARY:
-            switch (ggml_get_unary_op(tensor)) {
+            switch (ggml_get_unary_op(dst)) {
                case GGML_UNARY_OP_NEG:
-                    func = ggml_sycl_neg;
+                    ggml_sycl_neg(ctx, dst);
                    break;
                case GGML_UNARY_OP_STEP:
-                    func = ggml_sycl_step;
+                    ggml_sycl_step(ctx, dst);
                    break;
                case GGML_UNARY_OP_GELU:
-                    func = ggml_sycl_gelu;
+                    ggml_sycl_gelu(ctx, dst);
                    break;
                case GGML_UNARY_OP_SILU:
-                    func = ggml_sycl_silu;
+                    ggml_sycl_silu(ctx, dst);
                    break;
                case GGML_UNARY_OP_GELU_QUICK:
-                    func = ggml_sycl_gelu_quick;
+                    ggml_sycl_gelu_quick(ctx, dst);
                    break;
                case GGML_UNARY_OP_TANH:
-                    func = ggml_sycl_tanh;
+                    ggml_sycl_tanh(ctx, dst);
                    break;
                case GGML_UNARY_OP_RELU:
-                    func = ggml_sycl_relu;
+                    ggml_sycl_relu(ctx, dst);
                    break;
                case GGML_UNARY_OP_SIGMOID:
-                    func = ggml_sycl_sigmoid;
+                    ggml_sycl_sigmoid(ctx, dst);
                    break;
                case GGML_UNARY_OP_HARDSIGMOID:
-                    func = ggml_sycl_hardsigmoid;
+                    ggml_sycl_hardsigmoid(ctx, dst);
                    break;
                case GGML_UNARY_OP_HARDSWISH:
-                    func = ggml_sycl_hardswish;
+                    ggml_sycl_hardswish(ctx, dst);
                    break;
                case GGML_UNARY_OP_EXP:
-                    func = ggml_sycl_exp;
+                    ggml_sycl_exp(ctx, dst);
                    break;
                default:
                    return false;
            }
            break;
        case GGML_OP_NORM:
-            func = ggml_sycl_norm;
+            ggml_sycl_norm(ctx, dst);
            break;
        case GGML_OP_GROUP_NORM:
-            func = ggml_sycl_group_norm;
+            ggml_sycl_group_norm(ctx, dst);
            break;
        case GGML_OP_CONCAT:
-            func = ggml_sycl_op_concat;
+            ggml_sycl_op_concat(ctx, dst);
            break;
        case GGML_OP_UPSCALE:
-            func = ggml_sycl_upscale;
+            ggml_sycl_upscale(ctx, dst);
            break;
        case GGML_OP_PAD:
-            func = ggml_sycl_pad;
+            ggml_sycl_pad(ctx, dst);
            break;
        case GGML_OP_LEAKY_RELU:
-            func = ggml_sycl_leaky_relu;
+            ggml_sycl_leaky_relu(ctx, dst);
            break;
        case GGML_OP_RMS_NORM:
-            func = ggml_sycl_rms_norm;
+            ggml_sycl_rms_norm(ctx, dst);
            break;
        case GGML_OP_MUL_MAT:
-            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                return false;
            }
-            func = ggml_sycl_mul_mat;
+            /* ggml_sycl_mul_mat_id is dependent on ggml_sycl_mul_mat */
+            ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst);
            break;
        case GGML_OP_MUL_MAT_ID:
-            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                return false;
            }
-            func = ggml_sycl_mul_mat_id;
+            ggml_sycl_mul_mat_id(ctx, dst);
            break;
        case GGML_OP_OUT_PROD:
-            func = ggml_sycl_op_out_prod;
+            ggml_sycl_op_out_prod(ctx, dst);
            break;
        case GGML_OP_SCALE:
-            func = ggml_sycl_scale;
+            ggml_sycl_scale(ctx, dst);
            break;
        case GGML_OP_SQR:
-            func = ggml_sycl_sqr;
+            ggml_sycl_sqr(ctx, dst);
            break;
        case GGML_OP_SQRT:
-            func = ggml_sycl_sqrt;
+            ggml_sycl_sqrt(ctx, dst);
            break;
        case GGML_OP_SIN:
-            func = ggml_sycl_sin;
+            ggml_sycl_sin(ctx, dst);
            break;
        case GGML_OP_COS:
-            func = ggml_sycl_cos;
+            ggml_sycl_cos(ctx, dst);
            break;
        case GGML_OP_CLAMP:
-            func = ggml_sycl_clamp;
+            ggml_sycl_clamp(ctx, dst);
            break;
        case GGML_OP_CPY:
-            func = ggml_sycl_cpy;
+            ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst);
            break;
        case GGML_OP_CONT:
-            func = ggml_sycl_dup;
+            ggml_sycl_dup(ctx, dst);
            break;
        case GGML_OP_NONE:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
        case GGML_OP_TRANSPOSE:
-            func = ggml_sycl_nop;
+            GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
            break;
        case GGML_OP_DIAG_MASK_INF:
-            func = ggml_sycl_diag_mask_inf;
+            ggml_sycl_diag_mask_inf(ctx, dst);
            break;
        case GGML_OP_SOFT_MAX:
-            func = ggml_sycl_soft_max;
+            ggml_sycl_soft_max(ctx, dst);
            break;
        case GGML_OP_ROPE:
-            func = ggml_sycl_rope;
+            ggml_sycl_rope(ctx, dst);
            break;
        case GGML_OP_IM2COL:
-            func = ggml_sycl_im2col;
+            ggml_sycl_im2col(ctx, dst);
            break;
        case GGML_OP_POOL_2D:
-            func = ggml_sycl_pool2d;
+            ggml_sycl_pool2d(ctx, dst);
            break;
        case GGML_OP_SUM:
-            func = ggml_sycl_sum;
+            ggml_sycl_sum(ctx, dst);
            break;
        case GGML_OP_SUM_ROWS:
-            func = ggml_sycl_sum_rows;
+            ggml_sycl_sum_rows(ctx, dst);
            break;
        case GGML_OP_ARGSORT:
-            func = ggml_sycl_argsort;
+            ggml_sycl_argsort(ctx, dst);
            break;
        case GGML_OP_TIMESTEP_EMBEDDING:
-            func = ggml_sycl_op_timestep_embedding;
+            ggml_sycl_op_timestep_embedding(ctx, dst);
            break;
        case GGML_OP_RWKV_WKV6:
-            func = ggml_sycl_op_rwkv_wkv6;
+            ggml_sycl_op_rwkv_wkv6(ctx, dst);
            break;
        default:
            return false;
    }
 
-    if (tensor->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(tensor->src[0]->buffer)) {
-        ggml_sycl_set_peer_access(tensor->src[1]->ne[1], ctx.device);
-    }
-
-    func(ctx, tensor->src[0], tensor->src[1], tensor);
     return true;
 }
 
ggml/src/ggml-sycl/outprod.cpp CHANGED
@@ -3,9 +3,9 @@
 #include "outprod.hpp"
 
 
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-                           const ggml_tensor* src1, ggml_tensor* dst) {
-
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
 
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
ggml/src/ggml-sycl/outprod.hpp CHANGED
@@ -3,8 +3,7 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-                           const ggml_tensor* src1, ggml_tensor* dst);
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
 
 
 #endif // GGML_SYCL_OUTPROD_HPP
ggml/src/ggml-sycl/tsembd.cpp CHANGED
@@ -55,8 +55,9 @@ static void timestep_embedding_f32_sycl(
       });
 }
 
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                     const ggml_tensor *src1, ggml_tensor * dst) {
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
     float * dst_d = (float *)dst->data;
     dpct::queue_ptr stream = ctx.stream();
ggml/src/ggml-sycl/tsembd.hpp CHANGED
@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                     const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 #endif // GGML_SYCL_TSEMBD_HPP
ggml/src/ggml-sycl/wkv6.cpp CHANGED
@@ -95,8 +95,10 @@ static void rwkv_wkv_f32_kernel(
     }
 }
 
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-                            const ggml_tensor* src1, ggml_tensor* dst) {
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
 
     const float* k_d = (const float*)dst->src[0]->data;
     const float* v_d = (const float*)dst->src[1]->data;
ggml/src/ggml-sycl/wkv6.hpp CHANGED
@@ -3,8 +3,7 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                            const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 
 #endif // GGML_SYCL_WKV6_HPP