Spaces:
Running
Running
Fix the mul_mat_id unit test (UT) issues (llama/8427)
Browse files
* fix part of mul_mat_id
* skip the bfloat16 SYCL unit test
Signed-off-by: Chen Xi <[email protected]>
---------
Signed-off-by: Chen Xi <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Chen Xi <[email protected]>
- ggml/src/ggml-backend.c +1 -1
- ggml/src/ggml-sycl.cpp +13 -36
ggml/src/ggml-backend.c
CHANGED
|
@@ -394,7 +394,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
|
|
| 394 |
|
| 395 |
// backend registry
|
| 396 |
|
| 397 |
-
#define GGML_REG_MAX_BACKENDS
|
| 398 |
|
| 399 |
struct ggml_backend_reg {
|
| 400 |
char name[128];
|
|
|
|
| 394 |
|
| 395 |
// backend registry
|
| 396 |
|
| 397 |
+
#define GGML_REG_MAX_BACKENDS 64
|
| 398 |
|
| 399 |
struct ggml_backend_reg {
|
| 400 |
char name[128];
|
ggml/src/ggml-sycl.cpp
CHANGED
|
@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
| 3768 |
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
|
| 3769 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
| 3770 |
|
| 3771 |
-
const ggml_tensor_extra_gpu *src0_extra =
|
| 3772 |
-
(const ggml_tensor_extra_gpu *)src0->extra;
|
| 3773 |
-
const ggml_tensor_extra_gpu *src1_extra =
|
| 3774 |
-
(const ggml_tensor_extra_gpu *)src1->extra;
|
| 3775 |
-
const ggml_tensor_extra_gpu *dst_extra =
|
| 3776 |
-
(const ggml_tensor_extra_gpu *)dst->extra;
|
| 3777 |
-
|
| 3778 |
-
ggml_tensor_extra_gpu src0_row_extra;
|
| 3779 |
-
ggml_tensor_extra_gpu src1_row_extra;
|
| 3780 |
-
ggml_tensor_extra_gpu dst_row_extra;
|
| 3781 |
-
|
| 3782 |
ggml_tensor src0_row = *src0;
|
| 3783 |
ggml_tensor src1_row = *src1;
|
| 3784 |
ggml_tensor dst_row = *dst;
|
| 3785 |
|
| 3786 |
-
|
| 3787 |
-
|
| 3788 |
-
|
| 3789 |
-
src0_row.extra = &src0_row_extra;
|
| 3790 |
-
src1_row.extra = &src1_row_extra;
|
| 3791 |
-
dst_row.extra = &dst_row_extra;
|
| 3792 |
-
|
| 3793 |
-
char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
|
| 3794 |
-
? (char *)src0->data
|
| 3795 |
-
: (char *)src0_extra->data_device[ctx.device];
|
| 3796 |
-
char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
|
| 3797 |
-
? (char *)src1->data
|
| 3798 |
-
: (char *)src1_extra->data_device[ctx.device];
|
| 3799 |
-
char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
|
| 3800 |
-
? (char *)dst->data
|
| 3801 |
-
: (char *)dst_extra->data_device[ctx.device];
|
| 3802 |
|
| 3803 |
src0_row.ne[2] = 1;
|
| 3804 |
src0_row.ne[3] = 1;
|
|
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
| 3827 |
const int64_t i1 = id;
|
| 3828 |
const int64_t i2 = i12;
|
| 3829 |
|
| 3830 |
-
|
| 3831 |
-
|
| 3832 |
-
|
| 3833 |
-
src1_original + + i11*nb11 + i12*nb12;
|
| 3834 |
-
dst_row_extra.data_device[ctx.device] =
|
| 3835 |
-
dst_original + i1*nb1 + i2*nb2;
|
| 3836 |
|
| 3837 |
ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
|
| 3838 |
}
|
|
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
| 3841 |
ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
|
| 3842 |
ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
|
| 3843 |
|
| 3844 |
-
|
| 3845 |
-
|
| 3846 |
|
| 3847 |
for (int64_t i02 = 0; i02 < n_as; i02++) {
|
| 3848 |
int64_t num_src1_rows = 0;
|
|
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
|
|
| 3898 |
});
|
| 3899 |
}
|
| 3900 |
|
| 3901 |
-
|
| 3902 |
|
| 3903 |
GGML_ASSERT(nb11 == sizeof(float)*ne10);
|
| 3904 |
GGML_ASSERT(nb1 == sizeof(float)*ne0);
|
|
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
|
| 5221 |
return false;
|
| 5222 |
}
|
| 5223 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5224 |
return true;
|
| 5225 |
} break;
|
| 5226 |
case GGML_OP_GET_ROWS:
|
|
|
|
| 3768 |
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
|
| 3769 |
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
|
| 3770 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3771 |
ggml_tensor src0_row = *src0;
|
| 3772 |
ggml_tensor src1_row = *src1;
|
| 3773 |
ggml_tensor dst_row = *dst;
|
| 3774 |
|
| 3775 |
+
char *src0_original = (char *)src0->data;
|
| 3776 |
+
char *src1_original = (char *)src1->data;
|
| 3777 |
+
char *dst_original = (char *)dst->data;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3778 |
|
| 3779 |
src0_row.ne[2] = 1;
|
| 3780 |
src0_row.ne[3] = 1;
|
|
|
|
| 3803 |
const int64_t i1 = id;
|
| 3804 |
const int64_t i2 = i12;
|
| 3805 |
|
| 3806 |
+
src0_row.data = src0_original + i02*nb02;
|
| 3807 |
+
src1_row.data = src1_original + + i11*nb11 + i12*nb12;
|
| 3808 |
+
dst_row.data = dst_original + i1*nb1 + i2*nb2;
|
|
|
|
|
|
|
|
|
|
| 3809 |
|
| 3810 |
ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
|
| 3811 |
}
|
|
|
|
| 3814 |
ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
|
| 3815 |
ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
|
| 3816 |
|
| 3817 |
+
src1_row.data = src1_contiguous.get();
|
| 3818 |
+
dst_row.data = dst_contiguous.get();
|
| 3819 |
|
| 3820 |
for (int64_t i02 = 0; i02 < n_as; i02++) {
|
| 3821 |
int64_t num_src1_rows = 0;
|
|
|
|
| 3871 |
});
|
| 3872 |
}
|
| 3873 |
|
| 3874 |
+
src0_row.data = src0_original + i02*nb02;
|
| 3875 |
|
| 3876 |
GGML_ASSERT(nb11 == sizeof(float)*ne10);
|
| 3877 |
GGML_ASSERT(nb1 == sizeof(float)*ne0);
|
|
|
|
| 5194 |
return false;
|
| 5195 |
}
|
| 5196 |
}
|
| 5197 |
+
ggml_type src0_type = op->src[0]->type;
|
| 5198 |
+
if (src0_type == GGML_TYPE_BF16) {
|
| 5199 |
+
return false;
|
| 5200 |
+
}
|
| 5201 |
return true;
|
| 5202 |
} break;
|
| 5203 |
case GGML_OP_GET_ROWS:
|