ClarkChin hengyu Chen Xi committed on
Commit
374488a
·
1 Parent(s): 4816a87

fix the mul_mat_id ut issues (llama/8427)

Browse files

* fix part of mul_mat_id

* skip the bfloat 16 sycl ut

Signed-off-by: Chen Xi <[email protected]>

---------

Signed-off-by: Chen Xi <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Chen Xi <[email protected]>

Files changed (2) hide show
  1. ggml/src/ggml-backend.c +1 -1
  2. ggml/src/ggml-sycl.cpp +13 -36
ggml/src/ggml-backend.c CHANGED
@@ -394,7 +394,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
394
 
395
  // backend registry
396
 
397
- #define GGML_REG_MAX_BACKENDS 16
398
 
399
  struct ggml_backend_reg {
400
  char name[128];
 
394
 
395
  // backend registry
396
 
397
+ #define GGML_REG_MAX_BACKENDS 64
398
 
399
  struct ggml_backend_reg {
400
  char name[128];
ggml/src/ggml-sycl.cpp CHANGED
@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3768
  stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
3769
  SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
3770
 
3771
- const ggml_tensor_extra_gpu *src0_extra =
3772
- (const ggml_tensor_extra_gpu *)src0->extra;
3773
- const ggml_tensor_extra_gpu *src1_extra =
3774
- (const ggml_tensor_extra_gpu *)src1->extra;
3775
- const ggml_tensor_extra_gpu *dst_extra =
3776
- (const ggml_tensor_extra_gpu *)dst->extra;
3777
-
3778
- ggml_tensor_extra_gpu src0_row_extra;
3779
- ggml_tensor_extra_gpu src1_row_extra;
3780
- ggml_tensor_extra_gpu dst_row_extra;
3781
-
3782
  ggml_tensor src0_row = *src0;
3783
  ggml_tensor src1_row = *src1;
3784
  ggml_tensor dst_row = *dst;
3785
 
3786
- src1_row.backend = GGML_BACKEND_TYPE_GPU;
3787
- dst_row.backend = GGML_BACKEND_TYPE_GPU;
3788
-
3789
- src0_row.extra = &src0_row_extra;
3790
- src1_row.extra = &src1_row_extra;
3791
- dst_row.extra = &dst_row_extra;
3792
-
3793
- char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3794
- ? (char *)src0->data
3795
- : (char *)src0_extra->data_device[ctx.device];
3796
- char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3797
- ? (char *)src1->data
3798
- : (char *)src1_extra->data_device[ctx.device];
3799
- char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3800
- ? (char *)dst->data
3801
- : (char *)dst_extra->data_device[ctx.device];
3802
 
3803
  src0_row.ne[2] = 1;
3804
  src0_row.ne[3] = 1;
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3827
  const int64_t i1 = id;
3828
  const int64_t i2 = i12;
3829
 
3830
- src0_row_extra.data_device[ctx.device] =
3831
- src0_original + i02*nb02;
3832
- src1_row_extra.data_device[ctx.device] =
3833
- src1_original + + i11*nb11 + i12*nb12;
3834
- dst_row_extra.data_device[ctx.device] =
3835
- dst_original + i1*nb1 + i2*nb2;
3836
 
3837
  ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
3838
  }
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3841
  ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
3842
  ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
3843
 
3844
- src1_row_extra.data_device[ctx.device] = src1_contiguous.get();
3845
- dst_row_extra.data_device[ctx.device] = dst_contiguous.get();
3846
 
3847
  for (int64_t i02 = 0; i02 < n_as; i02++) {
3848
  int64_t num_src1_rows = 0;
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3898
  });
3899
  }
3900
 
3901
- src0_row_extra.data_device[ctx.device] = src0_original + i02*nb02;
3902
 
3903
  GGML_ASSERT(nb11 == sizeof(float)*ne10);
3904
  GGML_ASSERT(nb1 == sizeof(float)*ne0);
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
5221
  return false;
5222
  }
5223
  }
 
 
 
 
5224
  return true;
5225
  } break;
5226
  case GGML_OP_GET_ROWS:
 
3768
  stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
3769
  SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
3770
 
 
 
 
 
 
 
 
 
 
 
 
3771
  ggml_tensor src0_row = *src0;
3772
  ggml_tensor src1_row = *src1;
3773
  ggml_tensor dst_row = *dst;
3774
 
3775
+ char *src0_original = (char *)src0->data;
3776
+ char *src1_original = (char *)src1->data;
3777
+ char *dst_original = (char *)dst->data;
 
 
 
 
 
 
 
 
 
 
 
 
 
3778
 
3779
  src0_row.ne[2] = 1;
3780
  src0_row.ne[3] = 1;
 
3803
  const int64_t i1 = id;
3804
  const int64_t i2 = i12;
3805
 
3806
+ src0_row.data = src0_original + i02*nb02;
3807
+ src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3808
+ dst_row.data = dst_original + i1*nb1 + i2*nb2;
 
 
 
3809
 
3810
  ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
3811
  }
 
3814
  ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
3815
  ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
3816
 
3817
+ src1_row.data = src1_contiguous.get();
3818
+ dst_row.data = dst_contiguous.get();
3819
 
3820
  for (int64_t i02 = 0; i02 < n_as; i02++) {
3821
  int64_t num_src1_rows = 0;
 
3871
  });
3872
  }
3873
 
3874
+ src0_row.data = src0_original + i02*nb02;
3875
 
3876
  GGML_ASSERT(nb11 == sizeof(float)*ne10);
3877
  GGML_ASSERT(nb1 == sizeof(float)*ne0);
 
5194
  return false;
5195
  }
5196
  }
5197
+ ggml_type src0_type = op->src[0]->type;
5198
+ if (src0_type == GGML_TYPE_BF16) {
5199
+ return false;
5200
+ }
5201
  return true;
5202
  } break;
5203
  case GGML_OP_GET_ROWS: