Diego Devesa committed on
Commit
747ad97
·
1 Parent(s): 4c88a27

cuda : fix buffer type check with integrated GPUs (llama/14069)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cuda/ggml-cuda.cu +1 -4
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -1144,7 +1144,6 @@ typedef void (*ggml_cuda_op_mul_mat_t)(
1144
  static cudaError_t ggml_cuda_cpy_tensor_2d(
1145
  void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
1146
 
1147
- GGML_ASSERT(ggml_backend_buffer_is_cuda(src->buffer));
1148
  const char * src_ptr = (const char *) src->data;
1149
  char * dst_ptr = (char *) dst;
1150
 
@@ -1427,8 +1426,6 @@ static void ggml_cuda_op_mul_mat(
1427
  const int64_t nb2 = dst->nb[2];
1428
  const int64_t nb3 = dst->nb[3];
1429
 
1430
- GGML_ASSERT(ggml_backend_buffer_is_cuda(dst->buffer));
1431
- GGML_ASSERT(ggml_backend_buffer_is_cuda(src1->buffer));
1432
  ggml_backend_cuda_buffer_context * src1_ctx = (ggml_backend_cuda_buffer_context *) src1->buffer->context;
1433
  ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *) dst->buffer->context;
1434
 
@@ -1750,7 +1747,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
1750
  GGML_ASSERT(!ggml_is_transposed(src0));
1751
  GGML_ASSERT(!ggml_is_transposed(src1));
1752
 
1753
- GGML_ASSERT(ggml_backend_buffer_is_cuda(src0->buffer));
1754
  GGML_ASSERT(src0->type == GGML_TYPE_F16);
1755
 
1756
  // Byte offsets and tensor dimensions are currently used in an inconsistent way for dst.
 
1144
  static cudaError_t ggml_cuda_cpy_tensor_2d(
1145
  void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
1146
 
 
1147
  const char * src_ptr = (const char *) src->data;
1148
  char * dst_ptr = (char *) dst;
1149
 
 
1426
  const int64_t nb2 = dst->nb[2];
1427
  const int64_t nb3 = dst->nb[3];
1428
 
 
 
1429
  ggml_backend_cuda_buffer_context * src1_ctx = (ggml_backend_cuda_buffer_context *) src1->buffer->context;
1430
  ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *) dst->buffer->context;
1431
 
 
1747
  GGML_ASSERT(!ggml_is_transposed(src0));
1748
  GGML_ASSERT(!ggml_is_transposed(src1));
1749
 
1750
+ GGML_ASSERT(!ggml_backend_buft_is_cuda_split(src0->buffer->buft));
1751
  GGML_ASSERT(src0->type == GGML_TYPE_F16);
1752
 
1753
  // Byte offsets and tensor dimensions are currently used in an inconsistent way for dst.