Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
747ad97
1
Parent(s):
4c88a27
cuda : fix buffer type check with integrated GPUs (llama/14069)
Browse files
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -1144,7 +1144,6 @@ typedef void (*ggml_cuda_op_mul_mat_t)(
|
|
| 1144 |
static cudaError_t ggml_cuda_cpy_tensor_2d(
|
| 1145 |
void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
|
| 1146 |
|
| 1147 |
-
GGML_ASSERT(ggml_backend_buffer_is_cuda(src->buffer));
|
| 1148 |
const char * src_ptr = (const char *) src->data;
|
| 1149 |
char * dst_ptr = (char *) dst;
|
| 1150 |
|
|
@@ -1427,8 +1426,6 @@ static void ggml_cuda_op_mul_mat(
|
|
| 1427 |
const int64_t nb2 = dst->nb[2];
|
| 1428 |
const int64_t nb3 = dst->nb[3];
|
| 1429 |
|
| 1430 |
-
GGML_ASSERT(ggml_backend_buffer_is_cuda(dst->buffer));
|
| 1431 |
-
GGML_ASSERT(ggml_backend_buffer_is_cuda(src1->buffer));
|
| 1432 |
ggml_backend_cuda_buffer_context * src1_ctx = (ggml_backend_cuda_buffer_context *) src1->buffer->context;
|
| 1433 |
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *) dst->buffer->context;
|
| 1434 |
|
|
@@ -1750,7 +1747,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
|
|
| 1750 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 1751 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 1752 |
|
| 1753 |
-
GGML_ASSERT(ggml_backend_buffer_is_cuda(src0->buffer));
|
| 1754 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 1755 |
|
| 1756 |
// Byte offsets and tensor dimensions are currently used in an inconsistent way for dst.
|
|
|
|
| 1144 |
static cudaError_t ggml_cuda_cpy_tensor_2d(
|
| 1145 |
void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
|
| 1146 |
|
|
|
|
| 1147 |
const char * src_ptr = (const char *) src->data;
|
| 1148 |
char * dst_ptr = (char *) dst;
|
| 1149 |
|
|
|
|
| 1426 |
const int64_t nb2 = dst->nb[2];
|
| 1427 |
const int64_t nb3 = dst->nb[3];
|
| 1428 |
|
|
|
|
|
|
|
| 1429 |
ggml_backend_cuda_buffer_context * src1_ctx = (ggml_backend_cuda_buffer_context *) src1->buffer->context;
|
| 1430 |
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *) dst->buffer->context;
|
| 1431 |
|
|
|
|
| 1747 |
GGML_ASSERT(!ggml_is_transposed(src0));
|
| 1748 |
GGML_ASSERT(!ggml_is_transposed(src1));
|
| 1749 |
|
| 1750 |
+
GGML_ASSERT(!ggml_backend_buft_is_cuda_split(src0->buffer->buft));
|
| 1751 |
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
| 1752 |
|
| 1753 |
// Byte offsets and tensor dimensions are currently used in an inconsistent way for dst.
|