Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
6b6155b
1
Parent(s):
e4b1812
cuda : prevent using split buffers with 3d/4d matrices (llama/13919)
Browse files
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -2994,9 +2994,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|
| 2994 |
{
|
| 2995 |
struct ggml_tensor * a = op->src[0];
|
| 2996 |
struct ggml_tensor * b = op->src[1];
|
| 2997 |
-
// for small weight matrices the active device can end up without any rows, don't use row split in those cases
|
| 2998 |
-
// this avoids some edge cases (and the performance would not be good anyways)
|
| 2999 |
if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3000 |
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
|
| 3001 |
int64_t row_low;
|
| 3002 |
int64_t row_high;
|
|
|
|
| 2994 |
{
|
| 2995 |
struct ggml_tensor * a = op->src[0];
|
| 2996 |
struct ggml_tensor * b = op->src[1];
|
|
|
|
|
|
|
| 2997 |
if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
|
| 2998 |
+
if (a->ne[2] > 1 || a->ne[3] > 1) {
|
| 2999 |
+
return false;
|
| 3000 |
+
}
|
| 3001 |
+
// for small weight matrices the active device can end up without any rows, don't use row split in those cases
|
| 3002 |
+
// this avoids some edge cases (and the performance would not be good anyways)
|
| 3003 |
ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
|
| 3004 |
int64_t row_low;
|
| 3005 |
int64_t row_high;
|