Spaces:
Sleeping
Sleeping
Commit
·
ef3d018
1
Parent(s):
025493b
CUDA: fix MMQ stream-k for --split-mode row (llama/8167)
Browse files
ggml/src/ggml-cuda/mmq.cuh
CHANGED
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {