JohannesGaessler committed on
Commit
ef3d018
·
1 Parent(s): 025493b

CUDA: fix MMQ stream-k for --split-mode row (llama/8167)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cuda/mmq.cuh +1 -1
ggml/src/ggml-cuda/mmq.cuh CHANGED
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {