Spaces:
Running
Running
cuda : fix bounds check for src0 rows in MMVQ kernel (#2231)
Browse files
* cuda : fix bounds check for src0 rows in MMVQ kernel
* Update ggml-cuda/mmvq.cu
Co-authored-by: Johannes Gäßler <[email protected]>
---------
Co-authored-by: Johannes Gäßler <[email protected]>
- ggml-cuda/mmvq.cu +1 -1
ggml-cuda/mmvq.cu
CHANGED
|
@@ -75,7 +75,7 @@ static __global__ void mul_mat_vec_q(
|
|
| 75 |
tmp[j][i] = warp_reduce_sum(tmp[j][i]);
|
| 76 |
}
|
| 77 |
|
| 78 |
-
if (threadIdx.x < rows_per_cuda_block) {
|
| 79 |
dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
|
| 80 |
}
|
| 81 |
}
|
|
|
|
| 75 |
tmp[j][i] = warp_reduce_sum(tmp[j][i]);
|
| 76 |
}
|
| 77 |
|
| 78 |
+
if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
|
| 79 |
dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
|
| 80 |
}
|
| 81 |
}
|