JohannesGaessler committed on
Commit
99a4546
·
1 Parent(s): cdcb67c

CUDA: fix shared memory access condition for mmv (llama/10740)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cuda/mmv.cu +1 -1
ggml/src/ggml-cuda/mmv.cu CHANGED
@@ -57,7 +57,7 @@ static __global__ void mul_mat_vec(
57
  if (block_size > WARP_SIZE) {
58
  buf_iw[tid/WARP_SIZE] = sumf;
59
  __syncthreads();
60
- if (tid > WARP_SIZE) {
61
  return;
62
  }
63
  sumf = buf_iw[tid];
 
57
  if (block_size > WARP_SIZE) {
58
  buf_iw[tid/WARP_SIZE] = sumf;
59
  __syncthreads();
60
+ if (tid >= WARP_SIZE) {
61
  return;
62
  }
63
  sumf = buf_iw[tid];