JohannesGaessler committed
Commit 6fb9674 · 1 parent: 1c0a5c0

CUDA: fix typo in FlashAttention code (llama/13926)

ggml/src/ggml-cuda/fattn-mma-f16.cuh CHANGED
@@ -1246,7 +1246,7 @@ static __global__ void flash_attn_ext_f16(
         NO_DEVICE_CODE;
         return;
     }
-#endif __CUDA_ARCH__ == GGML_CUDA_CC_TURING
+#endif // __CUDA_ARCH__ == GGML_CUDA_CC_TURING
 
     static_assert(!mla || DKQ >= DV, "MLA needs DKQ >= DV");
 
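
Background on the fix: the C and C++ preprocessor does not permit extra tokens after #endif, so "#endif __CUDA_ARCH__ == GGML_CUDA_CC_TURING" draws a compiler warning (clang's -Wextra-tokens, for example); the conventional way to label the closing directive is a trailing comment, which is what the one-character change above restores. A minimal sketch of the corrected pattern follows; the GGML_CUDA_CC_TURING value is defined locally here purely for illustration, as in ggml it comes from the project's CUDA headers.

// Sketch only: shows the trailing-comment form of #endif used in the fix.
// The value below is an assumption for illustration, not ggml's definition.
#define GGML_CUDA_CC_TURING 750

__global__ void example_kernel(float * dst) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ == GGML_CUDA_CC_TURING
    dst[0] = 1.0f; // a Turing-specific path would go here
#else
    dst[0] = 0.0f;
#endif // __CUDA_ARCH__ == GGML_CUDA_CC_TURING  (label is a comment, not stray tokens)
}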