JohannesGaessler committed
Commit acfd94f · 1 Parent(s): ed08269

HIP: fix flash_attn_stream_k_fixup warning (llama/11604)
ggml/src/ggml-cuda/fattn-common.cuh CHANGED

@@ -516,6 +516,12 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) {
         nullptr;
 }
 
+// The HIP compiler for some reason complains that it can't unroll a loop because of the jt*ncols + j >= ne01 conditional.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
+
 template<int D, int ncols, int KQ_stride> // D == head size
 #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 __launch_bounds__(D, 1)
@@ -614,6 +620,10 @@ static __global__ void flash_attn_stream_k_fixup(
     }
 }
 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
+
 template<int D, int parallel_blocks> // D == head size
 #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 __launch_bounds__(D, 1)
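
For context, a minimal standalone sketch of the pattern this hunk applies: clang (and hipcc, which is clang-based) emits a -Wpass-failed warning when a "#pragma unroll" request cannot be honored, for example because a data-dependent early-out sits inside the loop body. Wrapping only the affected kernel in a diagnostic push/ignored/pop keeps the build warning-clean without silencing the warning globally. The kernel name, loop, and bounds check below are illustrative assumptions, not code from fattn-common.cuh.

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__

// Hypothetical example kernel: the early return depending on ne01 can prevent
// the compiler from fully unrolling the loop, which triggers -Wpass-failed on HIP.
template <int ncols>
static __global__ void example_fixup_kernel(float * dst, const int ne01) {
    const int jt = blockIdx.x;

#pragma unroll
    for (int j = 0; j < ncols; ++j) {
        if (jt*ncols + j >= ne01) {
            return; // data-dependent exit inside the unrolled loop
        }
        dst[jt*ncols + j] = 0.0f;
    }
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

The pop directly after the kernel definition restores the previous diagnostic state, so later code in the same translation unit still reports -Wpass-failed normally.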
ggml/src/ggml-cuda/softmax.cu CHANGED

@@ -18,7 +18,7 @@ __device__ float __forceinline__ t2f32<half>(half val) {
 #ifdef __clang__
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wpass-failed"
-#endif
+#endif // __clang__
 template <bool use_shared, int ncols_template, int block_size_template, typename T>
 static __global__ void soft_max_f32(
         const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y,
@@ -126,7 +126,7 @@ static __global__ void soft_max_f32(
     }
 #ifdef __clang__
 #pragma clang diagnostic pop
-#endif
+#endif // __clang__
 
 static __global__ void soft_max_back_f32(
     const float * grad, const float * dstf, float * dst, const int ncols, const float scale) {