uvos commited on
Commit
f9dbd96
·
1 Parent(s): 8e133f7

HIP: add GGML_HIP_MMQ_MFMA option to allow disabling the MFMA path. (llama/14930)

Browse files

This is useful for testing for regressions on GCN with CDNA hardware.

With GGML_HIP_MMQ_MFMA=Off and GGML_CUDA_FORCE_MMQ=On we can conveniently test the GCN code path on CDNA. As CDNA is essentially GCN renamed, with MFMA and limited-use ACC registers added, this provides a good alternative for regression testing when GCN hardware is not available.

ggml/CMakeLists.txt CHANGED
@@ -174,6 +174,7 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
174
  option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
175
  option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
176
  option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
 
177
  option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
178
  option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
179
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
 
174
  option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
175
  option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
176
  option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
177
+ option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
178
  option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
179
  option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
180
  option(GGML_VULKAN "ggml: use Vulkan" OFF)
ggml/src/ggml-cuda/common.cuh CHANGED
@@ -227,7 +227,7 @@ typedef float2 dfloat2;
227
  #define FP16_MMA_AVAILABLE
228
  #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
229
 
230
- #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
231
  #define AMD_MFMA_AVAILABLE
232
  #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
233
 
@@ -295,7 +295,11 @@ static bool fp32_mma_hardware_available(const int cc) {
295
 
296
  // AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
297
  static bool amd_mfma_available(const int cc) {
298
- return cc >= GGML_CUDA_CC_OFFSET_AMD && GGML_CUDA_CC_IS_CDNA3(cc);
 
 
 
 
299
  }
300
 
301
  // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
 
227
  #define FP16_MMA_AVAILABLE
228
  #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
229
 
230
+ #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
231
  #define AMD_MFMA_AVAILABLE
232
  #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
233
 
 
295
 
296
  // AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
297
  static bool amd_mfma_available(const int cc) {
298
+ #if !defined(GGML_HIP_NO_MMQ_MFMA)
299
+ return GGML_CUDA_CC_IS_CDNA3(cc);
300
+ #else
301
+ return false;
302
+ #endif //!defined(GGML_HIP_NO_MMQ_MFMA)
303
  }
304
 
305
  // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
ggml/src/ggml-hip/CMakeLists.txt CHANGED
@@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
113
  add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
114
  endif()
115
 
 
 
 
 
116
  if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
117
  add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
118
  endif()
 
113
  add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
114
  endif()
115
 
116
+ if (NOT GGML_HIP_MMQ_MFMA)
117
+ add_compile_definitions(GGML_HIP_NO_MMQ_MFMA)
118
+ endif()
119
+
120
  if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
121
  add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
122
  endif()