Spaces:
Sleeping
Sleeping
HIP: add GGML_HIP_MMQ_MFMA option to allow disabling the MFMA path. (llama/14930)
Browse files
This is useful for regression-testing the GCN code path on CDNA hardware.
With GGML_HIP_MMQ_MFMA=OFF and GGML_CUDA_FORCE_MMQ=ON, we can conveniently test the GCN code path on CDNA. Because CDNA is essentially GCN renamed, with MFMA and limited-use ACC registers added, this provides a good alternative for regression testing when GCN hardware is not available.
- ggml/CMakeLists.txt +1 -0
- ggml/src/ggml-cuda/common.cuh +6 -2
- ggml/src/ggml-hip/CMakeLists.txt +4 -0
ggml/CMakeLists.txt
CHANGED
|
@@ -174,6 +174,7 @@ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental,
|
|
| 174 |
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
| 175 |
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
| 176 |
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
|
|
|
|
| 177 |
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
|
| 178 |
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
|
| 179 |
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
|
|
|
| 174 |
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
| 175 |
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
| 176 |
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
|
| 177 |
+
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
|
| 178 |
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
|
| 179 |
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
|
| 180 |
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
ggml/src/ggml-cuda/common.cuh
CHANGED
|
@@ -227,7 +227,7 @@ typedef float2 dfloat2;
|
|
| 227 |
#define FP16_MMA_AVAILABLE
|
| 228 |
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
|
| 229 |
|
| 230 |
-
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
|
| 231 |
#define AMD_MFMA_AVAILABLE
|
| 232 |
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
|
| 233 |
|
|
@@ -295,7 +295,11 @@ static bool fp32_mma_hardware_available(const int cc) {
|
|
| 295 |
|
| 296 |
// AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
|
| 297 |
static bool amd_mfma_available(const int cc) {
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
}
|
| 300 |
|
| 301 |
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
|
|
|
|
| 227 |
#define FP16_MMA_AVAILABLE
|
| 228 |
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
|
| 229 |
|
| 230 |
+
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
|
| 231 |
#define AMD_MFMA_AVAILABLE
|
| 232 |
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
|
| 233 |
|
|
|
|
| 295 |
|
| 296 |
// AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
|
| 297 |
static bool amd_mfma_available(const int cc) {
|
| 298 |
+
#if !defined(GGML_HIP_NO_MMQ_MFMA)
|
| 299 |
+
return GGML_CUDA_CC_IS_CDNA3(cc);
|
| 300 |
+
#else
|
| 301 |
+
return false;
|
| 302 |
+
#endif //!defined(GGML_HIP_NO_MMQ_MFMA)
|
| 303 |
}
|
| 304 |
|
| 305 |
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
|
ggml/src/ggml-hip/CMakeLists.txt
CHANGED
|
@@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
|
|
| 113 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
|
| 114 |
endif()
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
|
| 117 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
|
| 118 |
endif()
|
|
|
|
| 113 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
|
| 114 |
endif()
|
| 115 |
|
| 116 |
+
if (NOT GGML_HIP_MMQ_MFMA)
|
| 117 |
+
add_compile_definitions(GGML_HIP_NO_MMQ_MFMA)
|
| 118 |
+
endif()
|
| 119 |
+
|
| 120 |
if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
|
| 121 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
|
| 122 |
endif()
|