Spaces:
Running
Running
uvos
committed on
Commit
·
f95736f
1
Parent(s):
7ddd89c
HIP: disable rocwmma on gfx12 by default until rocm 7.0 (llama/14202)
Browse files
- ggml/CMakeLists.txt +1 -0
- ggml/src/ggml-cuda/common.cuh +2 -2
- ggml/src/ggml-hip/CMakeLists.txt +4 -0
ggml/CMakeLists.txt
CHANGED
|
@@ -172,6 +172,7 @@ option(GGML_HIP "ggml: use HIP"
|
|
| 172 |
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
| 173 |
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
| 174 |
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
|
|
|
| 175 |
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
| 176 |
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
| 177 |
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
|
|
|
| 172 |
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
|
| 173 |
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
|
| 174 |
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
|
| 175 |
+
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
|
| 176 |
option(GGML_VULKAN "ggml: use Vulkan" OFF)
|
| 177 |
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
|
| 178 |
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
ggml/src/ggml-cuda/common.cuh
CHANGED
|
@@ -207,9 +207,9 @@ typedef float2 dfloat2;
|
|
| 207 |
#define FP16_MMA_AVAILABLE
|
| 208 |
#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
|
| 209 |
|
| 210 |
-
#if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
|
| 211 |
#define FP16_MMA_AVAILABLE
|
| 212 |
-
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
|
| 213 |
|
| 214 |
#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
|
| 215 |
#define NEW_MMA_AVAILABLE
|
|
|
|
| 207 |
#define FP16_MMA_AVAILABLE
|
| 208 |
#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
|
| 209 |
|
| 210 |
+
#if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
|
| 211 |
#define FP16_MMA_AVAILABLE
|
| 212 |
+
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
|
| 213 |
|
| 214 |
#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
|
| 215 |
#define NEW_MMA_AVAILABLE
|
ggml/src/ggml-hip/CMakeLists.txt
CHANGED
|
@@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
|
|
| 113 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
|
| 114 |
endif()
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if (NOT GGML_CUDA_FA)
|
| 117 |
add_compile_definitions(GGML_CUDA_NO_FA)
|
| 118 |
endif()
|
|
|
|
| 113 |
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN)
|
| 114 |
endif()
|
| 115 |
|
| 116 |
+
if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0)
|
| 117 |
+
add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
|
| 118 |
+
endif()
|
| 119 |
+
|
| 120 |
if (NOT GGML_CUDA_FA)
|
| 121 |
add_compile_definitions(GGML_CUDA_NO_FA)
|
| 122 |
endif()
|