Spaces:
Running
Running
Nikita Sarychev
committed on
Commit
·
82bb7f3
1
Parent(s):
6406a6e
HIP: Only call rocblas_initialize on rocBLAS versions with the multiple instantiation bug (llama/11080)
Browse files
This disables the workaround on fixed rocBLAS versions (>=4.0.0) to eliminate the runtime cost and unnecessary VRAM allocation of loading all Tensile objects.
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -42,6 +42,7 @@
|
|
| 42 |
#include <algorithm>
|
| 43 |
#include <array>
|
| 44 |
#include <atomic>
|
|
|
|
| 45 |
#include <cinttypes>
|
| 46 |
#include <cstddef>
|
| 47 |
#include <cstdint>
|
|
@@ -172,8 +173,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|
| 172 |
#ifdef __HIP_PLATFORM_AMD__
|
| 173 |
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
| 174 |
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
#endif
|
| 178 |
|
| 179 |
ggml_cuda_device_info info = {};
|
|
|
|
| 42 |
#include <algorithm>
|
| 43 |
#include <array>
|
| 44 |
#include <atomic>
|
| 45 |
+
#include <charconv>
|
| 46 |
#include <cinttypes>
|
| 47 |
#include <cstddef>
|
| 48 |
#include <cstdint>
|
|
|
|
| 173 |
#ifdef __HIP_PLATFORM_AMD__
|
| 174 |
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
| 175 |
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
|
| 176 |
+
{
|
| 177 |
+
int major_version = 0;
|
| 178 |
+
size_t version_length = 0;
|
| 179 |
+
if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
|
| 180 |
+
std::string version(version_length, '\0');
|
| 181 |
+
if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
|
| 182 |
+
version.resize(::strlen(version.c_str()));
|
| 183 |
+
int parsed_value = 0;
|
| 184 |
+
if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) {
|
| 185 |
+
major_version = parsed_value;
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
}
|
| 189 |
+
if (major_version < 4) {
|
| 190 |
+
GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
|
| 191 |
+
rocblas_initialize();
|
| 192 |
+
CUDA_CHECK(cudaDeviceSynchronize());
|
| 193 |
+
}
|
| 194 |
+
}
|
| 195 |
#endif
|
| 196 |
|
| 197 |
ggml_cuda_device_info info = {};
|