Nikita Sarychev committed on
Commit
82bb7f3
·
1 Parent(s): 6406a6e

HIP: Only call rocblas_initialize on rocblas versions with the multiple instantiation bug (llama/11080)

Browse files

This disables the workaround on rocBLAS versions where the bug is fixed (>= 4.0.0), eliminating the runtime cost and unnecessary VRAM allocation of loading all Tensile objects.

Files changed (1) hide show
  1. ggml/src/ggml-cuda/ggml-cuda.cu +20 -2
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -42,6 +42,7 @@
42
  #include <algorithm>
43
  #include <array>
44
  #include <atomic>
 
45
  #include <cinttypes>
46
  #include <cstddef>
47
  #include <cstdint>
@@ -172,8 +173,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
172
  #ifdef __HIP_PLATFORM_AMD__
173
  // Workaround for a rocBLAS bug when using multiple graphics cards:
174
  // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
175
- rocblas_initialize();
176
- CUDA_CHECK(cudaDeviceSynchronize());
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  #endif
178
 
179
  ggml_cuda_device_info info = {};
 
42
  #include <algorithm>
43
  #include <array>
44
  #include <atomic>
45
+ #include <charconv>
46
  #include <cinttypes>
47
  #include <cstddef>
48
  #include <cstdint>
 
173
  #ifdef __HIP_PLATFORM_AMD__
174
  // Workaround for a rocBLAS bug when using multiple graphics cards:
175
  // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
176
+ {
177
+ int major_version = 0;
178
+ size_t version_length = 0;
179
+ if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
180
+ std::string version(version_length, '\0');
181
+ if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
182
+ version.resize(::strlen(version.c_str()));
183
+ int parsed_value = 0;
184
+ if (std::from_chars(version.c_str(), version.c_str() + version.length(), parsed_value).ec == std::errc()) {
185
+ major_version = parsed_value;
186
+ }
187
+ }
188
+ }
189
+ if (major_version < 4) {
190
+ GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
191
+ rocblas_initialize();
192
+ CUDA_CHECK(cudaDeviceSynchronize());
193
+ }
194
+ }
195
  #endif
196
 
197
  ggml_cuda_device_info info = {};