Spaces:
Sleeping
Sleeping
uvos
committed on
Commit
·
e538e2c
1
Parent(s):
bd93c1b
CUDA/HIP: add warp_size to cuda_device_info
Browse files
ggml/src/ggml-cuda/common.cuh
CHANGED
|
@@ -520,6 +520,7 @@ struct ggml_cuda_device_info {
|
|
| 520 |
bool vmm; // virtual memory support
|
| 521 |
size_t vmm_granularity; // granularity of virtual memory
|
| 522 |
size_t total_vram;
|
|
|
|
| 523 |
};
|
| 524 |
|
| 525 |
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
|
|
|
|
| 520 |
bool vmm; // virtual memory support
|
| 521 |
size_t vmm_granularity; // granularity of virtual memory
|
| 522 |
size_t total_vram;
|
| 523 |
+
int warp_size; // Number of threads in a dispatch
|
| 524 |
};
|
| 525 |
|
| 526 |
cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};
|
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -242,6 +242,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|
| 242 |
|
| 243 |
info.devices[id].nsm = prop.multiProcessorCount;
|
| 244 |
info.devices[id].smpb = prop.sharedMemPerBlock;
|
|
|
|
| 245 |
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
| 246 |
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
| 247 |
|
|
@@ -256,8 +257,9 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|
| 256 |
info.devices[id].cc += prop.minor * 0x10;
|
| 257 |
}
|
| 258 |
}
|
| 259 |
-
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
|
| 260 |
-
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
|
|
|
|
| 261 |
#else
|
| 262 |
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
| 263 |
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
|
|
|
| 242 |
|
| 243 |
info.devices[id].nsm = prop.multiProcessorCount;
|
| 244 |
info.devices[id].smpb = prop.sharedMemPerBlock;
|
| 245 |
+
info.devices[id].warp_size = prop.warpSize;
|
| 246 |
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
| 247 |
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
| 248 |
|
|
|
|
| 257 |
info.devices[id].cc += prop.minor * 0x10;
|
| 258 |
}
|
| 259 |
}
|
| 260 |
+
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
|
| 261 |
+
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
|
| 262 |
+
device_vmm ? "yes" : "no", prop.warpSize);
|
| 263 |
#else
|
| 264 |
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
| 265 |
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|