Spaces:
Sleeping
Sleeping
lhez
committed on
Commit
·
3847456
1
Parent(s):
5ab06d6
opencl: use `max_alloc_size` in backend ctx instead of querying again (llama/12705)
Browse files
ggml/src/ggml-opencl/ggml-opencl.cpp
CHANGED
|
@@ -924,27 +924,24 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
|
|
| 924 |
// TODO: fixme: these sizes are hardcoded for now.
|
| 925 |
// they should be allocated based on the model's size
|
| 926 |
// and the device's max alloc size
|
| 927 |
-
size_t max_alloc_size;
|
| 928 |
-
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_alloc_size, NULL));
|
| 929 |
-
|
| 930 |
// Allocate intermediate buffers and images
|
| 931 |
size_t required_A_q_d_bytes = 311164928;
|
| 932 |
size_t required_A_s_d_bytes = 38895616;
|
| 933 |
size_t required_B_d_bytes = 45088768;
|
| 934 |
|
| 935 |
// Ensure buffer sizes do not exceed the maximum allocation size
|
| 936 |
-
size_t max_A_q_d_bytes = MIN(required_A_q_d_bytes, max_alloc_size);
|
| 937 |
-
size_t max_A_s_d_bytes = MIN(required_A_s_d_bytes, max_alloc_size);
|
| 938 |
-
size_t max_B_d_bytes = MIN(required_B_d_bytes, max_alloc_size);
|
| 939 |
-
if (required_A_q_d_bytes > max_alloc_size) {
|
| 940 |
GGML_LOG_WARN("ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 941 |
required_A_q_d_bytes, max_A_q_d_bytes);
|
| 942 |
}
|
| 943 |
-
if (required_A_s_d_bytes > max_alloc_size) {
|
| 944 |
GGML_LOG_WARN("ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 945 |
required_A_s_d_bytes, max_A_s_d_bytes);
|
| 946 |
}
|
| 947 |
-
if (required_B_d_bytes > max_alloc_size) {
|
| 948 |
GGML_LOG_WARN("ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 949 |
required_B_d_bytes, max_B_d_bytes);
|
| 950 |
}
|
|
|
|
| 924 |
// TODO: fixme: these sizes are hardcoded for now.
|
| 925 |
// they should be allocated based on the model's size
|
| 926 |
// and the device's max alloc size
|
|
|
|
|
|
|
|
|
|
| 927 |
// Allocate intermediate buffers and images
|
| 928 |
size_t required_A_q_d_bytes = 311164928;
|
| 929 |
size_t required_A_s_d_bytes = 38895616;
|
| 930 |
size_t required_B_d_bytes = 45088768;
|
| 931 |
|
| 932 |
// Ensure buffer sizes do not exceed the maximum allocation size
|
| 933 |
+
size_t max_A_q_d_bytes = MIN(required_A_q_d_bytes, backend_ctx->max_alloc_size);
|
| 934 |
+
size_t max_A_s_d_bytes = MIN(required_A_s_d_bytes, backend_ctx->max_alloc_size);
|
| 935 |
+
size_t max_B_d_bytes = MIN(required_B_d_bytes, backend_ctx->max_alloc_size);
|
| 936 |
+
if (required_A_q_d_bytes > backend_ctx->max_alloc_size) {
|
| 937 |
GGML_LOG_WARN("ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 938 |
required_A_q_d_bytes, max_A_q_d_bytes);
|
| 939 |
}
|
| 940 |
+
if (required_A_s_d_bytes > backend_ctx->max_alloc_size) {
|
| 941 |
GGML_LOG_WARN("ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 942 |
required_A_s_d_bytes, max_A_s_d_bytes);
|
| 943 |
}
|
| 944 |
+
if (required_B_d_bytes > backend_ctx->max_alloc_size) {
|
| 945 |
GGML_LOG_WARN("ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n",
|
| 946 |
required_B_d_bytes, max_B_d_bytes);
|
| 947 |
}
|