lhez committed on
Commit
3847456
·
1 Parent(s): 5ab06d6

opencl: use `max_alloc_size` in backend ctx instead of querying again (llama/12705)

Browse files
ggml/src/ggml-opencl/ggml-opencl.cpp CHANGED
@@ -924,27 +924,24 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
924
  // TODO: fixme: these sizes are hardcoded for now.
925
  // they should be allocated based on the model's size
926
  // and the device's max alloc size
927
- size_t max_alloc_size;
928
- CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_alloc_size, NULL));
929
-
930
  // Allocate intermediate buffers and images
931
  size_t required_A_q_d_bytes = 311164928;
932
  size_t required_A_s_d_bytes = 38895616;
933
  size_t required_B_d_bytes = 45088768;
934
 
935
  // Ensure buffer sizes do not exceed the maximum allocation size
936
- size_t max_A_q_d_bytes = MIN(required_A_q_d_bytes, max_alloc_size);
937
- size_t max_A_s_d_bytes = MIN(required_A_s_d_bytes, max_alloc_size);
938
- size_t max_B_d_bytes = MIN(required_B_d_bytes, max_alloc_size);
939
- if (required_A_q_d_bytes > max_alloc_size) {
940
  GGML_LOG_WARN("ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n",
941
  required_A_q_d_bytes, max_A_q_d_bytes);
942
  }
943
- if (required_A_s_d_bytes > max_alloc_size) {
944
  GGML_LOG_WARN("ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n",
945
  required_A_s_d_bytes, max_A_s_d_bytes);
946
  }
947
- if (required_B_d_bytes > max_alloc_size) {
948
  GGML_LOG_WARN("ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n",
949
  required_B_d_bytes, max_B_d_bytes);
950
  }
 
924
  // TODO: fixme: these sizes are hardcoded for now.
925
  // they should be allocated based on the model's size
926
  // and the device's max alloc size
 
 
 
927
  // Allocate intermediate buffers and images
928
  size_t required_A_q_d_bytes = 311164928;
929
  size_t required_A_s_d_bytes = 38895616;
930
  size_t required_B_d_bytes = 45088768;
931
 
932
  // Ensure buffer sizes do not exceed the maximum allocation size
933
+ size_t max_A_q_d_bytes = MIN(required_A_q_d_bytes, backend_ctx->max_alloc_size);
934
+ size_t max_A_s_d_bytes = MIN(required_A_s_d_bytes, backend_ctx->max_alloc_size);
935
+ size_t max_B_d_bytes = MIN(required_B_d_bytes, backend_ctx->max_alloc_size);
936
+ if (required_A_q_d_bytes > backend_ctx->max_alloc_size) {
937
  GGML_LOG_WARN("ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n",
938
  required_A_q_d_bytes, max_A_q_d_bytes);
939
  }
940
+ if (required_A_s_d_bytes > backend_ctx->max_alloc_size) {
941
  GGML_LOG_WARN("ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n",
942
  required_A_s_d_bytes, max_A_s_d_bytes);
943
  }
944
+ if (required_B_d_bytes > backend_ctx->max_alloc_size) {
945
  GGML_LOG_WARN("ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n",
946
  required_B_d_bytes, max_B_d_bytes);
947
  }