Sparkleholic committed on
Commit
b00a8a9
·
1 Parent(s): a49f5c2

opencl : fix memory allocation size (llama/12649)

Browse files

issue:
https://github.com/CodeLinaro/llama.cpp/pull/17#issuecomment-2760611283

This patch caps the intermediate buffer allocation sizes so that they
do not exceed the maximum memory allocation size of the OpenCL device.

ggml/src/ggml-opencl/ggml-opencl.cpp CHANGED
@@ -921,10 +921,33 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
921
  backend_ctx->program_CL_gemm = build_program_from_source(context, device, kernel_src_CL_gemm.c_str(), compile_opts);
922
  CL_CHECK((backend_ctx->CL_mul_mat_Ab_Bi_8x4 = clCreateKernel(backend_ctx->program_CL_gemm, "kernel_mul_mat_Ab_Bi_8x4", &err), err));
923
 
 
 
 
 
 
 
924
  // Allocate intermediate buffers and images
925
- size_t max_A_q_d_bytes = 311164928;
926
- size_t max_A_s_d_bytes = 38895616;
927
- size_t max_B_d_bytes = 45088768;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
 
929
  CL_CHECK((backend_ctx->A_q_d_max = clCreateBuffer(context, 0, max_A_q_d_bytes, NULL, &err), err));
930
  CL_CHECK((backend_ctx->A_s_d_max = clCreateBuffer(context, 0, max_A_s_d_bytes, NULL, &err), err));
 
921
  backend_ctx->program_CL_gemm = build_program_from_source(context, device, kernel_src_CL_gemm.c_str(), compile_opts);
922
  CL_CHECK((backend_ctx->CL_mul_mat_Ab_Bi_8x4 = clCreateKernel(backend_ctx->program_CL_gemm, "kernel_mul_mat_Ab_Bi_8x4", &err), err));
923
 
924
+ // TODO: fixme: these sizes are hardcoded for now.
925
+ // they should be allocated based on the model's size
926
+ // and the device's max alloc size
927
+ size_t max_alloc_size;
928
+ CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_alloc_size, NULL));
929
+
930
  // Allocate intermediate buffers and images
931
+ size_t required_A_q_d_bytes = 311164928;
932
+ size_t required_A_s_d_bytes = 38895616;
933
+ size_t required_B_d_bytes = 45088768;
934
+
935
+ // Ensure buffer sizes do not exceed the maximum allocation size
936
+ size_t max_A_q_d_bytes = MIN(required_A_q_d_bytes, max_alloc_size);
937
+ size_t max_A_s_d_bytes = MIN(required_A_s_d_bytes, max_alloc_size);
938
+ size_t max_B_d_bytes = MIN(required_B_d_bytes, max_alloc_size);
939
+ if (required_A_q_d_bytes > max_alloc_size) {
940
+ GGML_LOG_WARN("ggml_opencl: A_q_d buffer size reduced from %zu to %zu due to device limitations.\n",
941
+ required_A_q_d_bytes, max_A_q_d_bytes);
942
+ }
943
+ if (required_A_s_d_bytes > max_alloc_size) {
944
+ GGML_LOG_WARN("ggml_opencl: A_s_d buffer size reduced from %zu to %zu due to device limitations.\n",
945
+ required_A_s_d_bytes, max_A_s_d_bytes);
946
+ }
947
+ if (required_B_d_bytes > max_alloc_size) {
948
+ GGML_LOG_WARN("ggml_opencl: B_d buffer size reduced from %zu to %zu due to device limitations.\n",
949
+ required_B_d_bytes, max_B_d_bytes);
950
+ }
951
 
952
  CL_CHECK((backend_ctx->A_q_d_max = clCreateBuffer(context, 0, max_A_q_d_bytes, NULL, &err), err));
953
  CL_CHECK((backend_ctx->A_s_d_max = clCreateBuffer(context, 0, max_A_s_d_bytes, NULL, &err), err));