germanaizek committed on
Commit
8325ed5
·
1 Parent(s): 9794ea7

ggml-opencl, llama: using reserve() if count already known (llama/7272)

Browse files
Files changed (1) hide show
  1. ggml-opencl.cpp +5 -2
ggml-opencl.cpp CHANGED
@@ -1,4 +1,4 @@
1
- #include "ggml.h"
2
  #include "ggml-opencl.h"
3
  #include "ggml-backend-impl.h"
4
 
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
1835
  CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
1836
  }
1837
 
1838
- for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
 
 
 
1839
  if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
1840
  // copy src1 to device
1841
  events.emplace_back();
 
1
+ #include "ggml.h"
2
  #include "ggml-opencl.h"
3
  #include "ggml-backend-impl.h"
4
 
 
1835
  CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
1836
  }
1837
 
1838
+ int64_t i12 = i02 * r2;
1839
+ int64_t e12 = i12 + r2;
1840
+ events.reserve(e12 - i12);
1841
+ for (; i12 < e12; i12++) {
1842
  if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
1843
  // copy src1 to device
1844
  events.emplace_back();