gonchaxyz committed on
Commit
13afdc6
·
unverified ·
1 Parent(s): 8453bfd

opencl : sync opencl compilation fix in ggml (#1111)

Browse files
Files changed (1) hide show
  1. ggml-opencl.cpp +6 -6
ggml-opencl.cpp CHANGED
@@ -653,13 +653,13 @@ __kernel void dequantize_mul_mat_vec_q6_K(__global const struct block_q6_K * xx,
653
  const int im = tid/step; // 0 or 1. 0 computes 0..., 1 computes 128...
654
  const int in = tid - step*im; // 0...15 or 0...7
655
 
656
- #if K_QUANTS_PER_ITERATION == 1
657
  const int l0 = K_QUANTS_PER_ITERATION*in; // 0...15
658
  const int is = 0;
659
- #else
660
  const int l0 = 4 * in; // 0, 4, 8, ..., 28
661
  const int is = in / 4;
662
- #endif
663
  const int ql_offset = 64*im + l0;
664
  const int qh_offset = 32*im + l0;
665
  const int s_offset = 8*im + is;
@@ -676,7 +676,7 @@ __kernel void dequantize_mul_mat_vec_q6_K(__global const struct block_q6_K * xx,
676
 
677
  const float d = vload_half(0, &x[i].d);
678
 
679
- #if K_QUANTS_PER_ITERATION == 1
680
  float sum = y[ 0] * s[0] * d * ((int8_t)((ql[ 0] & 0xF) | ((qh[ 0] & 0x03) << 4)) - 32)
681
  + y[16] * s[1] * d * ((int8_t)((ql[16] & 0xF) | ((qh[16] & 0x03) << 4)) - 32)
682
  + y[32] * s[2] * d * ((int8_t)((ql[32] & 0xF) | ((qh[ 0] & 0x0c) << 2)) - 32)
@@ -686,7 +686,7 @@ __kernel void dequantize_mul_mat_vec_q6_K(__global const struct block_q6_K * xx,
686
  + y[96] * s[6] * d * ((int8_t)((ql[32] >> 4) | ((qh[ 0] & 0xc0) >> 2)) - 32)
687
  +y[112] * s[7] * d * ((int8_t)((ql[48] >> 4) | ((qh[16] & 0xc0) >> 2)) - 32);
688
  tmp[16 * ix + tid] += sum;
689
- #else
690
  float sum = 0;
691
  for (int l = 0; l < 4; ++l) {
692
  sum += y[l+ 0] * s[0] * d * ((int8_t)((ql[l+ 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32)
@@ -695,7 +695,7 @@ __kernel void dequantize_mul_mat_vec_q6_K(__global const struct block_q6_K * xx,
695
  + y[l+96] * s[6] * d * ((int8_t)((ql[l+32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32);
696
  }
697
  tmp[16 * ix + tid] += sum;
698
- #endif
699
 
700
  }
701
 
 
653
  const int im = tid/step; // 0 or 1. 0 computes 0..., 1 computes 128...
654
  const int in = tid - step*im; // 0...15 or 0...7
655
 
656
+ \n#if K_QUANTS_PER_ITERATION == 1\n
657
  const int l0 = K_QUANTS_PER_ITERATION*in; // 0...15
658
  const int is = 0;
659
+ \n#else\n
660
  const int l0 = 4 * in; // 0, 4, 8, ..., 28
661
  const int is = in / 4;
662
+ \n#endif\n
663
  const int ql_offset = 64*im + l0;
664
  const int qh_offset = 32*im + l0;
665
  const int s_offset = 8*im + is;
 
676
 
677
  const float d = vload_half(0, &x[i].d);
678
 
679
+ \n#if K_QUANTS_PER_ITERATION == 1\n
680
  float sum = y[ 0] * s[0] * d * ((int8_t)((ql[ 0] & 0xF) | ((qh[ 0] & 0x03) << 4)) - 32)
681
  + y[16] * s[1] * d * ((int8_t)((ql[16] & 0xF) | ((qh[16] & 0x03) << 4)) - 32)
682
  + y[32] * s[2] * d * ((int8_t)((ql[32] & 0xF) | ((qh[ 0] & 0x0c) << 2)) - 32)
 
686
  + y[96] * s[6] * d * ((int8_t)((ql[32] >> 4) | ((qh[ 0] & 0xc0) >> 2)) - 32)
687
  +y[112] * s[7] * d * ((int8_t)((ql[48] >> 4) | ((qh[16] & 0xc0) >> 2)) - 32);
688
  tmp[16 * ix + tid] += sum;
689
+ \n#else\n
690
  float sum = 0;
691
  for (int l = 0; l < 4; ++l) {
692
  sum += y[l+ 0] * s[0] * d * ((int8_t)((ql[l+ 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32)
 
695
  + y[l+96] * s[6] * d * ((int8_t)((ql[l+32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32);
696
  }
697
  tmp[16 * ix + tid] += sum;
698
+ \n#endif\n
699
 
700
  }
701