Kawrakow ikawrakow committed on
Commit
e538f25
·
unverified ·
1 Parent(s): 12970f1

ggml-quants : fix compiler warnings (shadow variable) (llama/5472)

Browse files
Files changed (1) hide show
  1. ggml-quants.c +18 -18
ggml-quants.c CHANGED
@@ -3819,15 +3819,15 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
3819
  /* Compute combined scale for the block */
3820
  const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );
3821
 
3822
- __m256i bx = bytes_from_nibbles_32(x[i].qs);
3823
 
3824
  // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
3825
  const __m256i off = _mm256_set1_epi8( 8 );
3826
- bx = _mm256_sub_epi8( bx, off );
3827
 
3828
- __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
3829
 
3830
- const __m256 q = mul_sum_i8_pairs_float(bx, by);
3831
 
3832
  /* Multiply q with scale and accumulate */
3833
  acc = _mm256_fmadd_ps( d, q, acc );
@@ -4196,10 +4196,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
4196
  const __m256 d0d1 = _mm256_mul_ps( d0v, d1v );
4197
 
4198
  // Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes
4199
- const __m256i bx = bytes_from_nibbles_32(x[i].qs);
4200
- const __m256i by = _mm256_loadu_si256( (const __m256i *)y[i].qs );
4201
 
4202
- const __m256 xy = mul_sum_us8_pairs_float(bx, by);
4203
 
4204
  // Accumulate d0*d1*x*y
4205
  #if defined(__AVX2__)
@@ -4418,14 +4418,14 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
4418
  /* Compute combined scale for the block */
4419
  const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
4420
 
4421
- __m256i bx = bytes_from_nibbles_32(x[i].qs);
4422
  __m256i bxhi = bytes_from_bits_32(x[i].qh);
4423
  bxhi = _mm256_andnot_si256(bxhi, _mm256_set1_epi8((char)0xF0));
4424
- bx = _mm256_or_si256(bx, bxhi);
4425
 
4426
- __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
4427
 
4428
- const __m256 q = mul_sum_i8_pairs_float(bx, by);
4429
 
4430
  /* Multiply q with scale and accumulate */
4431
  acc = _mm256_fmadd_ps(d, q, acc);
@@ -4722,15 +4722,15 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
4722
 
4723
  summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
4724
 
4725
- __m256i bx = bytes_from_nibbles_32(x[i].qs);
4726
  __m256i bxhi = bytes_from_bits_32(x[i].qh);
4727
  bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
4728
- bx = _mm256_or_si256(bx, bxhi);
4729
 
4730
  const __m256 dy = _mm256_set1_ps(y[i].d);
4731
- const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
4732
 
4733
- const __m256 q = mul_sum_us8_pairs_float(bx, by);
4734
 
4735
  acc = _mm256_fmadd_ps(q, _mm256_mul_ps(dx, dy), acc);
4736
  }
@@ -4973,10 +4973,10 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
4973
  for (int i = 0; i < nb; ++i) {
4974
  // Compute combined scale for the block
4975
  const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
4976
- __m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
4977
- __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
4978
 
4979
- const __m256 q = mul_sum_i8_pairs_float(bx, by);
4980
 
4981
  // Multiply q with scale and accumulate
4982
  #if defined(__AVX2__)
 
3819
  /* Compute combined scale for the block */
3820
  const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );
3821
 
3822
+ __m256i qx = bytes_from_nibbles_32(x[i].qs);
3823
 
3824
  // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
3825
  const __m256i off = _mm256_set1_epi8( 8 );
3826
+ qx = _mm256_sub_epi8( qx, off );
3827
 
3828
+ __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
3829
 
3830
+ const __m256 q = mul_sum_i8_pairs_float(qx, qy);
3831
 
3832
  /* Multiply q with scale and accumulate */
3833
  acc = _mm256_fmadd_ps( d, q, acc );
 
4196
  const __m256 d0d1 = _mm256_mul_ps( d0v, d1v );
4197
 
4198
  // Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes
4199
+ const __m256i qx = bytes_from_nibbles_32(x[i].qs);
4200
+ const __m256i qy = _mm256_loadu_si256( (const __m256i *)y[i].qs );
4201
 
4202
+ const __m256 xy = mul_sum_us8_pairs_float(qx, qy);
4203
 
4204
  // Accumulate d0*d1*x*y
4205
  #if defined(__AVX2__)
 
4418
  /* Compute combined scale for the block */
4419
  const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
4420
 
4421
+ __m256i qx = bytes_from_nibbles_32(x[i].qs);
4422
  __m256i bxhi = bytes_from_bits_32(x[i].qh);
4423
  bxhi = _mm256_andnot_si256(bxhi, _mm256_set1_epi8((char)0xF0));
4424
+ qx = _mm256_or_si256(qx, bxhi);
4425
 
4426
+ __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
4427
 
4428
+ const __m256 q = mul_sum_i8_pairs_float(qx, qy);
4429
 
4430
  /* Multiply q with scale and accumulate */
4431
  acc = _mm256_fmadd_ps(d, q, acc);
 
4722
 
4723
  summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
4724
 
4725
+ __m256i qx = bytes_from_nibbles_32(x[i].qs);
4726
  __m256i bxhi = bytes_from_bits_32(x[i].qh);
4727
  bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
4728
+ qx = _mm256_or_si256(qx, bxhi);
4729
 
4730
  const __m256 dy = _mm256_set1_ps(y[i].d);
4731
+ const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
4732
 
4733
+ const __m256 q = mul_sum_us8_pairs_float(qx, qy);
4734
 
4735
  acc = _mm256_fmadd_ps(q, _mm256_mul_ps(dx, dy), acc);
4736
  }
 
4973
  for (int i = 0; i < nb; ++i) {
4974
  // Compute combined scale for the block
4975
  const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
4976
+ __m256i qx = _mm256_loadu_si256((const __m256i *)x[i].qs);
4977
+ __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
4978
 
4979
+ const __m256 q = mul_sum_i8_pairs_float(qx, qy);
4980
 
4981
  // Multiply q with scale and accumulate
4982
  #if defined(__AVX2__)