jeffbolznv committed on
Commit
d49a569
·
1 Parent(s): 2fbcec1

vulkan: optimize coopmat2 q2_k dequant function (llama/11130)

Browse files
ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp CHANGED
@@ -101,19 +101,25 @@ layout(buffer_reference, std430, buffer_reference_align = 4) buffer decodeBufQ2_
101
  block_q2_K block;
102
  };
103
 
 
 
 
 
104
  float16_t dequantFuncQ2_K(const in decodeBufQ2_K bl, const in uint blockCoords[2], const in uint coordInBlock[2])
105
  {
 
106
  const f16vec2 d = bl.block.d;
107
  const uint idx = coordInBlock[1];
108
- const uint iqs = idx;
109
 
110
- const uint qsi = (iqs / 128) * 32 + (iqs % 32); // 0..31
111
- const uint scalesi = iqs / 16; // 0..15
112
- const uint qsshift = ((iqs % 128) / 32) * 2; // 0,2,4,6
 
 
 
113
 
114
- uint32_t qs = bl.block.qs[qsi];
115
  const uint scales = bl.block.scales[scalesi];
116
- float16_t ret = d.x * float16_t(scales & 0xF) * float16_t((qs >> qsshift) & 3) - d.y * float16_t(scales >> 4);
117
  return ret;
118
  }
119
 
 
101
  block_q2_K block;
102
  };
103
 
104
+ layout(buffer_reference, std430, buffer_reference_align = 16) buffer decodeBufQ2_K_packed16 {
105
+ block_q2_K_packed16 block;
106
+ };
107
+
108
  float16_t dequantFuncQ2_K(const in decodeBufQ2_K bl, const in uint blockCoords[2], const in uint coordInBlock[2])
109
  {
110
+ decodeBufQ2_K_packed16 bl16 = decodeBufQ2_K_packed16(bl);
111
  const f16vec2 d = bl.block.d;
112
  const uint idx = coordInBlock[1];
 
113
 
114
+ const uint scalesi = (idx & 0xF0) >> 4; // 0..15
115
+ const uint qsshift = (idx & 0x60) >> 4; // 0,2,4,6
116
+
117
+ uint qs = uint32_t(bl16.block.qs[((idx & 0x80) >> 3) + ((idx & 0x1E) >> 1)]);
118
+ qs = (qs >> qsshift) & 0x0303;
119
+ qs = unpack8(qs)[idx & 1];
120
 
 
121
  const uint scales = bl.block.scales[scalesi];
122
+ float16_t ret = d.x * float16_t(scales & 0xF) * float16_t(qs) - d.y * float16_t(scales >> 4);
123
  return ret;
124
  }
125