Spaces:
Running
Running
Commit
·
d49a569
1
Parent(s):
2fbcec1
vulkan: optimize coopmat2 q2_k dequant function (llama/11130)
Browse files
ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
CHANGED
|
@@ -101,19 +101,25 @@ layout(buffer_reference, std430, buffer_reference_align = 4) buffer decodeBufQ2_
|
|
| 101 |
block_q2_K block;
|
| 102 |
};
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
float16_t dequantFuncQ2_K(const in decodeBufQ2_K bl, const in uint blockCoords[2], const in uint coordInBlock[2])
|
| 105 |
{
|
|
|
|
| 106 |
const f16vec2 d = bl.block.d;
|
| 107 |
const uint idx = coordInBlock[1];
|
| 108 |
-
const uint iqs = idx;
|
| 109 |
|
| 110 |
-
const uint
|
| 111 |
-
const uint
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
-
uint32_t qs = bl.block.qs[qsi];
|
| 115 |
const uint scales = bl.block.scales[scalesi];
|
| 116 |
-
float16_t ret = d.x * float16_t(scales & 0xF) * float16_t(
|
| 117 |
return ret;
|
| 118 |
}
|
| 119 |
|
|
|
|
| 101 |
block_q2_K block;
|
| 102 |
};
|
| 103 |
|
| 104 |
+
// Buffer-reference type (std430 layout, declared 16-byte reference alignment)
// whose only member is a single block_q2_K_packed16.
// NOTE(review): block_q2_K_packed16 is declared elsewhere; presumably it is the
// same storage as block_q2_K with qs exposed as 16-bit elements - confirm.
layout(buffer_reference, std430, buffer_reference_align = 16) buffer decodeBufQ2_K_packed16 {
|
| 105 |
+
block_q2_K_packed16 block;
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
// Dequantize one element of a Q2_K block to float16.
//
// bl           - buffer reference to the block being decoded.
// blockCoords  - not read by this function body.
// coordInBlock - only coordInBlock[1] is read; it is the element index (idx).
//
// Returns d.x * (low nibble of the scale entry) * (2-bit quant value)
//       - d.y * (remaining upper bits of the scale entry).
// NOTE(review): the bit masks below assume idx < 256 - confirm against caller.
float16_t dequantFuncQ2_K(const in decodeBufQ2_K bl, const in uint blockCoords[2], const in uint coordInBlock[2])
|
| 109 |
{
|
| 110 |
+
decodeBufQ2_K_packed16 bl16 = decodeBufQ2_K_packed16(bl); // same reference, converted to the packed16 reference type for the qs load
|
| 111 |
const f16vec2 d = bl.block.d; // d.x scales the quant term, d.y scales the subtracted term
|
| 112 |
const uint idx = coordInBlock[1];
|
|
|
|
| 113 |
|
| 114 |
+
const uint scalesi = (idx & 0xF0) >> 4; // 0..15: bits 4-7 of idx select the scale entry
|
| 115 |
+
const uint qsshift = (idx & 0x60) >> 4; // 0,2,4,6: bits 5-6 of idx, already doubled into a bit shift
|
| 116 |
+
|
| 117 |
+
uint qs = uint32_t(bl16.block.qs[((idx & 0x80) >> 3) + ((idx & 0x1E) >> 1)]); // index = (0 or 16 from bit 7) + (0..15 from bits 1-4)
|
| 118 |
+
qs = (qs >> qsshift) & 0x0303; // keep the selected 2-bit field in each of the two low bytes
|
| 119 |
+
qs = unpack8(qs)[idx & 1]; // bit 0 of idx picks byte 0 or byte 1
|
| 120 |
|
|
|
|
| 121 |
const uint scales = bl.block.scales[scalesi]; // low nibble and upper bits are used separately below
|
| 122 |
+
float16_t ret = d.x * float16_t(scales & 0xF) * float16_t(qs) - d.y * float16_t(scales >> 4);
|
| 123 |
return ret;
|
| 124 |
}
|
| 125 |
|