jeffbolznv committed on
Commit
cea89af
·
1 Parent(s): 18a0ad1

vulkan: define all quant data structures in types.comp (llama/10440)

Browse files
ggml/src/ggml-vulkan/vulkan-shaders/types.comp CHANGED
@@ -30,10 +30,8 @@
30
  #endif
31
  #endif
32
 
33
- #if defined(DATA_A_Q4_0)
34
- #extension GL_EXT_shader_16bit_storage : require
35
- #define QUANT_K 32
36
- #define QUANT_R 2
37
 
38
  struct block_q4_0
39
  {
@@ -46,14 +44,15 @@ struct block_q4_0_packed16
46
  uint16_t qs[16/2];
47
  };
48
 
 
 
 
49
  #define A_TYPE block_q4_0
50
  #define A_TYPE_PACKED16 block_q4_0_packed16
51
  #endif
52
 
53
- #if defined(DATA_A_Q4_1)
54
- #extension GL_EXT_shader_16bit_storage : require
55
- #define QUANT_K 32
56
- #define QUANT_R 2
57
 
58
  struct block_q4_1
59
  {
@@ -69,15 +68,15 @@ struct block_q4_1_packed16
69
  uint16_t qs[16/2];
70
  };
71
 
 
 
 
72
  #define A_TYPE block_q4_1
73
  #define A_TYPE_PACKED16 block_q4_1_packed16
74
  #endif
75
 
76
- #if defined(DATA_A_Q5_0)
77
- #extension GL_EXT_shader_16bit_storage : require
78
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
79
- #define QUANT_K 32
80
- #define QUANT_R 2
81
 
82
  struct block_q5_0
83
  {
@@ -93,15 +92,15 @@ struct block_q5_0_packed16
93
  uint16_t qs[16/2];
94
  };
95
 
 
 
 
96
  #define A_TYPE block_q5_0
97
  #define A_TYPE_PACKED16 block_q5_0_packed16
98
  #endif
99
 
100
- #if defined(DATA_A_Q5_1)
101
- #extension GL_EXT_shader_16bit_storage : require
102
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
103
- #define QUANT_K 32
104
- #define QUANT_R 2
105
 
106
  struct block_q5_1
107
  {
@@ -119,14 +118,15 @@ struct block_q5_1_packed16
119
  uint16_t qs[16/2];
120
  };
121
 
 
 
 
122
  #define A_TYPE block_q5_1
123
  #define A_TYPE_PACKED16 block_q5_1_packed16
124
  #endif
125
 
126
- #if defined(DATA_A_Q8_0)
127
- #extension GL_EXT_shader_16bit_storage : require
128
- #define QUANT_K 32
129
- #define QUANT_R 1
130
 
131
  struct block_q8_0
132
  {
@@ -139,164 +139,164 @@ struct block_q8_0_packed16
139
  uint16_t qs[32/2];
140
  };
141
 
 
 
 
142
  #define A_TYPE block_q8_0
143
  #define A_TYPE_PACKED16 block_q8_0_packed16
144
  #endif
145
 
146
  // K-quants
147
- #if defined(DATA_A_Q2_K)
148
- #extension GL_EXT_shader_16bit_storage : require
149
- #define QUANT_K 256
150
 
151
  struct block_q2_K
152
  {
153
- uint8_t scales[QUANT_K/16];
154
- uint8_t qs[QUANT_K/4];
155
  f16vec2 d;
156
  };
157
 
158
  struct block_q2_K_packed16
159
  {
160
- uint16_t scales[QUANT_K/16/2];
161
- uint16_t qs[QUANT_K/4/2];
162
  f16vec2 d;
163
  };
164
 
165
  struct block_q2_K_packed32
166
  {
167
- uint32_t scales[QUANT_K/16/4];
168
- uint32_t qs[QUANT_K/4/4];
169
  f16vec2 d;
170
  };
171
 
 
 
172
  #define A_TYPE block_q2_K
173
  #define A_TYPE_PACKED16 block_q2_K_packed16
174
  #define A_TYPE_PACKED32 block_q2_K_packed32
175
  #endif
176
 
177
- #if defined(DATA_A_Q3_K)
178
- #extension GL_EXT_shader_16bit_storage : require
179
- #define QUANT_K 256
180
 
181
  struct block_q3_K
182
  {
183
- uint8_t hmask[QUANT_K/8];
184
- uint8_t qs[QUANT_K/4];
185
  uint8_t scales[12];
186
  float16_t d;
187
  };
188
 
189
  struct block_q3_K_packed16
190
  {
191
- uint16_t hmask[QUANT_K/8/2];
192
- uint16_t qs[QUANT_K/4/2];
193
  uint16_t scales[12/2];
194
  float16_t d;
195
  };
196
 
 
 
197
  #define A_TYPE block_q3_K
198
  #define A_TYPE_PACKED16 block_q3_K_packed16
199
  #endif
200
 
201
- #if defined(DATA_A_Q4_K)
202
- #extension GL_EXT_shader_16bit_storage : require
203
- #define QUANT_K 256
204
 
205
  struct block_q4_K
206
  {
207
  f16vec2 d;
208
- uint8_t scales[3*QUANT_K/64];
209
- uint8_t qs[QUANT_K/2];
210
  };
211
 
212
  struct block_q4_K_packed16
213
  {
214
  f16vec2 d;
215
- uint16_t scales[3*QUANT_K/64/2];
216
- uint16_t qs[QUANT_K/2/2];
217
  };
218
 
219
  struct block_q4_K_packed32
220
  {
221
  f16vec2 d;
222
- uint32_t scales[3*QUANT_K/64/4];
223
- uint32_t qs[QUANT_K/2/4];
224
  };
225
 
 
 
226
  #define A_TYPE block_q4_K
227
  #define A_TYPE_PACKED16 block_q4_K_packed16
228
  #define A_TYPE_PACKED32 block_q4_K_packed32
229
  #endif
230
 
231
- #if defined(DATA_A_Q5_K)
232
- #extension GL_EXT_shader_16bit_storage : require
233
- #define QUANT_K 256
234
 
235
  struct block_q5_K
236
  {
237
  f16vec2 d;
238
  uint8_t scales[12];
239
- uint8_t qh[QUANT_K/8];
240
- uint8_t qs[QUANT_K/2];
241
  };
242
 
243
  struct block_q5_K_packed16
244
  {
245
  f16vec2 d;
246
  uint16_t scales[12/2];
247
- uint16_t qh[QUANT_K/8/2];
248
- uint16_t qs[QUANT_K/2/2];
249
  };
250
 
 
 
251
  #define A_TYPE block_q5_K
252
  #define A_TYPE_PACKED16 block_q5_K_packed16
253
  #endif
254
 
255
- #if defined(DATA_A_Q6_K)
256
- #extension GL_EXT_shader_16bit_storage : require
257
- #define QUANT_K 256
258
 
259
  struct block_q6_K
260
  {
261
- uint8_t ql[QUANT_K/2];
262
- uint8_t qh[QUANT_K/4];
263
- int8_t scales[QUANT_K/16];
264
  float16_t d;
265
  };
266
 
267
  struct block_q6_K_packed16
268
  {
269
- uint16_t ql[QUANT_K/2/2];
270
- uint16_t qh[QUANT_K/4/2];
271
- int8_t scales[QUANT_K/16];
272
  float16_t d;
273
  };
274
 
 
 
275
  #define A_TYPE block_q6_K
276
  #define A_TYPE_PACKED16 block_q6_K_packed16
277
  #endif
278
 
279
  // IQuants
280
 
281
- #if defined(DATA_A_IQ4_NL)
282
- #extension GL_EXT_shader_16bit_storage : require
283
- #define QUANT_K 32
284
- #define QUANT_R 2
285
 
286
  struct block_iq4_nl
287
  {
288
  float16_t d;
289
- uint8_t qs[QUANT_K/2];
290
  };
291
 
292
  struct block_iq4_nl_packed16
293
  {
294
  float16_t d;
295
- uint16_t qs[QUANT_K/2/2];
296
  };
297
 
298
- #define A_TYPE block_iq4_nl
299
- #define A_TYPE_PACKED16 block_iq4_nl_packed16
300
 
301
  const int8_t kvalues_iq4nl_const[16] = {
302
  int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
@@ -313,6 +313,11 @@ void init_iq4nl_shmem()
313
  }
314
  barrier();
315
  }
 
 
 
 
 
316
  #endif
317
 
318
  #endif // !defined(GGML_TYPES_COMP)
 
30
  #endif
31
  #endif
32
 
33
+ #define QUANT_K_Q4_0 32
34
+ #define QUANT_R_Q4_0 2
 
 
35
 
36
  struct block_q4_0
37
  {
 
44
  uint16_t qs[16/2];
45
  };
46
 
47
+ #if defined(DATA_A_Q4_0)
48
+ #define QUANT_K QUANT_K_Q4_0
49
+ #define QUANT_R QUANT_R_Q4_0
50
  #define A_TYPE block_q4_0
51
  #define A_TYPE_PACKED16 block_q4_0_packed16
52
  #endif
53
 
54
+ #define QUANT_K_Q4_1 32
55
+ #define QUANT_R_Q4_1 2
 
 
56
 
57
  struct block_q4_1
58
  {
 
68
  uint16_t qs[16/2];
69
  };
70
 
71
+ #if defined(DATA_A_Q4_1)
72
+ #define QUANT_K QUANT_K_Q4_1
73
+ #define QUANT_R QUANT_R_Q4_1
74
  #define A_TYPE block_q4_1
75
  #define A_TYPE_PACKED16 block_q4_1_packed16
76
  #endif
77
 
78
+ #define QUANT_K_Q5_0 32
79
+ #define QUANT_R_Q5_0 2
 
 
 
80
 
81
  struct block_q5_0
82
  {
 
92
  uint16_t qs[16/2];
93
  };
94
 
95
+ #if defined(DATA_A_Q5_0)
96
+ #define QUANT_K QUANT_K_Q5_0
97
+ #define QUANT_R QUANT_R_Q5_0
98
  #define A_TYPE block_q5_0
99
  #define A_TYPE_PACKED16 block_q5_0_packed16
100
  #endif
101
 
102
+ #define QUANT_K_Q5_1 32
103
+ #define QUANT_R_Q5_1 2
 
 
 
104
 
105
  struct block_q5_1
106
  {
 
118
  uint16_t qs[16/2];
119
  };
120
 
121
+ #if defined(DATA_A_Q5_1)
122
+ #define QUANT_K QUANT_K_Q5_1
123
+ #define QUANT_R QUANT_R_Q5_1
124
  #define A_TYPE block_q5_1
125
  #define A_TYPE_PACKED16 block_q5_1_packed16
126
  #endif
127
 
128
+ #define QUANT_K_Q8_0 32
129
+ #define QUANT_R_Q8_0 1
 
 
130
 
131
  struct block_q8_0
132
  {
 
139
  uint16_t qs[32/2];
140
  };
141
 
142
+ #if defined(DATA_A_Q8_0)
143
+ #define QUANT_K QUANT_K_Q8_0
144
+ #define QUANT_R QUANT_R_Q8_0
145
  #define A_TYPE block_q8_0
146
  #define A_TYPE_PACKED16 block_q8_0_packed16
147
  #endif
148
 
149
  // K-quants
150
+ #define QUANT_K_Q2_K 256
 
 
151
 
152
  struct block_q2_K
153
  {
154
+ uint8_t scales[QUANT_K_Q2_K/16];
155
+ uint8_t qs[QUANT_K_Q2_K/4];
156
  f16vec2 d;
157
  };
158
 
159
  struct block_q2_K_packed16
160
  {
161
+ uint16_t scales[QUANT_K_Q2_K/16/2];
162
+ uint16_t qs[QUANT_K_Q2_K/4/2];
163
  f16vec2 d;
164
  };
165
 
166
  struct block_q2_K_packed32
167
  {
168
+ uint32_t scales[QUANT_K_Q2_K/16/4];
169
+ uint32_t qs[QUANT_K_Q2_K/4/4];
170
  f16vec2 d;
171
  };
172
 
173
+ #if defined(DATA_A_Q2_K)
174
+ #define QUANT_K QUANT_K_Q2_K
175
  #define A_TYPE block_q2_K
176
  #define A_TYPE_PACKED16 block_q2_K_packed16
177
  #define A_TYPE_PACKED32 block_q2_K_packed32
178
  #endif
179
 
180
+ #define QUANT_K_Q3_K 256
 
 
181
 
182
  struct block_q3_K
183
  {
184
+ uint8_t hmask[QUANT_K_Q3_K/8];
185
+ uint8_t qs[QUANT_K_Q3_K/4];
186
  uint8_t scales[12];
187
  float16_t d;
188
  };
189
 
190
  struct block_q3_K_packed16
191
  {
192
+ uint16_t hmask[QUANT_K_Q3_K/8/2];
193
+ uint16_t qs[QUANT_K_Q3_K/4/2];
194
  uint16_t scales[12/2];
195
  float16_t d;
196
  };
197
 
198
+ #if defined(DATA_A_Q3_K)
199
+ #define QUANT_K QUANT_K_Q3_K
200
  #define A_TYPE block_q3_K
201
  #define A_TYPE_PACKED16 block_q3_K_packed16
202
  #endif
203
 
204
+ #define QUANT_K_Q4_K 256
 
 
205
 
206
  struct block_q4_K
207
  {
208
  f16vec2 d;
209
+ uint8_t scales[3*QUANT_K_Q4_K/64];
210
+ uint8_t qs[QUANT_K_Q4_K/2];
211
  };
212
 
213
  struct block_q4_K_packed16
214
  {
215
  f16vec2 d;
216
+ uint16_t scales[3*QUANT_K_Q4_K/64/2];
217
+ uint16_t qs[QUANT_K_Q4_K/2/2];
218
  };
219
 
220
  struct block_q4_K_packed32
221
  {
222
  f16vec2 d;
223
+ uint32_t scales[3*QUANT_K_Q4_K/64/4];
224
+ uint32_t qs[QUANT_K_Q4_K/2/4];
225
  };
226
 
227
+ #if defined(DATA_A_Q4_K)
228
+ #define QUANT_K QUANT_K_Q4_K
229
  #define A_TYPE block_q4_K
230
  #define A_TYPE_PACKED16 block_q4_K_packed16
231
  #define A_TYPE_PACKED32 block_q4_K_packed32
232
  #endif
233
 
234
+ #define QUANT_K_Q5_K 256
 
 
235
 
236
  struct block_q5_K
237
  {
238
  f16vec2 d;
239
  uint8_t scales[12];
240
+ uint8_t qh[QUANT_K_Q5_K/8];
241
+ uint8_t qs[QUANT_K_Q5_K/2];
242
  };
243
 
244
  struct block_q5_K_packed16
245
  {
246
  f16vec2 d;
247
  uint16_t scales[12/2];
248
+ uint16_t qh[QUANT_K_Q5_K/8/2];
249
+ uint16_t qs[QUANT_K_Q5_K/2/2];
250
  };
251
 
252
+ #if defined(DATA_A_Q5_K)
253
+ #define QUANT_K QUANT_K_Q5_K
254
  #define A_TYPE block_q5_K
255
  #define A_TYPE_PACKED16 block_q5_K_packed16
256
  #endif
257
 
258
+ #define QUANT_K_Q6_K 256
 
 
259
 
260
  struct block_q6_K
261
  {
262
+ uint8_t ql[QUANT_K_Q6_K/2];
263
+ uint8_t qh[QUANT_K_Q6_K/4];
264
+ int8_t scales[QUANT_K_Q6_K/16];
265
  float16_t d;
266
  };
267
 
268
  struct block_q6_K_packed16
269
  {
270
+ uint16_t ql[QUANT_K_Q6_K/2/2];
271
+ uint16_t qh[QUANT_K_Q6_K/4/2];
272
+ int8_t scales[QUANT_K_Q6_K/16];
273
  float16_t d;
274
  };
275
 
276
+ #if defined(DATA_A_Q6_K)
277
+ #define QUANT_K QUANT_K_Q6_K
278
  #define A_TYPE block_q6_K
279
  #define A_TYPE_PACKED16 block_q6_K_packed16
280
  #endif
281
 
282
  // IQuants
283
 
284
+ #define QUANT_K_IQ4_NL 32
285
+ #define QUANT_R_IQ4_NL 2
 
 
286
 
287
  struct block_iq4_nl
288
  {
289
  float16_t d;
290
+ uint8_t qs[QUANT_K_IQ4_NL/2];
291
  };
292
 
293
  struct block_iq4_nl_packed16
294
  {
295
  float16_t d;
296
+ uint16_t qs[QUANT_K_IQ4_NL/2/2];
297
  };
298
 
299
+ #if defined(DATA_A_IQ4_NL)
 
300
 
301
  const int8_t kvalues_iq4nl_const[16] = {
302
  int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
 
313
  }
314
  barrier();
315
  }
316
+
317
+ #define QUANT_K QUANT_K_IQ4_NL
318
+ #define QUANT_R QUANT_R_IQ4_NL
319
+ #define A_TYPE block_iq4_nl
320
+ #define A_TYPE_PACKED16 block_iq4_nl_packed16
321
  #endif
322
 
323
  #endif // !defined(GGML_TYPES_COMP)