Spaces:
Running
Running
Commit
·
c31abdb
1
Parent(s):
50a2978
vulkan: copy iq4_nl LUT into shared memory (llama/10409)
Browse files
- ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +2 -0
- ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +4 -0
- ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +4 -0
- ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +4 -0
- ggml/src/ggml-vulkan/vulkan-shaders/types.comp +12 -1
- ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +3 -3
ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp
CHANGED
|
@@ -10,6 +10,8 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
|
|
| 10 |
void main() {
|
| 11 |
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
|
| 12 |
|
|
|
|
|
|
|
| 13 |
const uint tid = gl_LocalInvocationID.x % 64;
|
| 14 |
const uint il = tid/32;
|
| 15 |
const uint ir = tid%32;
|
|
|
|
| 10 |
void main() {
|
| 11 |
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
|
| 12 |
|
| 13 |
+
init_iq4nl_shmem();
|
| 14 |
+
|
| 15 |
const uint tid = gl_LocalInvocationID.x % 64;
|
| 16 |
const uint il = tid/32;
|
| 17 |
const uint ir = tid%32;
|
ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
CHANGED
|
@@ -12,6 +12,10 @@ void main() {
|
|
| 12 |
const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
|
| 13 |
const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
if (i00 >= p.ne00) {
|
| 16 |
return;
|
| 17 |
}
|
|
|
|
| 12 |
const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
|
| 13 |
const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
|
| 14 |
|
| 15 |
+
#if defined(DATA_A_IQ4_NL)
|
| 16 |
+
init_iq4nl_shmem();
|
| 17 |
+
#endif
|
| 18 |
+
|
| 19 |
if (i00 >= p.ne00) {
|
| 20 |
return;
|
| 21 |
}
|
ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
CHANGED
|
@@ -161,6 +161,10 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
|
|
| 161 |
void main() {
|
| 162 |
const uint first_row = NUM_ROWS * (gl_WorkGroupID.x + gl_NumWorkGroups.x * gl_WorkGroupID.z);
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
// do NUM_ROWS at a time, unless there aren't enough remaining rows
|
| 165 |
if (first_row + NUM_ROWS <= p.stride_d) {
|
| 166 |
compute_outputs(first_row, NUM_ROWS);
|
|
|
|
| 161 |
void main() {
|
| 162 |
const uint first_row = NUM_ROWS * (gl_WorkGroupID.x + gl_NumWorkGroups.x * gl_WorkGroupID.z);
|
| 163 |
|
| 164 |
+
#if defined(DATA_A_IQ4_NL)
|
| 165 |
+
init_iq4nl_shmem();
|
| 166 |
+
#endif
|
| 167 |
+
|
| 168 |
// do NUM_ROWS at a time, unless there aren't enough remaining rows
|
| 169 |
if (first_row + NUM_ROWS <= p.stride_d) {
|
| 170 |
compute_outputs(first_row, NUM_ROWS);
|
ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
CHANGED
|
@@ -75,6 +75,10 @@ shared u16vec2 row_ids[3072];
|
|
| 75 |
#endif
|
| 76 |
|
| 77 |
void main() {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
#ifdef MUL_MAT_ID
|
| 79 |
const uint expert_idx = gl_GlobalInvocationID.z;
|
| 80 |
#else
|
|
|
|
| 75 |
#endif
|
| 76 |
|
| 77 |
void main() {
|
| 78 |
+
#if defined(DATA_A_IQ4_NL)
|
| 79 |
+
init_iq4nl_shmem();
|
| 80 |
+
#endif
|
| 81 |
+
|
| 82 |
#ifdef MUL_MAT_ID
|
| 83 |
const uint expert_idx = gl_GlobalInvocationID.z;
|
| 84 |
#else
|
ggml/src/ggml-vulkan/vulkan-shaders/types.comp
CHANGED
|
@@ -298,10 +298,21 @@ struct block_iq4_nl_packed16
|
|
| 298 |
#define A_TYPE block_iq4_nl
|
| 299 |
#define A_TYPE_PACKED16 block_iq4_nl_packed16
|
| 300 |
|
| 301 |
-
const int8_t kvalues_iq4nl[16] = {
|
| 302 |
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
|
| 303 |
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
|
| 304 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
#endif
|
| 306 |
|
| 307 |
#endif // !defined(GGML_TYPES_COMP)
|
|
|
|
| 298 |
#define A_TYPE block_iq4_nl
|
| 299 |
#define A_TYPE_PACKED16 block_iq4_nl_packed16
|
| 300 |
|
| 301 |
+
const int8_t kvalues_iq4nl_const[16] = {
|
| 302 |
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
|
| 303 |
int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
|
| 304 |
};
|
| 305 |
+
|
| 306 |
+
shared FLOAT_TYPE kvalues_iq4nl[16];
|
| 307 |
+
|
| 308 |
+
void init_iq4nl_shmem()
|
| 309 |
+
{
|
| 310 |
+
// copy the table into shared memory and sync
|
| 311 |
+
if (gl_LocalInvocationIndex.x < 16) {
|
| 312 |
+
kvalues_iq4nl[gl_LocalInvocationIndex.x] = FLOAT_TYPE(kvalues_iq4nl_const[gl_LocalInvocationIndex.x]);
|
| 313 |
+
}
|
| 314 |
+
barrier();
|
| 315 |
+
}
|
| 316 |
#endif
|
| 317 |
|
| 318 |
#endif // !defined(GGML_TYPES_COMP)
|
ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
CHANGED
|
@@ -331,11 +331,11 @@ void process_shaders() {
|
|
| 331 |
shader = (tname == "f32" || tname == "f16") ? "get_rows.comp" : "get_rows_quant.comp";
|
| 332 |
|
| 333 |
if (tname == "f16") {
|
| 334 |
-
string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
|
| 335 |
} else {
|
| 336 |
-
string_to_spv("get_rows_" + tname, shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}});
|
| 337 |
}
|
| 338 |
-
string_to_spv("get_rows_" + tname + "_f32", shader, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}});
|
| 339 |
}
|
| 340 |
}
|
| 341 |
|
|
|
|
| 331 |
shader = (tname == "f32" || tname == "f16") ? "get_rows.comp" : "get_rows_quant.comp";
|
| 332 |
|
| 333 |
if (tname == "f16") {
|
| 334 |
+
string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}}));
|
| 335 |
} else {
|
| 336 |
+
string_to_spv("get_rows_" + tname, shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float16_t"}}));
|
| 337 |
}
|
| 338 |
+
string_to_spv("get_rows_" + tname + "_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "int"}, {"D_TYPE", "float"}}));
|
| 339 |
}
|
| 340 |
}
|
| 341 |
|