Spaces:
Running
Running
vulkan : argsort barriers must be under uniform control flow (ggml/951)
Browse files
A return before a barrier (that happens only in some threads in
a workgroup) leads to UB.
While the old code actually works on some devices,
it fails on some others (i.e. "smaller" GPUs).
BTW, I think it would be better to set specialization constants
when the graph is built, so that the local workgroup
could be sized appropriately.
But it would take a lot of work.
Signed-off-by: Salvatore Mesoraca <[email protected]>
ggml/src/vulkan-shaders/argsort.comp
CHANGED
|
@@ -29,20 +29,18 @@ void main() {
|
|
| 29 |
const int col = int(gl_LocalInvocationID.x);
|
| 30 |
const uint row = gl_WorkGroupID.y;
|
| 31 |
|
| 32 |
-
if (col >= p.ncols_pad) {
|
| 33 |
-
return;
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
const uint row_offset = row * p.ncols;
|
| 37 |
|
| 38 |
// initialize indices
|
| 39 |
-
dst_row[col] = col;
|
|
|
|
|
|
|
| 40 |
barrier();
|
| 41 |
|
| 42 |
for (uint k = 2; k <= p.ncols_pad; k *= 2) {
|
| 43 |
for (uint j = k / 2; j > 0; j /= 2) {
|
| 44 |
const uint ixj = col ^ j;
|
| 45 |
-
if (ixj > col) {
|
| 46 |
if ((col & k) == 0) {
|
| 47 |
if (dst_row[col] >= p.ncols ||
|
| 48 |
(dst_row[ixj] < p.ncols && (p.order == ASC ?
|
|
|
|
| 29 |
const int col = int(gl_LocalInvocationID.x);
|
| 30 |
const uint row = gl_WorkGroupID.y;
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
const uint row_offset = row * p.ncols;
|
| 33 |
|
| 34 |
// initialize indices
|
| 35 |
+
if (col < p.ncols_pad) {
|
| 36 |
+
dst_row[col] = col;
|
| 37 |
+
}
|
| 38 |
barrier();
|
| 39 |
|
| 40 |
for (uint k = 2; k <= p.ncols_pad; k *= 2) {
|
| 41 |
for (uint j = k / 2; j > 0; j /= 2) {
|
| 42 |
const uint ixj = col ^ j;
|
| 43 |
+
if (col < p.ncols_pad && ixj > col) {
|
| 44 |
if ((col & k) == 0) {
|
| 45 |
if (dst_row[col] >= p.ncols ||
|
| 46 |
(dst_row[ixj] < p.ncols && (p.order == ASC ?
|