49 lines
1.3 KiB
Plaintext
49 lines
1.3 KiB
Plaintext
// Implementation of the parallel prefix sum algorithm
|
|
|
|
const int SIZE = 512;
|
|
|
|
layout(set=0, binding=0) readonly buffer inputs {
|
|
float[] in_data;
|
|
};
|
|
layout(set=0, binding=1) writeonly buffer outputs {
|
|
float[] out_data;
|
|
};
|
|
|
|
workgroup float[SIZE * 2] shared_data;
|
|
|
|
// Test that workgroup-shared variables are passed to user-defined functions
|
|
// correctly.
|
|
noinline void store(uint i, float value) {
|
|
shared_data[i] = value;
|
|
}
|
|
|
|
void main() {
|
|
uint id = sk_GlobalInvocationID.x;
|
|
uint rd_id;
|
|
uint wr_id;
|
|
uint mask;
|
|
|
|
// Each thread is responsible for two elements of the output array
|
|
shared_data[id * 2] = in_data[id * 2];
|
|
shared_data[id * 2 + 1] = in_data[id * 2 + 1];
|
|
|
|
workgroupBarrier();
|
|
|
|
const uint steps = uint(log2(float(SIZE))) + 1;
|
|
for (uint step = 0; step < steps; step++) {
|
|
// Calculate the read and write index in the shared array
|
|
mask = (1 << step) - 1;
|
|
rd_id = ((id >> step) << (step + 1)) + mask;
|
|
wr_id = rd_id + 1 + (id & mask);
|
|
|
|
// Accumulate the read data into our element
|
|
store(wr_id, shared_data[wr_id] + shared_data[rd_id]);
|
|
|
|
workgroupBarrier();
|
|
}
|
|
|
|
// Write the final result out
|
|
out_data[id * 2] = shared_data[id * 2];
|
|
out_data[id * 2 + 1] = shared_data[id * 2 + 1];
|
|
}
|