40 lines
1.6 KiB
Metal
40 lines
1.6 KiB
Metal
|
|
#include <metal_stdlib>
|
||
|
|
#include <simd/simd.h>
|
||
|
|
using namespace metal;
|
||
|
|
struct GlobalCounts {
|
||
|
|
atomic_uint firstHalfCount;
|
||
|
|
atomic_uint secondHalfCount;
|
||
|
|
};
|
||
|
|
struct Inputs {
|
||
|
|
uint3 sk_LocalInvocationID;
|
||
|
|
};
|
||
|
|
struct ssbo {
|
||
|
|
GlobalCounts globalCounts;
|
||
|
|
};
|
||
|
|
struct Globals {
|
||
|
|
device ssbo* _anonInterface0;
|
||
|
|
};
|
||
|
|
struct Threadgroups {
|
||
|
|
array<atomic_uint, 2> localCounts;
|
||
|
|
};
|
||
|
|
kernel void computeMain(uint3 sk_LocalInvocationID [[thread_position_in_threadgroup]], device ssbo& _anonInterface0 [[buffer(0)]]) {
|
||
|
|
Globals _globals{&_anonInterface0};
|
||
|
|
(void)_globals;
|
||
|
|
threadgroup Threadgroups _threadgroups{{}};
|
||
|
|
(void)_threadgroups;
|
||
|
|
Inputs _in = { sk_LocalInvocationID };
|
||
|
|
if (_in.sk_LocalInvocationID.x == 0u) {
|
||
|
|
atomic_store_explicit(&_threadgroups.localCounts[0], 0u, memory_order_relaxed);
|
||
|
|
atomic_store_explicit(&_threadgroups.localCounts[1], 0u, memory_order_relaxed);
|
||
|
|
}
|
||
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||
|
|
uint idx = uint(_in.sk_LocalInvocationID.x < 512u ? 0 : 1);
|
||
|
|
atomic_fetch_add_explicit(&_threadgroups.localCounts[idx], 1u, memory_order_relaxed);
|
||
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||
|
|
if (_in.sk_LocalInvocationID.x == 0u) {
|
||
|
|
atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.firstHalfCount, atomic_load_explicit(&_threadgroups.localCounts[0], memory_order_relaxed), memory_order_relaxed);
|
||
|
|
atomic_fetch_add_explicit(&_globals._anonInterface0->globalCounts.secondHalfCount, atomic_load_explicit(&_threadgroups.localCounts[1], memory_order_relaxed), memory_order_relaxed);
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|