#version 450 core

// Every work group consists of exactly one invocation, so
// gl_GlobalInvocationID.x advances by one per dispatched work group along x.
// NOTE(review): presumably the host dispatches one work group per input entry;
// confirm before enlarging the local size, since main() has no bounds check.
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Key value that main() treats as "this slot held no entry".
#define EMPTY_KEY 0xFFFFFFFF
// Upper bound on the number of swap iterations one invocation performs in main().
#define MAX_STEPS 8192

// One record as stored in both the input buffer and the hash table.
// Consists of three 32-bit unsigned integers; each field is swapped
// individually with atomicExchange in main().
struct entry
{
    // Value hashed by ComputeHashes(); compared against EMPTY_KEY in main()
    // to decide whether a slot was unoccupied.
    uint Key;
    // Payload word carried alongside the key; never interpreted by this shader.
    // NOTE(review): presumably a packed surface normal, going by the name - confirm.
    uint PackedNormal;
    // Payload word carried alongside the key; never interpreted by this shader.
    // NOTE(review): presumably a packed colour, going by the name - confirm.
    uint PackedColour;
};

// Hash table storage (binding 0). main() both reads and writes it through
// atomicExchange on the individual fields of each slot.
// NOTE(review): main() stops as soon as it swaps out a Key equal to EMPTY_KEY,
// so unoccupied slots presumably have to be initialised to EMPTY_KEY by the
// host before dispatch - confirm against the caller.
layout (std430, binding = 0) restrict buffer htable_out
{
    entry Entries[];
} HTableOutBuffer;

// Read-only list of entries to insert (binding 1). Each invocation reads the
// single element at index OffsetUniform + gl_GlobalInvocationID.x.
layout (std430, binding = 1) readonly restrict buffer data_in
{
    entry Entries[];
} DataInputBuffer;

// ComputeHashes() reduces every hash modulo (TableSizeUniform - 1).
// NOTE(review): presumably the number of entries in HTableOutBuffer - confirm.
layout (location = 2) uniform uint TableSizeUniform;
// Added to gl_GlobalInvocationID.x to select the input entry in main().
layout (location = 3) uniform uint OffsetUniform;

// Computes the four candidate table slots for a key.
//
// Each component of the result comes from the same xorshift-multiply mixing
// sequence applied to Key, but with its own set of shift amounts and
// multipliers, so the four lanes act as four different hash functions.
//
// Key     - the 32-bit key to hash.
// returns - four slot indices (x, y, z, w), each in [0, TableSizeUniform - 2]
//           for TableSizeUniform >= 2. Index TableSizeUniform - 1 is never
//           produced because the modulus is TableSizeUniform - 1.
//           NOTE(review): confirm that leaving the last slot unused is intended.
//
// An alternative reduction that gives every lane its own quarter of the table
// was sketched here previously and is not in use:
//     Q = (TableSizeUniform - 1) >> 2;  return (K % Q) + Q * uvec4(0, 1, 2, 3);
uvec4 ComputeHashes(uint Key)
{
    // Per-lane shift amounts for the three xorshift steps.
    const uvec4 S0 = uvec4(16, 18, 17, 16);
    const uvec4 S1 = uvec4(15, 16, 15, 16);
    const uvec4 S2 = uvec4(16, 17, 16, 15);

    // Per-lane multipliers for the two multiply steps.
    const uvec4 C0 = uvec4(0x7feb352d, 0xa136aaad, 0x24f4d2cd, 0xe2d0d4cb);
    const uvec4 C1 = uvec4(0x846ca68b, 0x9f6d62d7, 0x1ba3b969, 0x3c6ad939);

    // Broadcast the key to all lanes, then mix each lane independently.
    uvec4 K = uvec4(Key);
    K ^= K >> S0;
    K *= C0;
    K ^= K >> S1;
    K *= C1;
    K ^= K >> S2;

    // Clamp the modulus to at least 1: with TableSizeUniform == 1 the divisor
    // would be 0, and integer modulo by zero has an undefined result in GLSL.
    // For every TableSizeUniform >= 2 the divisor is unchanged.
    uint SlotCount = max(TableSizeUniform - 1u, 1u);
    return K % SlotCount;
}

// Inserts one input entry into the hash table by repeated displacement.
//
// The invocation loads the entry at index OffsetUniform + gl_GlobalInvocationID.x
// from the input buffer and swaps it into its first candidate slot (lane x of
// ComputeHashes). Whatever was stored in that slot becomes the entry being
// carried. If the carried key is EMPTY_KEY the slot was free and the work is
// done; otherwise the carried entry moves on to the candidate slot that follows
// the one it was just taken from (x -> y -> z -> w -> x) and the swap repeats,
// for at most MAX_STEPS swaps.
//
// NOTE(review): the three fields of a slot are exchanged by three separate
// atomic operations, not as one unit, so another invocation working on the
// same slot could interleave between them and pair a key with another entry's
// payload.
// NOTE(review): if MAX_STEPS swaps complete without swapping out an EMPTY_KEY,
// the entry still being carried is not written anywhere and nothing records
// the failure.
void main()
{
    uint InvocationIndex = gl_GlobalInvocationID.x;
    entry Carried = DataInputBuffer.Entries[OffsetUniform + InvocationIndex];

    // Start at the first of the four candidate slots.
    uint Target = ComputeHashes(Carried.Key).x;

    for (int Attempt = 0; Attempt < MAX_STEPS; ++Attempt)
    {
        // Store the carried entry and pick up the previous occupant, field by field.
        Carried.Key = atomicExchange(HTableOutBuffer.Entries[Target].Key, Carried.Key);
        Carried.PackedNormal = atomicExchange(HTableOutBuffer.Entries[Target].PackedNormal, Carried.PackedNormal);
        Carried.PackedColour = atomicExchange(HTableOutBuffer.Entries[Target].PackedColour, Carried.PackedColour);

        // The slot was unoccupied: nothing left to relocate.
        if (EMPTY_KEY == Carried.Key)
        {
            break;
        }

        // Send the displaced entry to the candidate after the one it just left.
        // The first matching lane wins; if no lane matches, Target stays as is.
        uvec4 Candidates = ComputeHashes(Carried.Key);
        for (int Lane = 0; Lane < 4; ++Lane)
        {
            if (Target == Candidates[Lane])
            {
                Target = Candidates[(Lane + 1) & 3];
                break;
            }
        }
    }

}