981 lines
34 KiB
Plaintext
981 lines
34 KiB
Plaintext
// Copyright 2019 The Android Open Source Project
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// For implementation details, please refer to:
|
|
// https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt
|
|
|
|
// Please refer to this document for operator precedence (slightly different from C):
|
|
// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators
|
|
|
|
#version 450

// Default precision qualifier for int types in this shader.
precision highp int;

// A workgroup holds a single invocation; main() decodes one compressed block
// per invocation.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
|
|
|
// Per-dispatch parameters supplied through push constants.
layout(push_constant) uniform ImageFormatBlock {
    uvec2 blockSize;   // block footprint in texels (x = width, y = height)
    uint compFormat;   // not read by the code visible in this file
    uint baseLayer;    // added to the invocation's z coordinate in main()
    uint sRGB;         // not read by the code visible in this file
    uint smallBlock;   // non-zero doubles x/y in select_partition()
} u_pushConstant;
|
|
|
|
// Source image: one rgba32ui texel is loaded per 128-bit compressed block.
layout(binding = 0, rgba32ui) readonly uniform uimage${type} u_image0;
// Destination image: decoded RGBA8 texels are stored here.
layout(binding = 1, rgba8ui) writeonly uniform uimage${type} u_image1;
|
|
|
|
${UnquantTables}
|
|
|
|
// Lookup indexed by the 4-bit color endpoint mode (CEM): true for the HDR
// modes 2, 3, 7, 11, 14 and 15, false for every LDR mode.
const bool kHDRCEM[16] = {
    false, false, true,  true,   // CEM  0 -  3
    false, false, false, true,   // CEM  4 -  7
    false, false, false, true,   // CEM  8 - 11
    false, false, true,  true,   // CEM 12 - 15
};
|
|
|
|
// Trit decoding table for section C.2.12: kTritEncodings[T] holds the five
// trits (t0..t4) packed into the 8-bit value T. decode_trit() indexes it with
// the interleaved bits it gathers from the bit stream.
// The trailing comment on each row gives the index of the row's first entry.
const uint kTritEncodings[256][5] = {
    {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},  // 0x00
    {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},  // 0x04
    {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},  // 0x08
    {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},  // 0x0C
    {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},  // 0x10
    {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},  // 0x14
    {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},  // 0x18
    {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},  // 0x1C
    {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},  // 0x20
    {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},  // 0x24
    {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},  // 0x28
    {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},  // 0x2C
    {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},  // 0x30
    {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},  // 0x34
    {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},  // 0x38
    {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},  // 0x3C
    {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},  // 0x40
    {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},  // 0x44
    {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},  // 0x48
    {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},  // 0x4C
    {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},  // 0x50
    {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},  // 0x54
    {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},  // 0x58
    {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},  // 0x5C
    {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},  // 0x60
    {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},  // 0x64
    {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},  // 0x68
    {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},  // 0x6C
    {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},  // 0x70
    {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},  // 0x74
    {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},  // 0x78
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},  // 0x7C
    {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},  // 0x80
    {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},  // 0x84
    {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},  // 0x88
    {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},  // 0x8C
    {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},  // 0x90
    {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},  // 0x94
    {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},  // 0x98
    {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},  // 0x9C
    {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},  // 0xA0
    {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},  // 0xA4
    {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},  // 0xA8
    {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},  // 0xAC
    {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},  // 0xB0
    {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},  // 0xB4
    {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},  // 0xB8
    {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},  // 0xBC
    {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},  // 0xC0
    {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},  // 0xC4
    {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},  // 0xC8
    {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},  // 0xCC
    {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},  // 0xD0
    {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},  // 0xD4
    {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},  // 0xD8
    {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},  // 0xDC
    {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},  // 0xE0
    {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},  // 0xE4
    {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},  // 0xE8
    {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},  // 0xEC
    {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},  // 0xF0
    {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},  // 0xF4
    {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},  // 0xF8
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},  // 0xFC
};
|
|
|
|
// Quint decoding table: kQuintEncodings[Q] holds the three quints (q0..q2)
// packed into the 7-bit value Q. decode_quint() indexes it with the
// interleaved bits it gathers from the bit stream.
// Eight entries per row; the trailing comment is the index of the first one.
const uint kQuintEncodings[128][3] = {
    {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},  // 0x00
    {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},  // 0x08
    {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},  // 0x10
    {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},  // 0x18
    {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},  // 0x20
    {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},  // 0x28
    {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},  // 0x30
    {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1}, {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},  // 0x38
    {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},  // 0x40
    {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},  // 0x48
    {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},  // 0x50
    {0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},  // 0x58
    {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},  // 0x60
    {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},  // 0x68
    {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},  // 0x70
    {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4},  // 0x78
};
|
|
|
|
// Number of rows in kRQuantParamTable.
const int kRQuantParamTableLength = 19;

// Quantization parameters from Table C.2.16, ordered from the largest range
// to the smallest. Each row is {trits, quints, bits}; the trailing comment is
// the largest value that encoding can represent. The pure-trit (max 2) and
// pure-quint (max 4) encodings are intentionally absent from this table.
const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
    {0, 0, 8},  // 255
    {1, 0, 6},  // 191
    {0, 1, 5},  // 159
    {0, 0, 7},  // 127
    {1, 0, 5},  // 95
    {0, 1, 4},  // 79
    {0, 0, 6},  // 63
    {1, 0, 4},  // 47
    {0, 1, 3},  // 39
    {0, 0, 5},  // 31
    {1, 0, 3},  // 23
    {0, 1, 2},  // 19
    {0, 0, 4},  // 15
    {1, 0, 2},  // 11
    {0, 1, 1},  // 9
    {0, 0, 3},  // 7
    {1, 0, 1},  // 5
    {0, 0, 2},  // 3
    {0, 0, 1},  // 1
};
|
|
|
|
// Returns bit number `bit` of `u` (0 = least significant) as 0 or 1.
uint bit(uint u, int bit) {
    uint shifted = u >> bit;
    return shifted & 1;
}
|
|
|
|
// Extracts `bitCount` bits (0..32) starting at bit `bitStart` from a 128-bit
// value stored in `u`, where u[3] holds bits 0..31 and u[0] holds bits 96..127.
// The field may straddle two 32-bit words.
//
// Defined for every input: a zero-width field returns 0, a 32-bit field is
// returned whole, and any bits requested past bit 127 read as zero. (The
// previous version shifted by the full word width and could index outside `u`
// in those cases, which GLSL leaves undefined.)
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    if (bitCount == 0u || bitStart >= 128u) {
        return 0u;
    }
    // A single call can return at most one 32-bit word.
    bitCount = min(bitCount, 32u);

    uint firstIdx = bitStart / 32u;
    uint firstOffset = bitStart % 32u;
    uint lastIdx = (bitStart + bitCount - 1u) / 32u;
    // `1u << 32u` is undefined, so the full-word mask is special-cased.
    uint bitMask = (bitCount == 32u) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);

    uint ret = u[3u - firstIdx] >> firstOffset;
    if (lastIdx != firstIdx && lastIdx <= 3u) {
        // The field can only straddle a word boundary when firstOffset != 0,
        // so this shift count is always in 1..31.
        ret |= u[3u - lastIdx] << (32u - firstOffset);
    }
    return ret & bitMask;
}
|
|
|
|
// Like bits128(), but the stream is treated as ending at `bitEnd`: at most
// (bitEnd - bitStart) bits are read, and nothing is read (0 is returned) once
// `bitStart` has reached `bitEnd`.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    if (bitStart >= bitEnd) {
        return 0;
    }
    uint remaining = bitEnd - bitStart;
    uint readable = min(remaining, bitCount);
    return bits128(u, bitStart, readable);
}
|
|
|
|
// Number of bits occupied by `num_vals` values encoded with the given
// trit / quint / plain-bit parameters (formula from section C.2.22).
// `trits` and `quints` act as 0/1 switches in the formula.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    uint total = num_vals * bits;                      // plain bits per value
    total += ((num_vals * 8 * trits) + 4) / 5;         // ceil(8n/5) when trits are used
    total += ((num_vals * 7 * quints) + 2) / 3;        // ceil(7n/3) when quints are used
    return total;
}
|
|
|
|
// Reports how values are grouped in the bit stream for the given encoding:
// `pack` is the number of values per group and `packedSize` the number of
// bits one group occupies. Trit groups hold 5 values, quint groups 3, and
// plain-bit values are stored one at a time.
void get_pack_size(uint trits, uint quints, uint bits, out uint pack, out uint packedSize) {
    if (trits == 1) {
        pack = 5;
        packedSize = 5 * bits + 8;
        return;
    }
    if (quints == 1) {
        pack = 3;
        packedSize = 3 * bits + 7;
        return;
    }
    pack = 1;
    packedSize = bits;
}
|
|
|
|
// Decodes one group of five trit-encoded values from `data`.
//   start - bit position of the group
//   end   - bit position where the stream ends; bits at or past it read as 0
//   n     - number of plain low bits stored with every value
// Each returned value is (trit << n) | low bits.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    const int kTritsPerGroup = 5;
    // Count of packed-trit bits that follow the low bits of each value.
    const int kTritBitsAfterValue[5] = {2, 2, 1, 2, 1};

    uint lowBits[kTritsPerGroup];
    uint packedTrits = 0;
    uint packedShift = 0;
    uint cursor = start;
    for (int k = 0; k < kTritsPerGroup; ++k) {
        lowBits[k] = bits128fillZeros(data, cursor, end, n);
        cursor += n;

        uint chunk = bits128fillZeros(data, cursor, end, kTritBitsAfterValue[k]);
        cursor += kTritBitsAfterValue[k];
        packedTrits |= chunk << packedShift;
        packedShift += kTritBitsAfterValue[k];
    }

    // Look up the five trits for the gathered 8-bit code and merge each one
    // with its low bits.
    uint[kTritsPerGroup] values;
    for (int k = 0; k < kTritsPerGroup; ++k) {
        values[k] = kTritEncodings[packedTrits][k] << n | lowBits[k];
    }
    return values;
}
|
|
|
|
// Decodes one group of three quint-encoded values from `data`.
//   start - bit position of the group
//   end   - bit position where the stream ends; bits at or past it read as 0
//   n     - number of plain low bits stored with every value
// Each returned value is (quint << n) | low bits.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    // A quint group always carries three values.
    const int kNumVals = 3;
    // Count of packed-quint bits that follow the low bits of each value.
    const int kInterleavedBits[3] = {3, 2, 2};

    // Gather the low bits of every value and the 7-bit packed quint code.
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits =
                bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    // Look up the three quints and merge each with its low bits.
    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}
|
|
|
|
// Number of endpoint values consumed by color endpoint mode `cem`:
// 2 for modes 0-3, 4 for modes 4-7, 6 for modes 8-11 and 8 for modes 12-15.
uint get_v_count(uint cem) {
    uint modeClass = cem / 4;
    return 2 * (modeClass + 1);
}
|
|
|
|
// Color endpoint mode (CEM) identifiers; the value is the 4-bit mode number.
// Luminance modes (2 endpoint values).
const uint kLDRLumaDirect          = 0;
const uint kLDRLumaBaseOffset      = 1;
const uint kHDRLumaLargeRange      = 2;
const uint kHDRLumaSmallRange      = 3;
// Luminance + alpha and RGB base+scale modes (4 endpoint values).
const uint kLDRLumaAlphaDirect     = 4;
const uint kLDRLumaAlphaBaseOffset = 5;
const uint kLDRRGBBaseScale        = 6;
const uint kHDRRGBBaseScale        = 7;
// RGB modes (6 endpoint values).
const uint kLDRRGBDirect           = 8;
const uint kLDRRGBBaseOffset       = 9;
const uint kLDRRGBBaseScaleTwoA    = 10;
const uint kHDRRGBDirect           = 11;
// RGBA modes (8 endpoint values).
const uint kLDRRGBADirect          = 12;
const uint kLDRRGBABaseOffset      = 13;
const uint kHDRRGBDirectLDRAlpha   = 14;
const uint kHDRRGBDirectHDRAlpha   = 15;
|
|
|
|
// Exchanges the contents of the two vectors.
void swap(inout ivec4 v1, inout ivec4 v2) {
    ivec4 saved = v2;
    v2 = v1;
    v1 = saved;
}
|
|
|
|
// Moves bit 7 of `a` into bit 7 of `b` (after shifting `b` right by one), then
// turns the remaining bits of `a` into a signed 6-bit offset in -32..31.
void bit_transfer_signed(inout int a, inout int b) {
    // `a` must still be unmodified here: its top bit is the one transferred.
    b = (b >> 1) | (a & 0x80);
    a = (a >> 1) & 0x3F;
    // Sign-extend from bit 5.
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
|
|
|
|
// Replaces red and green with their average with blue; blue and alpha are
// left untouched.
void blue_contract(inout ivec4 val) {
    val.rg = (val.rg + val.bb) / 2;
}
|
|
|
|
// Converts the unquantized endpoint values of one partition into a pair of
// RGBA endpoints for the LDR color endpoint modes.
//   vals      - unquantized endpoint values of all partitions
//   start_idx - index in `vals` of this partition's first value
//   mode      - color endpoint mode (one of the kLDR* constants)
//   c1, c2    - receive the low and high endpoint
// Modes without a case below (the HDR ones) produce (0, 0, 0, 0) for both
// endpoints.
void decode_ldr_for_mode(const uint[40] vals,
                         uint start_idx,
                         uint mode,
                         out uvec4 c1,
                         out uvec4 c2) {
    // Eight values are always fetched; each mode only uses as many as it needs.
    int v[8];
    for (int k = 0; k < 8; ++k) {
        v[k] = int(vals[start_idx + k]);
    }

    ivec4 lo;
    ivec4 hi;
    switch (mode) {
        case kLDRLumaDirect: {
            lo = ivec4(v[0], v[0], v[0], 255);
            hi = ivec4(v[1], v[1], v[1], 255);
        } break;

        case kLDRLumaBaseOffset: {
            const int base = (v[0] >> 2) | (v[1] & 0xC0);
            const int top = min(base + (v[1] & 0x3F), 0xFF);
            lo = ivec4(base, base, base, 255);
            hi = ivec4(top, top, top, 255);
        } break;

        case kLDRLumaAlphaDirect: {
            lo = ivec4(v[0], v[0], v[0], v[2]);
            hi = ivec4(v[1], v[1], v[1], v[3]);
        } break;

        case kLDRLumaAlphaBaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);

            const int top = v[0] + v[1];
            lo = clamp(ivec4(v[0], v[0], v[0], v[2]), 0, 255);
            hi = clamp(ivec4(top, top, top, v[2] + v[3]), 0, 255);
        } break;

        case kLDRRGBBaseScale: {
            hi = ivec4(v[0], v[1], v[2], 255);
            // Low endpoint is the high colour scaled by v[3] / 256.
            lo = ivec4((hi.rgb * v[3]) >> 8, 255);
        } break;

        case kLDRRGBDirect: {
            const int sumLo = v[0] + v[2] + v[4];
            const int sumHi = v[1] + v[3] + v[5];

            lo = ivec4(v[0], v[2], v[4], 255);
            hi = ivec4(v[1], v[3], v[5], 255);

            // A smaller "high" sum signals the blue-contracted encoding.
            if (sumHi < sumLo) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
        } break;

        case kLDRRGBBaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);
            bit_transfer_signed(v[5], v[4]);

            lo = ivec4(v[0], v[2], v[4], 255);
            hi = ivec4(v[0] + v[1], v[2] + v[3], v[4] + v[5], 255);

            // A negative offset sum signals the blue-contracted encoding.
            if (v[1] + v[3] + v[5] < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }

            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
        } break;

        case kLDRRGBBaseScaleTwoA: {
            // Base colour, scaled copy for the low endpoint, separate alphas.
            hi = ivec4(v[0], v[1], v[2], 255);
            lo = (hi * v[3]) >> 8;
            lo[3] = v[4];
            hi[3] = v[5];
        } break;

        case kLDRRGBADirect: {
            const uint sumLo = v[0] + v[2] + v[4];
            const uint sumHi = v[1] + v[3] + v[5];

            lo = ivec4(v[0], v[2], v[4], v[6]);
            hi = ivec4(v[1], v[3], v[5], v[7]);

            if (sumHi < sumLo) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
        } break;

        case kLDRRGBABaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);
            bit_transfer_signed(v[5], v[4]);
            bit_transfer_signed(v[7], v[6]);

            lo = ivec4(v[0], v[2], v[4], v[6]);
            hi = ivec4(v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7]);

            if (v[1] + v[3] + v[5] < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }

            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
        } break;

        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            lo = ivec4(0, 0, 0, 0);
            hi = lo;
    }
    c1 = uvec4(lo);
    c2 = uvec4(hi);
}
|
|
|
|
// 32-bit integer mixing function; select_partition() uses it to turn a
// partition seed into pseudo-random bits. The sequence of shifts and
// operations must stay exactly as written.
uint hash52(uint p) {
    uint h = p;
    h ^= h >> 15;
    h -= h << 17;
    h += h << 7;
    h += h << 4;
    h ^= h >> 5;
    h += h << 16;
    h ^= h >> 7;
    h ^= h >> 3;
    h ^= h << 6;
    h ^= h >> 17;
    return h;
}
|
|
|
|
// Returns the partition index (0..3) of the texel at (x, y) for the given
// partition seed and partition count. A single-partition block always yields
// partition 0. When u_pushConstant.smallBlock is non-zero the coordinates are
// doubled before hashing.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1) {
        return 0;
    }
    // Only 2D blocks are handled, so the z coordinate is fixed at 0.
    uint z = 0;
    if (u_pushConstant.smallBlock != 0) {
        x <<= 1;
        y <<= 1;
    }
    seed += (partitioncount - 1) * 1024;
    uint rnum = hash52(seed);

    // Shift amounts depend on the low seed bits and on the partition count.
    uint shCount = (partitioncount == 3 ? 6 : 5);
    uint shSeed = ((seed & 2) != 0 ? 4 : 5);
    uint sh1;
    uint sh2;
    if ((seed & 1) != 0) {
        sh1 = shSeed;
        sh2 = shCount;
    } else {
        sh1 = shCount;
        sh2 = shSeed;
    }
    uint sh3 = ((seed & 0x10) != 0) ? sh1 : sh2;

    // Twelve 4-bit fields of the hash, each squared and then scaled down.
    uint seed1 = rnum & 0xF;
    uint seed2 = (rnum >> 4) & 0xF;
    uint seed3 = (rnum >> 8) & 0xF;
    uint seed4 = (rnum >> 12) & 0xF;
    uint seed5 = (rnum >> 16) & 0xF;
    uint seed6 = (rnum >> 20) & 0xF;
    uint seed7 = (rnum >> 24) & 0xF;
    uint seed8 = (rnum >> 28) & 0xF;
    uint seed9 = (rnum >> 18) & 0xF;
    uint seed10 = (rnum >> 22) & 0xF;
    uint seed11 = (rnum >> 26) & 0xF;
    uint seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

    seed1 = (seed1 * seed1) >> sh1;
    seed2 = (seed2 * seed2) >> sh2;
    seed3 = (seed3 * seed3) >> sh1;
    seed4 = (seed4 * seed4) >> sh2;
    seed5 = (seed5 * seed5) >> sh1;
    seed6 = (seed6 * seed6) >> sh2;
    seed7 = (seed7 * seed7) >> sh1;
    seed8 = (seed8 * seed8) >> sh2;
    seed9 = (seed9 * seed9) >> sh3;
    seed10 = (seed10 * seed10) >> sh3;
    seed11 = (seed11 * seed11) >> sh3;
    seed12 = (seed12 * seed12) >> sh3;

    // One 6-bit score per candidate partition.
    uint a = (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14)) & 0x3F;
    uint b = (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10)) & 0x3F;
    uint c = (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) & 0x3F;
    uint d = (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) & 0x3F;

    // Partitions that do not exist can never win.
    if (partitioncount < 4) {
        d = 0;
    }
    if (partitioncount < 3) {
        c = 0;
    }

    // Highest score wins; ties go to the lowest partition index.
    if (a >= b && a >= c && a >= d) {
        return 0;
    }
    if (b >= c && b >= d) {
        return 1;
    }
    if (c >= d) {
        return 2;
    }
    return 3;
}
|
|
|
|
// Builds a decoded block in which every texel of the
// blockSize.x * blockSize.y footprint is `color`. Texels are stored row by
// row; entries past the footprint are left unwritten.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 ret[144];
    const uint texelCount = u_pushConstant.blockSize.x * u_pushConstant.blockSize.y;
    for (uint k = 0; k < texelCount; k++) {
        ret[k] = color;
    }
    return ret;
}
|
|
|
|
// Block returned for invalid or unsupported encodings: opaque magenta
// (255, 0, 255, 255) in every texel.
uvec4[144] error_color_block() {
    const uvec4 kErrorColor = uvec4(0xff, 0, 0xff, 0xff);
    return single_color_block(kErrorColor);
}
|
|
|
|
// Decodes one 128-bit ASTC block into RGBA8 texels (LDR modes only).
//
// `u` holds the block with u[3] as bits 0..31 and u[0] as bits 96..127.
// The result is stored row by row with a stride of u_pushConstant.blockSize.x.
// Invalid or reserved encodings produce the magenta error block; texels whose
// partition uses an HDR color endpoint mode are set to magenta individually.
//
// Illegal encodings rejected here (error block returned):
//   - reserved block modes and weight ranges
//   - a weight grid larger than the block footprint
//   - more than 64 weights, or fewer than 24 / more than 96 weight bits
//   - dual-plane mode together with 4 partitions
//   - HDR void-extent blocks (their colours are FP16 and cannot be read as
//     UNORM16 by this LDR decoder)
//   - more than 18 colour endpoint integers
//   - fewer than ceil(13 * C / 5) bits left for C colour endpoint integers
uvec4[144] astc_decode_block(const uvec4 u) {
    uint d;        // block mode bit 10: 1 when two weight planes are used
    uint hdr;      // block mode bit 9: picks the second weight-range table below
    uint b;
    uint a;
    uint r;        // 3-bit weight range selector
    uint width;    // weight grid width
    uint height;   // weight grid height
    uvec4 cem;     // color endpoint mode per partition
    uint weightGrid[120];
    const uint u3 = u[3];
    const uint b87 = u3 >> 7 & 3;
    const uint b65 = u3 >> 5 & 3;
    const uint b32 = u3 >> 2 & 3;
    a = b65;
    b = b87;
    d = bit(u3, 10);
    hdr = bit(u3, 9);

    // Decode the block mode: weight grid size and weight range selector.
    if ((u3 & 3) == 0) {
        r = b32 << 1 | bit(u3, 4);
        if (b87 == 0) {
            width = 12;
            height = a + 2;
        } else if (b87 == 1) {
            width = a + 2;
            height = 12;
        } else if (b87 == 3) {
            if (b65 == 0) {
                width = 6;
                height = 10;
            } else if (b65 == 1) {
                width = 10;
                height = 6;
            } else if ((u3 & 0xDFF) == 0xDFC) {
                // Void-extent
                // In a void-extent block, the last 12 bits should be
                // 1 1 D 1 1 1 1 1 1 1 0 0
                // Where D is the HDR bit
                if (bit(u3, 9) != 0) {
                    // HDR void-extent: the colour is stored as FP16, which an
                    // LDR decoder must report as an error.
                    return error_color_block();
                }

                // The colour is four UNORM16 channels in bits 64..127; keep
                // the top 8 bits of each.
                uvec4 color = uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff,
                                    u[0] >> 8 & 0xff, u[0] >> 24 & 0xff);
                return single_color_block(color);
            } else {  // reserved
                return error_color_block();
            }
        } else {  // b87 == 2
            b = u3 >> 9 & 3;
            width = a + 6;
            height = b + 6;
            // Bits 9 and 10 were consumed as B in this mode.
            d = 0;
            hdr = 0;
        }
    } else {
        r = (u3 & 3) << 1 | bit(u3, 4);
        if (b32 == 0) {
            width = b + 4;
            height = a + 2;
        } else if (b32 == 1) {
            width = b + 8;
            height = a + 2;
        } else if (b32 == 2) {
            width = a + 2;
            height = b + 8;
        } else if (bit(u3, 8) == 0) {
            width = a + 2;
            height = (b & 1) + 6;
        } else {
            width = (b & 1) + 2;
            height = a + 2;
        }
    }

    if (width > u_pushConstant.blockSize.x ||
        height > u_pushConstant.blockSize.y) {
        return error_color_block();
    }

    // Decode weight
    uint trits = 0;
    uint quints = 0;
    uint bits = 0;
    const uint weightCounts = height * width * (d + 1);
    const int kMaxNumWeights = 64;
    if (kMaxNumWeights < weightCounts) {
        return error_color_block();
    }
    {
        // Map (hdr, r) to the weight encoding; r values 0 and 1 are reserved.
        if (hdr == 0) {
            switch (r) {
                case 2:
                    bits = 1;
                    break;
                case 3:
                    trits = 1;
                    break;
                case 4:
                    bits = 2;
                    break;
                case 5:
                    quints = 1;
                    break;
                case 6:
                    trits = 1;
                    bits = 1;
                    break;
                case 7:
                    bits = 3;
                    break;
                default:
                    return error_color_block();
            }
        } else {
            switch (r) {
                case 2:
                    bits = 1;
                    quints = 1;
                    break;
                case 3:
                    trits = 1;
                    bits = 2;
                    break;
                case 4:
                    bits = 4;
                    break;
                case 5:
                    quints = 1;
                    bits = 2;
                    break;
                case 6:
                    trits = 1;
                    bits = 3;
                    break;
                case 7:
                    bits = 5;
                    break;
                default:
                    return error_color_block();
            }
        }
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(trits, quints, bits, pack, packedSize);
        uint srcIdx = 0;
        uint dstIdx = 0;
        uvec4 uReversed = bitfieldReverse(u);
        const uint weightBitCount =
                get_bit_count(weightCounts, trits, quints, bits);
        const int kWeightGridMinBitLength = 24;
        const int kWeightGridMaxBitLength = 96;
        if (weightBitCount < kWeightGridMinBitLength ||
            weightBitCount > kWeightGridMaxBitLength) {
            return error_color_block();
        }
        // Weights are stored from bit 127 downwards. Reversing the bits of
        // each word and then the word order yields a stream that starts at
        // bit 0 and can be read with the same helpers as the endpoints.
        uReversed =
                uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
        // Plain-bit weights are expanded to 0..63 as
        // (raw * mul) | (raw >> mov), indexed by bits - 1.
        const uint kUnquantBinMulTable[] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
        while (dstIdx < weightCounts) {
            if (trits == 1) {
                uint decoded[5] =
                        decode_trit(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 5; i++) {
                    weightGrid[dstIdx] = kUnquantTritWeightMap
                            [kUnquantTritWeightMapBitIdx[bits] + decoded[i]];
                    // Widen 0..63 to 0..64 so that 64 means "fully endpoint 1".
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else if (quints == 1) {
                uint decoded[3] =
                        decode_quint(uReversed, srcIdx, weightBitCount, bits);
                for (int i = 0; i < 3; i++) {
                    // TODO: handle overflow in the last
                    weightGrid[dstIdx] = kUnquantQuintWeightMap
                            [kUnquantQuintWeightMapBitIdx[bits] + decoded[i]];
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else {
                uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
                uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
                               decodedRaw >> kUnquantBinMovTable[bits - 1];
                weightGrid[dstIdx] = decoded;
                if (weightGrid[dstIdx] > 32) {
                    weightGrid[dstIdx] += 1;
                }
                dstIdx++;
            }
            srcIdx += packedSize;
        }
    }

    uint partitionCount = (u3 >> 11 & 3) + 1;
    if (d == 1 && partitionCount == 4) {
        return error_color_block();
    }
    // First bit (counting from bit 0) that belongs to the weight data.
    const uint weightStart =
            128 - get_bit_count(weightCounts, trits, quints, bits);
    uint dualPlaneStart = 0;

    // Decode cem mode
    if (partitionCount == 1) {
        // Single-partition mode
        cem[0] = u3 >> 13 & 0xf;
        dualPlaneStart = weightStart - d * 2;
    } else {
        // Multi-partition mode
        // Calculate CEM for all 4 partitions, even when partitionCount < 4
        uint partMode = u3 >> 23 & 3;
        const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
        const uint extraMBitCount =
                (partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
        const uint extraMStart = weightStart - extraMBitCount;
        dualPlaneStart = extraMStart - d * 2;

        if (partMode == 0) {
            // All partitions share one CEM.
            uint cem_all = u3 >> 25 & 0xf;
            cem = uvec4(cem_all, cem_all, cem_all, cem_all);
        } else {
            // Per-partition CEM: a class bit per partition plus 2 mode bits
            // each, some of which live just below the weight data.
            uint cemBase = partMode - 1;
            uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26),
                                            bit(u3, 27), bit(u3, 28));
            const uint extraM = bits128(u, extraMStart, extraMBitCount);
            const uint kMainMBitsTable[4] = {0, 2, 1, 0};
            const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
            const uint m = extraM << mainMBitCount |
                           ((u3 >> 27 & 3) >> (2 - mainMBitCount));
            cem = cemHigh << 2 |
                  uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
        }
    }

    // Decode end points
    uvec4 endPoints[4][2];
    {
        uint totalV = 0;
        for (uint part = 0; part < partitionCount; part++) {
            totalV += get_v_count(cem[part]);
        }
        // The colour endpoint sequence may hold at most 18 integers.
        const uint kMaxColorValues = 18;
        if (totalV > kMaxColorValues) {
            return error_color_block();
        }
        const uint epStart = (partitionCount == 1) ? 17 : 29;
        const uint totalAvailBits = dualPlaneStart - epStart;
        if (totalAvailBits >= 128) {
            // overflowed
            return error_color_block();
        }
        // Endpoints need at least the 0..5 range, i.e. ceil(13 * C / 5) bits.
        if (totalAvailBits < (13 * totalV + 4) / 5) {
            return error_color_block();
        }
        uint epQuints = 0;
        uint epTrits = 0;
        uint epBits = 0;
        uint i;
        // Pick the largest quantization range that fits the available bits.
        for (i = 0; i < kRQuantParamTableLength; i++) {
            epTrits = kRQuantParamTable[i][0];
            epQuints = kRQuantParamTable[i][1];
            epBits = kRQuantParamTable[i][2];
            if (get_bit_count(totalV, epTrits, epQuints, epBits) <=
                totalAvailBits) {
                break;
            }
        }
        if (i >= kRQuantParamTableLength) {
            return error_color_block();
        }

        const uint epBitCount =
                get_bit_count(totalV, epTrits, epQuints, epBits);
        const uint epEnd = epStart + epBitCount;
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(epTrits, epQuints, epBits, pack, packedSize);

        // Decode end point parameters into buffer
        uint vBuffer[40];
        uint srcIdx = epStart;
        uint dstIdx = 0;
        // Plain-bit endpoint values are expanded to 0..255 as
        // (raw * mul) | (raw >> mov), indexed by epBits - 1.
        const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11,
                                             0x8,  0x4,  0x2,  0x1};
        const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
        while (dstIdx < totalV) {
            if (epTrits == 1) {
                uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 5; i++) {
                    vBuffer[dstIdx] = kUnquantTritColorMap
                            [kUnquantTritColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else if (epQuints == 1) {
                uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
                for (int i = 0; i < 3; i++) {
                    vBuffer[dstIdx] = kUnquantQuintColorMap
                            [kUnquantQuintColorMapBitIdx[epBits] + decoded[i]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else {
                uint src = bits128(u, srcIdx, packedSize);
                uint decoded = src * kUnquantBinMulTable[epBits - 1] |
                               src >> kUnquantBinMovTable[epBits - 1];
                vBuffer[dstIdx] = decoded;
                dstIdx++;
            }
            srcIdx += packedSize;
        }
        uint bufferIdx = 0;
        for (uint part = 0; part < partitionCount; part++) {
            // TODO: HDR support
            decode_ldr_for_mode(vBuffer, bufferIdx, cem[part],
                                endPoints[part][0], endPoints[part][1]);
            bufferIdx += get_v_count(cem[part]);
        }
    }

    uvec4 ret[144];
    {
        // Scale factors that map texel coordinates onto the weight grid.
        uvec2 dst = (1024 + u_pushConstant.blockSize / 2) /
                    (u_pushConstant.blockSize - 1);
        uint dd = d + 1;
        // Both values are the same for every texel of the block.
        const uint partitionSeed = u3 >> 13 & 1023;
        uint ccs = 0;  // channel that uses the second weight plane
        if (d == 1) {
            ccs = bits128(u, dualPlaneStart, 2);
        }
        for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
            for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
                uint part =
                        select_partition(partitionSeed, w, h, partitionCount);
                if (kHDRCEM[cem[part]]) {
                    // HDR not supported
                    ret[h * u_pushConstant.blockSize.x + w] =
                            uvec4(0xff, 0, 0xff, 0xff);
                    continue;
                }
                // Calculate weight: bilinear interpolation of the four
                // surrounding weight-grid entries (both planes at once).
                uvec2 st = uvec2(w, h);
                uvec2 cst = dst * st;
                uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
                uvec2 jst = gst >> 4;   // integer grid position
                uvec2 fst = gst & 0xf;  // fractional position in 1/16ths
                uint v0 = jst.x + jst.y * width;
                uvec2 p00 = uvec2(weightGrid[v0 * dd], weightGrid[v0 * dd + 1]);
                uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd],
                                  weightGrid[(v0 + 1) * dd + 1]);
                uvec2 p10 = uvec2(weightGrid[(v0 + width) * dd],
                                  weightGrid[(v0 + width) * dd + 1]);
                uvec2 p11 = uvec2(weightGrid[(v0 + width + 1) * dd],
                                  weightGrid[(v0 + width + 1) * dd + 1]);
                uint w11 = (fst.x * fst.y + 8) >> 4;
                uint w10 = fst.y - w11;
                uint w01 = fst.x - w11;
                uint w00 = 16 - fst.x - fst.y + w11;
                uvec2 i = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >>
                          4;

                // Blend the two endpoints with the interpolated weight.
                uvec4 c0 = endPoints[part][0];
                uvec4 c1 = endPoints[part][1];
                uvec4 c = (c0 * (64 - i[0]) + c1 * i[0] + 32) / 64;
                if (d == 1) {
                    c[ccs] = (c0[ccs] * (64 - i[1]) + c1[ccs] * i[1] + 32) / 64;
                }
                ret[h * u_pushConstant.blockSize.x + w] = c;
            }
        }
    }
    return ret;
}
|
|
|
|
// Image coordinate for a 1D array image: x plus the z component as the second
// coordinate; y is dropped.
ivec2 getPos1DArray(ivec3 pos) {
    return pos.xz;
}
|
|
|
|
// Image coordinate for a 2D array image: the position is used unchanged.
ivec3 getPos2DArray(ivec3 pos) {
    return pos.xyz;
}
|
|
|
|
// Image coordinate for a 3D image: the position is used unchanged.
ivec3 getPos3D(ivec3 pos) {
    return pos.xyz;
}
|
|
|
|
// Number of texel rows main() writes per block for a 1D array image: always 1.
uint block_y_size_1DArray() {
    const uint kRows = 1;
    return kRows;
}
|
|
|
|
// Number of texel rows main() writes per block for a 2D array image: the
// block height from the push constants.
uint block_y_size_2DArray() {
    uint rows = u_pushConstant.blockSize.y;
    return rows;
}
|
|
|
|
// Number of texel rows main() writes per block for a 3D image: the block
// height from the push constants.
uint block_y_size_3D() {
    uint rows = u_pushConstant.blockSize.y;
    return rows;
}
|
|
|
|
// Reverses the byte order of every 32-bit component of `a`.
uvec4 flip32(uvec4 a) {
    uvec4 byte0ToTop = a << 24;
    uvec4 byte1Up = (a & 0xff00u) << 8;
    uvec4 byte2Down = (a >> 8) & 0xff00u;
    uvec4 byte3ToBottom = a >> 24;
    return byte0ToTop | byte1Up | byte2Down | byte3ToBottom;
}
|
|
|
|
// Entry point: each invocation loads one compressed block from u_image0,
// decodes it and stores the resulting texels into u_image1.
void main(void) {
    // The invocation id addresses the block; z is offset by the base layer.
    ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
    pos.z += int(u_pushConstant.baseLayer);

    // astc_decode_block() expects the word order reversed relative to the
    // loaded texel (component 0 of the texel becomes u[3]).
    uvec4 srcBlock = uvec4(imageLoad(u_image0, getPos${type}(pos))).wzyx;
    uvec4[144] decompressed = astc_decode_block(srcBlock);

    const uint blockWidth = u_pushConstant.blockSize.x;
    for (uint y = 0; y < block_y_size_${type}(); y++) {
        for (uint x = 0; x < blockWidth; x++) {
            ivec3 texel = ivec3(
                    pos.xy * u_pushConstant.blockSize + ivec2(x, y),
                    pos.z);
            imageStore(u_image1, getPos${type}(texel),
                       decompressed[y * blockWidth + x]);
        }
    }
}
|