diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8929609c34..bd763ad6d7 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,85 +22,86 @@ struct QuantizedSequence; namespace impl { -template +template struct unorm_constant; template<> -struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; template<> -struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; template<> -struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; template<> -struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; template<> -struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; template<> -struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; template<> -struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; - -template -struct decode_helper; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; template -struct decode_helper +struct decode_before_scramble_helper { using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ scrambleKey[i]; - return return_type(seqVal) * bit_cast(UNormConstant); + seqVal[i] = val.get(i); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast >(UNormConstant); } }; template -struct decode_helper +struct decode_after_scramble_helper { using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; - using sequence_store_type = typename sequence_type::store_type; - using sequence_scalar_type = typename vector_traits::scalar_type; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey) { sequence_type scramble; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, scrambleKey[i]); - scramble.data ^= val.data; + scramble.data = val.data ^ scrambleKey.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } }; + +template +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } -template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +// post-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector,D> scrambleKey) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_before_scramble_helper::__call(val, scrambleKey); } -#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 +// pre-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) +{ + return impl::decode_after_scramble_helper::__call(val, scrambleKey); +} // all Dim=1 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) -struct QuantizedSequence +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) +struct QuantizedSequence) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -109,40 +110,43 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } store_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); - return (data >> (BitsPerComponent * idx)) & Mask; + return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent); } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < Dim); - const uint16_t bits = (BitsPerComponent * idx); - data &= ~(Mask << bits); - data |= ((value >> DiscardBits) & Mask) << bits; + glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent); } store_type data; }; // Dim 2,3,4 matches vector dim -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -150,157 +154,172 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence::Dimension == 2 && Dim == 3) > +// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent); + else if (idx == 1) // y { - return data[idx] & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - scalar_type z = (data[0] >> BitsPerComponent) & zmask; - z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; - return z; + scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits); + y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits; + return y; } + else // z + return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent); } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent); + else if (idx == 1) // y { - const scalar_type trunc_val = value >> DiscardBits; - data[idx] &= ~Mask; - data[idx] |= trunc_val & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - const scalar_type trunc_val = value >> DiscardBits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; + glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits); + glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u); } + else // z + glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent); } store_type data; }; -// uint16_t2, uint32_t2; Dim=4 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence::Dimension == 2 && Dim == 4) > +// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; + if (idx >= 0 && idx < 2) // x y + { + return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - const uint16_t odd = idx & uint16_t(1u); - data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); + if (idx >= 0 && idx < 2) // x y + { + glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } store_type data; }; -// uint16_t4, uint32_t4; Dim=2 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence::Dimension == 4 && Dim == 2) > -{ - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; - - base_type get(const uint16_t idx) - { - assert(idx >= 0 && idx < 2); - base_type a; - a[0] = data[uint16_t(2u) * idx]; - a[1] = data[uint16_t(2u) * idx + 1]; - return a; - } - - void set(const uint16_t idx, const base_type value) - { - assert(idx >= 0 && idx < 2); - base_type a; - data[uint16_t(2u) * idx] = value[0]; - data[uint16_t(2u) * idx + 1] = value[1]; - } - - store_type data; -}; +// no uint16_t4, uint32_t4; Dim=2 -// uint16_t4, uint32_t4; Dim=3 -// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y -// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence::Dimension == 4 && Dim == 3) > +// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +// use uint32_t2 instead of uint16_t4 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - base_type a; - a[0] = data[idx]; - a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; - return a; + if (idx == 0) // x + { + base_type x; + x[0] = data[0]; + x[1] = glsl::bitfieldExtract(data[1], 0u, 10u); + return x; + } + else if (idx == 1) // y + { + base_type y; + y[0] = glsl::bitfieldExtract(data[1], 10u, 22u); + y[0] |= glsl::bitfieldExtract(data[2], 0u, 10u) << 22u; + y[1] = glsl::bitfieldExtract(data[2], 10u, 10u); + return y; + } + else // z + { + base_type z; + z[0] = glsl::bitfieldInsert(data[2], 20u, 12u); + z[0] |= glsl::bitfieldInsert(data[3], 0u, 20u) << 12u; + z[1] = glsl::bitfieldInsert(data[3], 20u, 10u); + return z; + } } void set(const uint16_t idx, const base_type value) { assert(idx >= 0 && idx < 3); - data[idx] = value[0]; - data[3] &= ~Mask; - data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); + if (idx == 0) // x + { + data[0] = value[0]; + glsl::bitfieldInsert(data[1], value[1], 0u, 10u); + } + else if (idx == 1) // y + { + glsl::bitfieldInsert(data[1], value[0], 10u, 22u); + glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u); + glsl::bitfieldInsert(data[2], value[1], 10u, 10u); + } + else // z + { + glsl::bitfieldInsert(data[2], value[0], 20u, 12u); + glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u); + glsl::bitfieldInsert(data[3], value[1], 20u, 10u); + } } store_type data; + // data[0] = | -- x 32 bits -- | + // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB + // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB + // data[3] = | -- z 30 bits -- | }; -#undef SEQUENCE_SPECIALIZATION_CONCEPT - } }