From df94ffe689ccdc6f273dddd0f6710e9c310f0733 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 10:25:45 +0700 Subject: [PATCH 1/4] changed unormconstant template params to include float --- .../hlsl/sampling/quantized_sequence.hlsl | 83 +++++++++---------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8929609c34..b6f1e0d2e5 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,22 +22,22 @@ struct QuantizedSequence; namespace impl { -template +template struct unorm_constant; template<> -struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; template<> -struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; template<> -struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; template<> -struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; template<> -struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; template<> -struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; template<> -struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; template struct decode_helper; @@ -46,30 +46,28 @@ template struct decode_helper { using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; - using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) seqVal[i] = val.get(i) ^ scrambleKey[i]; - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } }; template struct decode_helper { using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; + using uvec_type = vector; using sequence_type = QuantizedSequence; using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; - using return_type = vector; + using return_type = vector; NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) @@ -82,25 +80,26 @@ struct decode_helper uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) seqVal[i] = scramble.get(i); - return return_type(seqVal) * bit_cast(UNormConstant); + return return_type(seqVal) * bit_cast >(UNormConstant); } }; + +template +NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +vector::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) { return impl::decode_helper::__call(val, scrambleKey); } -#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 - // all Dim=1 -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) -struct QuantizedSequence +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) +struct QuantizedSequence) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -109,15 +108,15 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { @@ -137,12 +136,12 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) -struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct QuantizedSequence && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -151,8 +150,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence::Dimension == 2 && Dim == 3) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -160,7 +159,7 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -204,8 +203,8 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence::Dimension == 2 && Dim == 4) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 4) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -213,7 +212,7 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -235,13 +234,13 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence::Dimension == 4 && Dim == 2) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 2) +struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 2) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { @@ -266,8 +265,8 @@ struct QuantizedSequence returns uint16_t2 - 21 bits per component: 16 in x, 5 in y // uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence::Dimension == 4 && Dim == 3) > +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; @@ -277,7 +276,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { @@ -299,8 +298,6 @@ struct QuantizedSequence Date: Fri, 9 Jan 2026 12:20:35 +0700 Subject: [PATCH 2/4] separate pre and post decode scramble into different structs, no unormconstant in quantized sequence --- .../hlsl/sampling/quantized_sequence.hlsl | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b6f1e0d2e5..f669a9fd3f 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,43 +39,37 @@ struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0 template<> struct unorm_constant { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; -template -struct decode_helper; - template -struct decode_helper +struct decode_before_scramble_helper { using scalar_type = typename vector_traits::scalar_type; using uvec_type = vector; using sequence_type = QuantizedSequence; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ scrambleKey[i]; + seqVal[i] = val.get(i); + seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast >(UNormConstant); } }; template -struct decode_helper +struct decode_after_scramble_helper { using scalar_type = typename vector_traits::scalar_type; using uvec_type = vector; using sequence_type = QuantizedSequence; - using sequence_store_type = typename sequence_type::store_type; - using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, NBL_CONST_REF_ARG(sequence_type) scrambleKey) { sequence_type scramble; - NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, scrambleKey[i]); - scramble.data ^= val.data; + scramble.data = val.data ^ scrambleKey.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) @@ -88,10 +82,18 @@ template NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4; } -template -vector::scalar_type)>, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +// post-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector,D> scrambleKey) +{ + return impl::decode_before_scramble_helper::__call(val, scrambleKey); +} + +// pre-decode scramble +template +vector decode(NBL_CONST_REF_ARG(QuantizedSequence) val, NBL_CONST_REF_ARG(QuantizedSequence) scrambleKey) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_after_scramble_helper::__call(val, scrambleKey); } // all Dim=1 @@ -99,7 +101,7 @@ template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization) struct QuantizedSequence) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -116,7 +118,6 @@ struct QuantizedSequence::value; store_type get(const uint16_t idx) { @@ -141,7 +142,7 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -159,7 +160,6 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -212,7 +212,6 @@ struct QuantizedSequence) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -240,7 +239,7 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; base_type get(const uint16_t idx) { @@ -276,7 +275,6 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; base_type get(const uint16_t idx) { From 8894dd10863ead92a9f54271281a03bf7ef8d49a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 15:28:12 +0700 Subject: [PATCH 3/4] change quantized sequence to set/get with bitfieldInsert/Extract, also order stored values from LSB to MSB --- .../hlsl/sampling/quantized_sequence.hlsl | 176 +++++++++--------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index f669a9fd3f..025e506fb4 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -116,21 +116,17 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; store_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); - return (data >> (BitsPerComponent * idx)) & Mask; + return glsl::bitfieldExtract(data, BitsPerComponent * idx, BitsPerComponent); } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < Dim); - const uint16_t bits = (BitsPerComponent * idx); - data &= ~(Mask << bits); - data |= ((value >> DiscardBits) & Mask) << bits; + glsl::bitfieldInsert(data, value, BitsPerComponent * idx, BitsPerComponent); } store_type data; @@ -150,150 +146,154 @@ struct QuantizedSequence NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 3) -struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 3) > +// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 2 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + return glsl::bitfieldExtract(data[0], 0u, BitsPerComponent); + else if (idx == 1) // y { - return data[idx] & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - scalar_type z = (data[0] >> BitsPerComponent) & zmask; - z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; - return z; + scalar_type y = glsl::bitfieldExtract(data[0], BitsPerComponent, DiscardBits); + y |= glsl::bitfieldExtract(data[1], 0u, DiscardBits - 1u) << DiscardBits; + return y; } + else // z + return glsl::bitfieldExtract(data[1], DiscardBits - 1u, BitsPerComponent); } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 3); - if (idx < 2) + if (idx == 0) // x + glsl::bitfieldInsert(data[0], value, 0u, BitsPerComponent); + else if (idx == 1) // y { - const scalar_type trunc_val = value >> DiscardBits; - data[idx] &= ~Mask; - data[idx] |= trunc_val & Mask; - } - else - { - const scalar_type zbits = scalar_type(DiscardBits); - const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); - const scalar_type trunc_val = value >> DiscardBits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; + glsl::bitfieldInsert(data[0], value, BitsPerComponent, DiscardBits); + glsl::bitfieldInsert(data[1], value >> DiscardBits, 0u, DiscardBits - 1u); } + else // z + glsl::bitfieldInsert(data[1], value, DiscardBits - 1u, BitsPerComponent); } store_type data; }; -// uint16_t2, uint32_t2; Dim=4 -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 2 && Dim == 4) -struct QuantizedSequence && vector_traits::Dimension == 2 && Dim == 4) > +// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence && size_of_v::scalar_type> == 2 && vector_traits::Dimension == 2 && Dim == 4) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; + if (idx >= 0 && idx < 2) // x y + { + return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } void set(const uint16_t idx, const scalar_type value) { assert(idx >= 0 && idx < 4); - const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); - const uint16_t odd = idx & uint16_t(1u); - data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); + if (idx >= 0 && idx < 2) // x y + { + glsl::bitfieldInsert(data[0], value, BitsPerComponent * idx, BitsPerComponent); + } + else // z w + { + glsl::bitfieldInsert(data[1], value, BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent); + } } store_type data; }; -// uint16_t4, uint32_t4; Dim=2 -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 2) -struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 2) > -{ - using store_type = T; - using scalar_type = typename vector_traits::scalar_type; - using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v; +// no uint16_t4, uint32_t4; Dim=2 - base_type get(const uint16_t idx) - { - assert(idx >= 0 && idx < 2); - base_type a; - a[0] = data[uint16_t(2u) * idx]; - a[1] = data[uint16_t(2u) * idx + 1]; - return a; - } - - void set(const uint16_t idx, const base_type value) - { - assert(idx >= 0 && idx < 2); - base_type a; - data[uint16_t(2u) * idx] = value[0]; - data[uint16_t(2u) * idx + 1] = value[1]; - } - - store_type data; -}; - -// uint16_t4, uint32_t4; Dim=3 -// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y -// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y -template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && vector_traits::Dimension == 4 && Dim == 3) -struct QuantizedSequence && vector_traits::Dimension == 4 && Dim == 3) > +// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +// use uint32_t2 instead of uint16_t4 +template NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence && size_of_v::scalar_type> == 4 && vector_traits::Dimension == 4 && Dim == 3) > { using store_type = T; using scalar_type = typename vector_traits::scalar_type; using base_type = vector; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; base_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); - base_type a; - a[0] = data[idx]; - a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; - return a; + if (idx == 0) // x + { + base_type x; + x[0] = data[0]; + x[1] = glsl::bitfieldExtract(data[1], 0u, 10u); + return x; + } + else if (idx == 1) // y + { + base_type y; + y[0] = glsl::bitfieldExtract(data[1], 10u, 22u); + y[0] |= glsl::bitfieldExtract(data[2], 0u, 10u) << 22u; + y[1] = glsl::bitfieldExtract(data[2], 10u, 10u); + return y; + } + else // z + { + base_type z; + z[0] = glsl::bitfieldInsert(data[2], 20u, 12u); + z[0] |= glsl::bitfieldInsert(data[3], 0u, 20u) << 12u; + z[1] = glsl::bitfieldInsert(data[3], 20u, 10u); + return z; + } } void set(const uint16_t idx, const base_type value) { assert(idx >= 0 && idx < 3); - data[idx] = value[0]; - data[3] &= ~Mask; - data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); + if (idx == 0) // x + { + data[0] = value[0]; + glsl::bitfieldInsert(data[1], value[1], 0u, 10u); + } + else if (idx == 1) // y + { + glsl::bitfieldInsert(data[1], value[0], 10u, 22u); + glsl::bitfieldInsert(data[2], value[0] >> 22u, 0u, 10u); + glsl::bitfieldInsert(data[2], value[1], 10u, 10u); + } + else // z + { + glsl::bitfieldInsert(data[2], value[0], 20u, 12u); + glsl::bitfieldInsert(data[3], value[0] >> 12u, 0u, 20u); + glsl::bitfieldInsert(data[3], value[1], 20u, 10u); + } } store_type data; + // data[0] = | -- x 32 bits -- | + // data[1] = MSB | -- y 22 bits -- | -- x 10 bits -- | LSB + // data[2] = MSB | -- z 12 bits -- | -- y 20 bits -- | LSB + // data[3] = | -- z 30 bits -- | }; } From 639f464a1788117f8c5bb32646f487380098c836 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 9 Jan 2026 15:42:44 +0700 Subject: [PATCH 4/4] added create functions that should not preserve existing bits --- .../hlsl/sampling/quantized_sequence.hlsl | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 025e506fb4..bd763ad6d7 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -117,6 +117,14 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + store_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); @@ -156,6 +164,14 @@ struct QuantizedSequence) - BitsPerComponent; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 3); @@ -197,6 +213,14 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + static QuantizedSequence create(const vector value) + { + QuantizedSequence seq; + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) + seq.set(i, value[i]); + return seq; + } + scalar_type get(const uint16_t idx) { assert(idx >= 0 && idx < 4);