Skip to content

Conversation

@keptsecret
Copy link
Contributor

No description provided.

Comment on lines +81 to +82
template<typename T>
NBL_BOOL_CONCEPT SequenceSpecialization = concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could have just used concepts::IntVector<T> instead of the UnsignedIntegral

Comment on lines +87 to +97
vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, const vector<unsigned_integer_of_size_t<sizeof(R)>,D> scrambleKey)
{
return impl::decode_helper<T,D,EncodeScramble>::__call(val, scrambleKey);
return impl::decode_before_scramble_helper<T,D>::__call(val, scrambleKey);
}

#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral<typename vector_traits<T>::scalar_type> && size_of_v<typename vector_traits<T>::scalar_type> <= 4
// pre-decode scramble
template<typename R, typename T, uint16_t D>
vector<R,D> decode(NBL_CONST_REF_ARG(QuantizedSequence<T, D>) val, NBL_CONST_REF_ARG(QuantizedSequence<T, D>) scrambleKey)
{
return impl::decode_after_scramble_helper<T,D>::__call(val, scrambleKey);
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like these being "freely floating" as-in just sampling::decode they should be a NBL_CONST_METHOD of the QuantizedSequence

as you can see when its a free floating function there's an implicit const this as first parameter, so you can see that it wworks better conceptually as a method

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then they'd only be template<typename F> vector<F,D> decode(scramblekey)

Comment on lines 42 to -64
template<typename T, uint16_t D>
struct decode_helper<T, D, false>
struct decode_before_scramble_helper
{
using scalar_type = typename vector_traits<T>::scalar_type;
using fp_type = typename float_of_size<sizeof(scalar_type)>::type;
using uvec_type = vector<scalar_type, D>;
using uvec_type = vector<uint32_t, D>;
using sequence_type = QuantizedSequence<T, D>;
using return_type = vector<fp_type, D>;
NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value;
using return_type = vector<float32_t, D>;
NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = unorm_constant<float,8u*sizeof(scalar_type)>::value;

static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey)
{
uvec_type seqVal;
NBL_UNROLL for(uint16_t i = 0; i < D; i++)
seqVal[i] = val.get(i) ^ scrambleKey[i];
return return_type(seqVal) * bit_cast<fp_type>(UNormConstant);
seqVal[i] = val.get(i);
seqVal ^= scrambleKey;
return return_type(seqVal) * bit_cast<float_of_size_t<sizeof(scalar_type)> >(UNormConstant);
}
};
template<typename T, uint16_t D>
struct decode_helper<T, D, true>
Copy link
Member

@devshgraphicsprogramming devshgraphicsprogramming Jan 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its confusing and digusting that they're templated on T,D which are storage type and dimensions, it would be better to template them on just the QuantizedSequence and return type' scalar type F

Also the scalar_type alias here is a complete misnomer

Comment on lines +100 to 106
template<typename T> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T>)
struct QuantizedSequence<T, 1 NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T>) >
{
using store_type = T;
NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value;
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = 8u*size_of_v<store_type>;

store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; }
Copy link
Member

@devshgraphicsprogramming devshgraphicsprogramming Jan 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make all QuantizedSequence have an create for consistency

Also have a

template<typename unorm_t> requires (sizeof(unorm_t)*8>=BitsPerComponent) encode(const vector<unorm_t,D>)

which assumes you're feeding full range sizeof(unorm_t)*8-bit random values and pre-shifts right by sizeof(unorm_t)*8-BitsPerComponent (to remove extra LSB bits) before calling create so this treatment is not buried somewhere in your C++ code which fills the sequence

Comment on lines +122 to +123
QuantizedSequence<T, Dim> seq;
NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
Copy link
Member

@devshgraphicsprogramming devshgraphicsprogramming Jan 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you forgot to set seq.data = 0 first, you have UB (compiler will OpUndef assign and then any expression involving this will get optimized away - source of bugs)

Comment on lines +169 to +171
QuantizedSequence<T, Dim> seq;
NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
seq.set(i, value[i]);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same stuff about intializing to 0

Comment on lines +218 to +221
QuantizedSequence<T, Dim> seq;
NBL_UNROLL for (uint16_t i = 0; i < Dim; i++)
seq.set(i, value[i]);
return seq;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again, initialize to 0

Comment on lines +255 to +258
// uint32_t4; Dim=3 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y
// use uint32_t2 instead of uint16_t4
template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3)
struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 4 && Dim == 3) >

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove this for now, because it would require a decode of the sequence into float64_t to use for anything

struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits<T>::Dimension == 2 && Dim == 4) >
// uint16_t2; Dim=4 -- should use uint16_t4 instead of uint32_t2
template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4)
struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 2 && vector_traits<T>::Dimension == 2 && Dim == 4) >

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the specializations only meant to match uint16_t2 should just check with is_same_v<T,uint16_t2>

Comment on lines +157 to +159
// uint32_t2; Dim=3 -- should never use uint16_t2 instead of uint32_t
template<typename T, uint16_t Dim> NBL_PARTIAL_REQ_TOP(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3)
struct QuantizedSequence<T, Dim NBL_PARTIAL_REQ_BOT(impl::SequenceSpecialization<T> && size_of_v<typename vector_traits<T>::scalar_type> == 4 && vector_traits<T>::Dimension == 2 && Dim == 3) >

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep it Super Simple with is_same_v<T,uint32_t2> && Dim==3

Comment on lines +227 to +240
if (idx >= 0 && idx < 2) // x y
{
return glsl::bitfieldExtract(data[0], BitsPerComponent * idx, BitsPerComponent);
}
else // z w
{
return glsl::bitfieldExtract(data[1], BitsPerComponent * (idx & uint16_t(1u)), BitsPerComponent);
}
}

void set(const uint16_t idx, const scalar_type value)
{
assert(idx >= 0 && idx < 4);
const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u);
const uint16_t odd = idx & uint16_t(1u);
data[i] &= hlsl::mix(~Mask, Mask, bool(odd));
data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd);
if (idx >= 0 && idx < 2) // x y

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're asserting idx>=0 you don't need to check it in the if-statement

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants