@@ -0,0 +1,236 @@
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib.util
import unittest

import torch
from tico.quantization.config.ptq import PTQConfig
from tico.quantization.wrapq.dtypes import DType
from tico.quantization.wrapq.mode import Mode
from tico.quantization.wrapq.wrappers.nn.quant_conv3d import QuantConv3d
from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_patch_embed import (
    QuantQwen3VLVisionPatchEmbed,
)


trans_spec = importlib.util.find_spec("transformers")
skip_msg = "transformers not installed — skipping Qwen3VLVisionPatchEmbed tests"


@unittest.skipUnless(trans_spec, skip_msg)
class TestQuantQwen3VLVisionPatchEmbed(unittest.TestCase):
    fp_patch_embed: torch.nn.Module
    hidden_size: int

    @classmethod
    def setUpClass(cls):
        from transformers.models.qwen3_vl.configuration_qwen3_vl import (
            Qwen3VLVisionConfig,
        )
        from transformers.models.qwen3_vl.modeling_qwen3_vl import (
            Qwen3VLVisionPatchEmbed,
        )

        cfg = Qwen3VLVisionConfig(
            hidden_size=64,  # Smaller for testing
            spatial_merge_size=2,
            temporal_merge_size=2,
        )

        cls.fp_patch_embed = Qwen3VLVisionPatchEmbed(cfg)
        cls.hidden_size = cfg.hidden_size

    def test_mode_transitions(self):
        """Test quantization mode transitions: NO_QUANT → CALIB → QUANT"""
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        self.assertIs(q_patch._mode, Mode.NO_QUANT)

        q_patch.enable_calibration()
        self.assertIs(q_patch._mode, Mode.CALIB)

        # Run forward pass during calibration
        x = torch.randn(2, 3, 4, 32, 32)
        _ = q_patch(x)

        q_patch.freeze_qparams()
        self.assertIs(q_patch._mode, Mode.QUANT)

    def test_forward_diff(self):
        """
        Test that quantized output is acceptably close to FP32 reference.
        After calibration and freeze, quantized output should:
        - Differ from FP reference (quantization actually applied)
        - Stay within reasonable error bounds
        """
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        q_patch.enable_calibration()

        # Calibrate with multiple inputs
        for _ in range(4):
            x = torch.randn(2, 3, 4, 32, 32)
            _ = q_patch(x)

        q_patch.freeze_qparams()

        x = torch.randn(2, 3, 4, 32, 32)
        with torch.no_grad():
            q_out = q_patch(x)
            fp_out = self.fp_patch_embed(x)

        diff = (fp_out - q_out).abs().mean().item()
        self.assertGreater(diff, 0.0)  # not identical
        self.assertLess(diff, 0.4)  # acceptably close
        self.assertEqual(fp_out.shape, q_out.shape)

    def test_proj_override(self):
        """
        PTQConfig overrides should propagate to the wrapped Conv3d layer.
        """
        cfg = PTQConfig(
            default_dtype=DType.uint(8),
            overrides={
                "proj": {
                    "weight": {"dtype": DType.uint(4)},
                    "act_in": {"dtype": DType.uint(4)},
                    "act_out": {"dtype": DType.uint(4)},
                }
            },
        )
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed, qcfg=cfg)
        q_conv3d = q_patch.proj.wrapped

        self.assertIsInstance(q_conv3d, QuantConv3d)
        self.assertEqual(q_conv3d.obs_weight.dtype, DType.uint(4))
        self.assertEqual(q_conv3d.obs_act_in.dtype, DType.uint(4))
        self.assertEqual(q_conv3d.obs_act_out.dtype, DType.uint(4))

    def test_activation_stats_collected(self):
        """
        Test that activation statistics are properly collected during calibration.
        Both local observers and wrapped Conv3d observers should collect stats.
        """
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        q_patch.enable_calibration()

        # Run forward pass to collect stats
        x = torch.randn(2, 3, 4, 32, 32)
        _ = q_patch(x)

        # Check that local observers have collected stats
        self.assertTrue(q_patch.obs_hidden.min_val.numel() > 0)
        self.assertTrue(q_patch.obs_output.min_val.numel() > 0)

        # Check that wrapped Conv3d observers have collected stats
        q_conv3d = q_patch.proj.wrapped
        self.assertTrue(q_conv3d.obs_act_in.min_val.numel() > 0)
        self.assertTrue(q_conv3d.obs_act_out.min_val.numel() > 0)
        self.assertTrue(q_conv3d.obs_weight.min_val.numel() > 0)

        # Freeze and check qparams exist
        q_patch.freeze_qparams()
        self.assertTrue(q_patch.obs_hidden.has_qparams)
        self.assertTrue(q_patch.obs_output.has_qparams)
        self.assertTrue(q_conv3d.obs_act_in.has_qparams)
        self.assertTrue(q_conv3d.obs_act_out.has_qparams)
        self.assertTrue(q_conv3d.obs_weight.has_qparams)

    def test_observer_count(self):
        """
        Test that the wrapper has the correct number of observers.
        - 2 local observers (obs_hidden, obs_output)
        - 3 observers from wrapped Conv3d (obs_weight, obs_act_in, obs_act_out)
        """
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)

        observers = list(q_patch._all_observers())
        self.assertEqual(len(observers), 5)  # 2 local + 3 from Conv3d

    def test_registration_in_registry(self):
        """
        Test that Qwen3VLVisionPatchEmbed is properly registered in the wrapper registry.
        """
        from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_patch_embed import (
            QuantQwen3VLVisionPatchEmbed,
        )
        from tico.quantization.wrapq.wrappers.registry import lookup
        from transformers.models.qwen3_vl.modeling_qwen3_vl import (
            Qwen3VLVisionPatchEmbed,
        )

        # Verify Qwen3VLVisionPatchEmbed maps to QuantQwen3VLVisionPatchEmbed
        wrapper_cls = lookup(Qwen3VLVisionPatchEmbed)
        self.assertIs(wrapper_cls, QuantQwen3VLVisionPatchEmbed)

    def test_output_shape(self):
        """Test that output shape is correct after patch embedding."""
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        q_patch.enable_calibration()

        x = torch.randn(2, 3, 4, 32, 32)
        _ = q_patch(x)

        q_patch.freeze_qparams()

        with torch.no_grad():
            q_out = q_patch(x)
            fp_out = self.fp_patch_embed(x)

        self.assertEqual(q_out.shape, fp_out.shape)

    def test_multiple_calibration_steps(self):
        """
        Test that running multiple calibration iterations works correctly.
        Statistics should be accumulated across multiple forward passes.
        """
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        q_patch.enable_calibration()

        # Run multiple calibration steps
        for _ in range(5):
            x = torch.randn(2, 3, 4, 32, 32)
            _ = q_patch(x)

        q_patch.freeze_qparams()

        # Verify that all observers have quantization parameters
        self.assertTrue(q_patch.obs_hidden.has_qparams)
        self.assertTrue(q_patch.obs_output.has_qparams)
        self.assertTrue(q_patch.proj.wrapped.obs_act_in.has_qparams)
        self.assertTrue(q_patch.proj.wrapped.obs_act_out.has_qparams)
        self.assertTrue(q_patch.proj.wrapped.obs_weight.has_qparams)

    def test_different_batch_sizes(self):
        """
        Test that quantization works correctly with different batch sizes.
        """
        q_patch = QuantQwen3VLVisionPatchEmbed(self.fp_patch_embed)
        q_patch.enable_calibration()

        # Calibrate with one batch size
        calibrate_batch = torch.randn(2, 3, 4, 32, 32)
        for _ in range(3):
            _ = q_patch(calibrate_batch)
        q_patch.freeze_qparams()

        # Test with different batch sizes
        for batch_size in [1, 2, 4]:
            x = torch.randn(batch_size, 3, 4, 32, 32)
            with torch.no_grad():
                q_out = q_patch(x)
                fp_out = self.fp_patch_embed(x)

            self.assertEqual(q_out.shape, fp_out.shape)
            diff = (fp_out - q_out).abs().mean().item()
            self.assertLess(diff, 0.4)
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import importlib.util
import sys

import torch
import torch.nn as nn

import tico
import tico.quantization
import tico.quantization.config.ptq

# Check if transformers is available
trans_spec = importlib.util.find_spec("transformers")
if trans_spec is None:
    print(
        "Error: transformers package not installed. Cannot test Qwen3VLVisionPatchEmbed."
    )
    sys.exit(1)

from transformers.models.qwen3_vl.configuration_qwen3_vl import Qwen3VLVisionConfig
from transformers.models.qwen3_vl.modeling_qwen3_vl import Qwen3VLVisionPatchEmbed


def generate_calibration_data(batch_size: int, sample_shape) -> list:
    """Generate calibration data for PTQ"""
    calibration_data = []
    for _ in range(batch_size):
        x = torch.randn(sample_shape)
        calibration_data.append(x)
    return calibration_data


def main():
    # Create the vision patch embed model
    cfg = Qwen3VLVisionConfig(
        in_channels=3,
        hidden_size=1024,
        temporal_merge_size=2,
        patch_size=16,
    )
    model = Qwen3VLVisionPatchEmbed(cfg)
Comment on lines 50 to 56

dayo09 (Contributor):
(Just to note) Oh...? This model looks a bit different from my vision patch embed. Maybe because of spatial_merge_size...

mhs4670go (Contributor):
@dayo09 How different is it?

dayo09 (Contributor):
@mhs4670go I cannot attach image files here, see here

dayo09 (Contributor), Feb 13, 2026:
@dvsav
Below is our target configuration for this layer; could you use it?

Qwen3VLVisionPatchEmbed(
  (proj): Conv3d(3, 1024, kernel_size=(2, 16, 16), stride=(2, 16, 16))
)

'args': ('Tensor(shape=[468, 1536], dtype=torch.float32)',)

The reason is that your current example leaves some float32 ADD operators in the graph (see #489 for details).
We are planning to lower this specific Conv3d operator into Conv2d+Reshape (@llFreetimell is working on it). The specifics above are derived from a use-case scenario (which is not 100% fixed yet, though).
Thus, it would be good to provide the quantization example with the above configuration.

(+ Do you have any specific reason for choosing this configuration of Qwen3VLVisionPatchEmbed?)

dvsav (Contributor, Author), Feb 13, 2026:
> Below is our target configuration for this layer; could you use it?

@dayo09 👍 Thanks for noticing this! I've changed the example code and added assertions checking that the Conv3d has the right configuration.

dayo09 (Contributor):
@dvsav Well, after applying the config, the graph remains the same. (I am sorry that I cannot show you the image; I am not yet permitted to upload images, but I will take care of that soon to alleviate your inconvenience.)

The convolution's weight is lifted up as a constant input and not constant-folded. I believe constant folding after quantization is required in this case. 😅
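
A minimal sketch of the Conv2d + reshape equivalence for this particular Conv3d configuration (kernel and stride both (2, 16, 16)), assuming plain PyTorch semantics. It only illustrates the idea discussed above, not the actual lowering pass; the helper name conv3d_as_conv2d is hypothetical.

# Illustrative sketch (not part of the patch): Conv3d(3, 1024, kernel_size=(2, 16, 16),
# stride=(2, 16, 16)) expressed as Conv2d + reshape.
import torch
import torch.nn.functional as F

def conv3d_as_conv2d(x, w, b):
    # x: (N, C, T, H, W) with T even; w: (O, C, 2, 16, 16); b: (O,)
    N, C, T, H, W = x.shape
    out_ch = w.shape[0]
    # Fold each pair of consecutive frames into the channel dimension.
    x2d = (
        x.view(N, C, T // 2, 2, H, W)
        .permute(0, 2, 1, 3, 4, 5)  # (N, T//2, C, 2, H, W)
        .reshape(N * (T // 2), C * 2, H, W)
    )
    # Flatten the matching (C, 2) axes of the weight in the same order.
    w2d = w.reshape(out_ch, C * 2, 16, 16)
    y2d = F.conv2d(x2d, w2d, b, stride=(16, 16))  # (N*T//2, O, H//16, W//16)
    # Restore the temporal axis to match the Conv3d output layout.
    return y2d.view(N, T // 2, out_ch, H // 16, W // 16).permute(0, 2, 1, 3, 4)

# Sanity check against a reference Conv3d:
#   conv3d = torch.nn.Conv3d(3, 1024, kernel_size=(2, 16, 16), stride=(2, 16, 16))
#   x = torch.randn(1, 3, 8, 224, 224)
#   torch.testing.assert_close(conv3d(x), conv3d_as_conv2d(x, conv3d.weight, conv3d.bias))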

    model.eval()

    # Qwen3VLVisionPatchEmbed(
    #   (proj): Conv3d(3, 1024, kernel_size=(2, 16, 16), stride=(2, 16, 16))
    # )
    assert model.proj.in_channels == 3
    assert model.proj.out_channels == 1024
    assert model.proj.kernel_size == (2, 16, 16)
    assert model.proj.stride == (2, 16, 16)

    # Generate calibration data
    # Input shape: (batch_size, in_channels, depth, height, width)
    # Example: (2, 3, 8, 224, 224) - 2 videos, RGB, 8 frames, 224x224 resolution
    calibration_data = generate_calibration_data(
        batch_size=20, sample_shape=(2, 3, 8, 224, 224)
    )

    # Configure PTQ
    ptq_config = tico.quantization.config.ptq.PTQConfig()

    # Prepare the model for quantization
    prepared_model = tico.quantization.prepare(
        model, ptq_config, inplace=True  # Transform the model in place
    )

    # Calibrate the model (collect statistics)
    with torch.no_grad():
        for batch in calibration_data:
            prepared_model(batch)

    # Convert to quantized model
    quantized_model = tico.quantization.convert(prepared_model, inplace=True)

    # Convert to Circle format
    # example_inputs shape: (batch_size, in_channels, depth, height, width)
    example_inputs = (torch.randn(2, 3, 8, 224, 224),)
    circle_model = tico.convert(quantized_model, example_inputs)

    # Save the Circle model
    filename = "quantized_vision_patch_embed.circle"
    circle_model.save(filename)
    print(f"Circle model saved as '{filename}'")


if __name__ == "__main__":
    main()