From d614df650074644a03bf16ae611fcf1b7be62baf Mon Sep 17 00:00:00 2001 From: "d.savchenkov" Date: Mon, 16 Feb 2026 17:28:22 +0300 Subject: [PATCH] [quantization] Introduce wrapper for Qwen3VLVisionRotaryEmbedding This change introduces QuantQwen3VLVisionRotaryEmbedding wrapper to support post-training quantization of Qwen3VLVisionRotaryEmbedding module. TICO-DCO-1.0-Signed-off-by: d.savchenkov --- .../test_quant_vision_rotary_embedding.py | 224 ++++++++++++++++++ .../quantize_qwen_vision_rotary_embedding.py | 95 ++++++++ .../qwen_vl/quant_vision_rotary_embedding.py | 84 +++++++ tico/quantization/wrapq/wrappers/registry.py | 1 + 4 files changed, 404 insertions(+) create mode 100644 test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_rotary_embedding.py create mode 100644 tico/quantization/wrapq/examples/qwen/quantize_qwen_vision_rotary_embedding.py create mode 100644 tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_rotary_embedding.py diff --git a/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_rotary_embedding.py b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_rotary_embedding.py new file mode 100644 index 00000000..52f011e0 --- /dev/null +++ b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_rotary_embedding.py @@ -0,0 +1,224 @@ +# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import importlib.util
import unittest

import torch
from tico.quantization.config.ptq import PTQConfig
from tico.quantization.wrapq.dtypes import DType
from tico.quantization.wrapq.mode import Mode
from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_rotary_embedding import (
    QuantQwen3VLVisionRotaryEmbedding,
)


trans_spec = importlib.util.find_spec("transformers")
skip_msg = "transformers not installed — skipping Qwen3VLVisionRotaryEmbedding tests"


@unittest.skipUnless(trans_spec, skip_msg)
class TestQuantQwen3VLVisionRotaryEmbedding(unittest.TestCase):
    """Unit tests for the PTQ wrapper around Qwen3VLVisionRotaryEmbedding."""

    fp_rope: torch.nn.Module
    dim: int
    theta: float

    @classmethod
    def setUpClass(cls):
        from transformers.models.qwen3_vl.modeling_qwen3_vl import (
            Qwen3VLVisionRotaryEmbedding,
        )

        # dim=64 keeps the tests fast; production configs typically use a
        # larger dim (e.g. 128 for head_dim=64).  theta is the module default.
        cls.fp_rope = Qwen3VLVisionRotaryEmbedding(dim=64)
        cls.dim = 64
        cls.theta = 10000.0

    def test_mode_transitions(self):
        """Quantization mode transitions: NO_QUANT -> CALIB -> QUANT."""
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        self.assertIs(q_rope._mode, Mode.NO_QUANT)

        q_rope.enable_calibration()
        self.assertIs(q_rope._mode, Mode.CALIB)

        # A forward pass during calibration feeds the output observer.
        _ = q_rope(128)

        q_rope.freeze_qparams()
        self.assertIs(q_rope._mode, Mode.QUANT)

    def test_quantised_output_close(self):
        """Quantized output must stay acceptably close to the FP32 reference."""
        torch.manual_seed(42)
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        q_rope.enable_calibration()

        # Calibrate with several sequence lengths to cover the dynamic range.
        for seqlen in [64, 128, 256]:
            _ = q_rope(seqlen)

        q_rope.freeze_qparams()

        seqlen = 128
        with torch.no_grad():
            q_out = q_rope(seqlen)
            fp_out = self.fp_rope(seqlen)

        diff = (fp_out - q_out).abs().mean().item()
        self.assertGreater(diff, 0.0)  # fake-quant perturbs the output...
        self.assertLess(diff, 0.4)  # ...but only within quantization error
        self.assertEqual(fp_out.shape, q_out.shape)

    def test_output_shape(self):
        """Output shape must be (seqlen, dim / 2)."""
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        # One enable_calibration() before the forward passes is sufficient
        # (the original redundantly re-enabled it on every loop iteration).
        q_rope.enable_calibration()

        for seqlen in [64, 128, 256]:
            _ = q_rope(seqlen)

        q_rope.freeze_qparams()

        seqlen = 128
        with torch.no_grad():
            q_out = q_rope(seqlen)

        self.assertEqual(q_out.shape, (seqlen, self.dim // 2))

    def test_different_sequence_lengths(self):
        """Quantization must hold up across sequence lengths.

        Calibrating with the longest length is enough: shorter sequences
        produce a subset of the calibrated value range.
        """
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        q_rope.enable_calibration()

        for _ in range(3):
            _ = q_rope(256)

        q_rope.freeze_qparams()

        for seqlen in [2, 4, 8, 16, 32, 64, 128, 256]:
            with torch.no_grad():
                q_out = q_rope(seqlen)
                fp_out = self.fp_rope(seqlen)

            diff = (fp_out - q_out).abs().mean().item()
            self.assertLess(diff, 0.4)
            self.assertEqual(q_out.shape, (seqlen, self.dim // 2))

    def test_dtype_override(self):
        """PTQConfig overrides should affect the output observer."""
        cfg = PTQConfig(
            default_dtype=DType.uint(8),
            overrides={
                "output": {"dtype": DType.uint(4)},
            },
        )
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope, qcfg=cfg)

        self.assertEqual(q_rope.obs_output.dtype, DType.uint(4))

    def test_activation_stats_collected(self):
        """Observer statistics must be collected during calibration."""
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        q_rope.enable_calibration()

        _ = q_rope(128)

        # The observer has seen at least one tensor.
        self.assertTrue(q_rope.obs_output.min_val.numel() > 0)

        # Freezing should materialize quantization parameters.
        q_rope.freeze_qparams()
        self.assertTrue(q_rope.obs_output.has_qparams)

    def test_observer_count(self):
        """Exactly one observer (output) — the module has no weights."""
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        self.assertEqual(len(list(q_rope._all_observers())), 1)

    def test_registration_in_registry(self):
        """Qwen3VLVisionRotaryEmbedding must resolve to our wrapper class."""
        from tico.quantization.wrapq.wrappers.registry import lookup
        from transformers.models.qwen3_vl.modeling_qwen3_vl import (
            Qwen3VLVisionRotaryEmbedding,
        )

        self.assertIs(
            lookup(Qwen3VLVisionRotaryEmbedding),
            QuantQwen3VLVisionRotaryEmbedding,
        )

    def test_no_learnable_parameters(self):
        """The wrapper exposes no parameters; inv_freq is a plain buffer."""
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)

        self.assertEqual(len(list(q_rope.parameters())), 0)

        self.assertIsInstance(q_rope.inv_freq, torch.Tensor)
        self.assertIn("inv_freq", q_rope._buffers)

    def test_frequency_values_correct(self):
        """Frequencies follow freqs[i, j] = i * theta^(-2j/dim).

        Unlike the original version of this test (which froze qparams
        without calibrating and then only compared *shapes*), we calibrate
        first and verify the values against a manual computation.
        """
        q_rope = QuantQwen3VLVisionRotaryEmbedding(self.fp_rope)
        q_rope.enable_calibration()

        seqlen = 4
        _ = q_rope(seqlen)  # collect stats before freezing
        q_rope.freeze_qparams()

        with torch.no_grad():
            freqs = q_rope(seqlen)

        # Manually compute expected values from the FP module's buffer.
        expected = torch.outer(
            torch.arange(seqlen, dtype=torch.float32),
            self.fp_rope.inv_freq,
        )

        self.assertEqual(freqs.shape, (seqlen, self.dim // 2))
        # Quantization reduces precision but must preserve the pattern.
        self.assertLess((freqs - expected).abs().mean().item(), 0.4)
"""Example: post-training quantization of Qwen3VLVisionRotaryEmbedding.

Calibrates the rotary-embedding wrapper over several sequence lengths,
converts it to a (fake-)quantized model and exports it to a Circle file.
"""

import importlib.util
import sys

import torch


def generate_calibration_data(batch_size: int, sequence_lengths: list) -> list:
    """Return *sequence_lengths* repeated *batch_size* times, flattened.

    E.g. ``generate_calibration_data(2, [64, 128]) == [64, 128, 64, 128]``.
    """
    return [seqlen for _ in range(batch_size) for seqlen in sequence_lengths]


def main():
    # Heavy optional imports are deferred into main() so this module stays
    # importable even when tico / transformers are not installed.
    if importlib.util.find_spec("transformers") is None:
        print(
            "Error: transformers package not installed. Cannot test Qwen3VLVisionRotaryEmbedding."
        )
        sys.exit(1)

    import tico
    import tico.quantization
    import tico.quantization.config.ptq
    from transformers.models.qwen3_vl.modeling_qwen3_vl import (
        Qwen3VLVisionRotaryEmbedding,
    )

    # dim=128 is typical for head_dim=64 in Qwen3-VL.
    dim = 128
    theta = 10000.0
    model = Qwen3VLVisionRotaryEmbedding(dim=dim, theta=theta)
    model.eval()

    # Sanity checks on module contents:
    #   inv_freq: buffer of shape [dim / 2] frequency bands
    assert model.dim == dim
    assert model.theta == theta
    assert model.inv_freq.shape == (dim // 2,)

    # Calibrate with various sequence lengths to capture the full dynamic range.
    calibration_data = generate_calibration_data(
        batch_size=20, sequence_lengths=[64, 128, 256, 512]
    )

    ptq_config = tico.quantization.config.ptq.PTQConfig()

    # Prepare (insert observers) in place, then run calibration passes.
    prepared_model = tico.quantization.prepare(model, ptq_config, inplace=True)

    with torch.no_grad():
        for seqlen in calibration_data:
            _ = prepared_model(seqlen)

    # Freeze quantization parameters.
    quantized_model = tico.quantization.convert(prepared_model, inplace=True)

    # Export to Circle format; the example input is a plain integer seqlen.
    example_seqlen = 256
    circle_model = tico.convert(quantized_model, (example_seqlen,))

    filename = "quantized_vision_rotary_embedding.circle"
    circle_model.save(filename)
    # BUG FIX: the original printed the literal string "(unknown)" instead of
    # interpolating the saved file's name.
    print(f"Circle model saved as '{filename}'")


if __name__ == "__main__":
    main()
+ """ + + def __init__( + self, + fp_rope: nn.Module, + *, + qcfg: Optional[PTQConfig] = None, + fp_name: Optional[str] = None, + ): + super().__init__(qcfg, fp_name=fp_name) + + assert hasattr(fp_rope, "dim") + assert hasattr(fp_rope, "theta") + assert hasattr(fp_rope, "inv_freq") + + self.dim = fp_rope.dim + self.theta = fp_rope.theta + + # Copy the inv_freq buffer to the wrapper + self.register_buffer("inv_freq", fp_rope.inv_freq.clone()) + + # Observer for output activation (rotary frequencies) + self.obs_output = self._make_obs("output") + + def forward(self, seqlen: int) -> torch.Tensor: + """ + Forward pass with fake quantization. + + Args: + seqlen: Sequence length (number of positions/tokens) + + Returns: + Rotary frequencies of shape (seqlen, dim/2) + """ + # Compute sequence tensor + seq = torch.arange( + seqlen, device=self.inv_freq.device, dtype=self.inv_freq.dtype + ) + + # Compute outer product (frequencies) + freqs = torch.outer(seq, self.inv_freq) + + # Quantize output activation + freqs = self._fq(freqs, self.obs_output) + + return freqs + + def _all_observers(self): + """Yield the output observer.""" + yield self.obs_output diff --git a/tico/quantization/wrapq/wrappers/registry.py b/tico/quantization/wrapq/wrappers/registry.py index 8eb896bd..aab4ff88 100644 --- a/tico/quantization/wrapq/wrappers/registry.py +++ b/tico/quantization/wrapq/wrappers/registry.py @@ -45,6 +45,7 @@ "tico.quantization.wrapq.wrappers.qwen_vl.quant_text_attn", "tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp", "tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_patch_embed", + "tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_rotary_embedding", # add future core wrappers here )