From ec90278c162e53526d3c9cb6023dad08c5a40996 Mon Sep 17 00:00:00 2001 From: "s.malakhov" Date: Wed, 11 Feb 2026 11:14:46 +0300 Subject: [PATCH 1/4] ssfdf --- .../wrappers/qwen_vl/test_quant_vision_mlp.py | 84 ++++++++++++++++ .../evaluation/script/mini_vqa_eval.py | 5 +- .../examples/quantize_qwen_vision_mlp.py | 95 +++++++++++++++++++ .../wrappers/qwen_vl/quant_vision_mlp.py | 93 ++++++++++++++++++ tico/quantization/wrapq/wrappers/registry.py | 1 + 5 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py create mode 100644 tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py create mode 100644 tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py diff --git a/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py new file mode 100644 index 00000000..7bd0ca8e --- /dev/null +++ b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py @@ -0,0 +1,84 @@ +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib +import tempfile +import unittest +import warnings + +import tico + +import torch +from tico.quantization.wrapq.mode import Mode +from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp import QuantQwen3VLVisionMLP +from transformers.activations import GELUTanh + +class DummyMLP(torch.nn.Module): + """Tiny stand-in for HF LlamaMLP (hidden=4, inter=8).""" + + def __init__(self): + super().__init__() + self.linear_fc1 = torch.nn.Linear(4, 8) + self.linear_fc2 = torch.nn.Linear(8, 4) + self.act_fn = GELUTanh() #torch.nn.SiLU() + + def forward(self, x): + return self.linear_fc2(self.act_fn(self.linear_fc1(x))) + + +class TestQuantQwenVisionMLP(unittest.TestCase): + def setUp(self): + torch.manual_seed(0) + self.fp32 = DummyMLP() + self.quant = QuantQwen3VLVisionMLP(self.fp32) + self.x = torch.randn(32, 4) + + def test_mode_and_forward(self): + # calibration + self.quant.enable_calibration() + _ = self.quant(self.x) + self.quant.freeze_qparams() + self.assertIs(self.quant._mode, Mode.QUANT) + + # forward diff + with torch.no_grad(): + q = self.quant(self.x) + f = self.fp32(self.x) + diff = (q - f).abs().mean().item() + self.assertLess(diff, 0.7) # loose bound + self.assertGreater(diff, 0.0) + + +class TestSubgraphExport(unittest.TestCase): + def setUp(self): + torch.manual_seed(0) + self.mlp_int8 = QuantQwen3VLVisionMLP(DummyMLP()).eval() + self.x = torch.randn(16, 4) + + def test_calib_quant_export(self): + # calib + self.mlp_int8.enable_calibration() + _ = self.mlp_int8(self.x) + self.mlp_int8.freeze_qparams() + + self.assertIs(self.mlp_int8._mode, Mode.QUANT) + + # export + with tempfile.TemporaryDirectory() as td: + path = pathlib.Path(td) / "mlp.circle" + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + exported = tico.convert(self.mlp_int8, (self.x[:1],)) + exported.save(path) + 
self.assertTrue(path.exists()) diff --git a/tico/quantization/evaluation/script/mini_vqa_eval.py b/tico/quantization/evaluation/script/mini_vqa_eval.py index e015658b..4c4b7d9e 100644 --- a/tico/quantization/evaluation/script/mini_vqa_eval.py +++ b/tico/quantization/evaluation/script/mini_vqa_eval.py @@ -231,6 +231,8 @@ def main(): default="bfloat16", choices=["float16", "bfloat16", "float32"], ) + ap.add_argument("--cache_dir", type=str, default="cpu") + args = ap.parse_args() # Reproducibility @@ -270,11 +272,12 @@ def main(): torch_dtype = dtype_map[args.dtype] # Load model and processor - processor = AutoProcessor.from_pretrained(args.model_id, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(args.model_id, trust_remote_code=True, cache_dir=args.cache_dir) model = AutoModelForVision2Seq.from_pretrained( args.model_id, torch_dtype=torch_dtype, trust_remote_code=True, + cache_dir=args.cache_dir, ).to(args.device) model.eval() diff --git a/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py b/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py new file mode 100644 index 00000000..3ad5ca0c --- /dev/null +++ b/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib + +import torch +from transformers import AutoModelForVision2Seq + +from tico.quantization import convert, prepare +from tico.quantization.config.ptq import PTQConfig +from tico.quantization.evaluation.metric import compute_peir +from tico.quantization.evaluation.utils import plot_two_outputs +from tico.quantization.wrapq.mode import Mode +from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp import ( + QuantQwen3VLVisionMLP, +) +from tico.utils.utils import SuppressWarning + +# ------------------------------------------------------------------------- +# 0. Load a Qwen3-VL model (text tower) + tokenizer +# ------------------------------------------------------------------------- +name = "Qwen/Qwen3-VL-2B-Instruct" +model = AutoModelForVision2Seq.from_pretrained( + name, + device_map="cpu", + trust_remote_code=True, + cache_dir="/mnt/storage/transformers_cache" +) +model.eval() + +# ------------------------------------------------------------------------- +# 1. Replace layer-0’s mlp with QuantQwen3VLVisionMLP +# ------------------------------------------------------------------------- +orig_mlp = model.model.visual.blocks[0].mlp +mlp_q = prepare(orig_mlp, PTQConfig()) + +assert isinstance(mlp_q.wrapped, QuantQwen3VLVisionMLP) + +inp_shape = (orig_mlp.intermediate_size, orig_mlp.hidden_size) +# ------------------------------------------------------------------------- +# 2. 
calibration +# ------------------------------------------------------------------------- +examples = [ + torch.randn(inp_shape), + torch.randn(inp_shape), + torch.randn(inp_shape), +] + +with torch.no_grad(): + for example in examples: + _ = mlp_q(example) + +convert(mlp_q) +assert mlp_q._mode is Mode.QUANT, "Quantization mode should be active now." + +# ------------------------------------------------------------------------- +# 3. Quick diff check (INT-sim vs FP32) +# ------------------------------------------------------------------------- +hidden = examples[0] + +with torch.no_grad(): + int8_out = mlp_q(hidden) + fp_out = orig_mlp(hidden) + +print("┌───────────── Quantization Error Summary ─────────────") +print(f"│ Mean |diff|: {(int8_out - fp_out).abs().mean().item():.6f}") +print(f"│ PEIR : {compute_peir(fp_out, int8_out) * 100:.6f} %") +print("└──────────────────────────────────────────────────────") +print(plot_two_outputs(fp_out, int8_out)) + +# ------------------------------------------------------------------------- +# 4. Export the quantized block +# ------------------------------------------------------------------------- +import tico + +save_path = pathlib.Path("qwen3vl_vision_mlp.q.circle") + +example = torch.randn(inp_shape) + +with SuppressWarning(UserWarning, ".*"): + cm = tico.convert(mlp_q, (example, )) +cm.save(save_path) + +print(f"Quantized Circle model saved to {save_path.resolve()}") diff --git a/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py b/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py new file mode 100644 index 00000000..e7afc04e --- /dev/null +++ b/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py @@ -0,0 +1,93 @@ +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +from typing import Iterable, Optional, Tuple + +import torch +import torch.nn as nn + +from tico.quantization.config.ptq import PTQConfig +from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper +from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase +from tico.quantization.wrapq.wrappers.registry import try_register + + +@try_register( + "transformers.models.qwen3_vl.modeling_qwen3_vl.Qwen3VLVisionMLP", +) +class QuantQwen3VLVisionMLP(QuantModuleBase): + def __init__( + self, + mlp_fp: nn.Module, + *, + qcfg: Optional[PTQConfig] = None, + fp_name: Optional[str] = None, + ): + super().__init__(qcfg, fp_name=fp_name) + linear_fc1_cfg = qcfg.child("linear_fc1") if qcfg else None + linear_fc2_cfg = qcfg.child("linear_fc2") if qcfg else None + act_cfg = qcfg.child("act_fn") if qcfg else None + + # ----- wrap three Linear layers ------------------------------- + assert hasattr(mlp_fp, "linear_fc1") and isinstance( + mlp_fp.linear_fc1, torch.nn.Module + ) + assert hasattr(mlp_fp, "linear_fc2") and isinstance( + mlp_fp.linear_fc2, torch.nn.Module + ) + + self.linear_fc1 = PTQWrapper( + mlp_fp.linear_fc1, qcfg=linear_fc1_cfg, fp_name=f"{fp_name}.linear_fc1" + ) + self.linear_fc2 = PTQWrapper( + mlp_fp.linear_fc2, qcfg=linear_fc2_cfg, fp_name=f"{fp_name}.linear_fc2" + ) + + # ----- activation --------------------------------------------- + assert hasattr(mlp_fp, "act_fn") and isinstance(mlp_fp.act_fn, torch.nn.Module) + # self.act_fn = PTQWrapper( + # mlp_fp.act_fn, qcfg=act_cfg, fp_name=f"{fp_name}.act_fn" + # ) + self.act_fn = mlp_fp.act_fn + + # ----- local observers ---------------------------------------- + self.obs_act_in = self._make_obs("act_in") + self.obs_act_out = self._make_obs("act_out") + + + def forward(self, hidden_state): + + # self.linear_fc2(self.act_fn(self.linear_fc1(hidden_state))) + + # 1) quantize input once + x_q = self._fq(hidden_state, self.obs_act_in) + + # 2) linear_fc1 + fc1 = self.linear_fc1(x_q) + + # 3) activation on linear_fc1 + a = self.act_fn(fc1) + + # 4) linear_fc2 + h = self._fq(self.linear_fc2(a), self.obs_act_out) + + return h + + def _all_observers(self) -> Iterable: + yield self.obs_act_in + yield self.obs_act_out + # recurse into children that are QuantModuleBase + for m in (self.linear_fc1, self.linear_fc2):#, self.act_fn): + yield from m._all_observers() \ No newline at end of file diff --git a/tico/quantization/wrapq/wrappers/registry.py b/tico/quantization/wrapq/wrappers/registry.py index 0bf5f477..7e9f51e0 100644 --- a/tico/quantization/wrapq/wrappers/registry.py +++ b/tico/quantization/wrapq/wrappers/registry.py @@ -42,6 +42,7 @@ "tico.quantization.wrapq.wrappers.fairseq.quant_mha", ## qwen_vl ## "tico.quantization.wrapq.wrappers.qwen_vl.quant_text_attn", + "tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp", # add future core wrappers here ) From 4ac91ceaf050849cea62938715e372943ae67f74 Mon Sep 17 00:00:00 2001 From: "d.savchenkov" Date: Tue, 10 Feb 2026 15:53:40 +0300 Subject: [PATCH 2/4] [quantization] Introduce wrapper for GELUTanh This change introduces QuantGELUTanh wrapper to support post-training quantization of GELUTanh operation. 
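
A minimal usage sketch (illustrative only; it mirrors the unit test added
in this patch and assumes the default uint8 observer configuration):

    import torch
    from transformers.activations import GELUTanh

    from tico.quantization.wrapq.wrappers.nn.quant_gelutanh import QuantGELUTanh

    act = QuantGELUTanh(GELUTanh())   # wrap the FP32 activation (default uint8 observers)
    act.enable_calibration()          # Mode.CALIB: observers record activation ranges
    _ = act(torch.randn(128, 4))      # calibration pass
    act.freeze_qparams()              # Mode.QUANT: fake-quant is applied on forward
    y = act(torch.randn(128, 4))      # quantization-simulated output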
TICO-DCO-1.0-Signed-off-by: d.savchenkov --- .../wrapq/wrappers/nn/test_quant_gelutanh.py | 146 ++++++++++++++++++ .../wrapq/wrappers/nn/quant_gelutanh.py | 72 +++++++++ tico/quantization/wrapq/wrappers/registry.py | 1 + 3 files changed, 219 insertions(+) create mode 100644 test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py create mode 100644 tico/quantization/wrapq/wrappers/nn/quant_gelutanh.py diff --git a/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py b/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py new file mode 100644 index 00000000..cabd1722 --- /dev/null +++ b/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py @@ -0,0 +1,146 @@ +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib.util +import unittest + +import torch +import torch.nn as nn + +from tico.quantization.config.ptq import PTQConfig +from tico.quantization.wrapq.dtypes import DType +from tico.quantization.wrapq.mode import Mode +from tico.quantization.wrapq.wrappers.nn.quant_gelutanh import QuantGELUTanh + +trans_spec = importlib.util.find_spec("transformers") +skip_msg = "transformers not installed — skipping TestQuantGELUTanh tests" + + +@unittest.skipUnless(trans_spec, skip_msg) +class TestQuantGELUTanh(unittest.TestCase): + gelutanh: type + + @classmethod + def setUpClass(cls): + import transformers + + cls.gelutanh = transformers.activations.GELUTanh + + def setUp(self): + torch.manual_seed(0) + self.x = torch.randn(128, 4) * 3 # wider than N(0,1) for better tanh coverage + self.fp_gelu_tanh = self.gelutanh() + self.qgelu_tanh = QuantGELUTanh(self.fp_gelu_tanh) # default uint8 + + def test_mode_transitions(self): + """Test quantization mode transitions: NO_QUANT → CALIB → QUANT""" + self.assertIs(self.qgelu_tanh._mode, Mode.NO_QUANT) + self.qgelu_tanh.enable_calibration() + self.assertIs(self.qgelu_tanh._mode, Mode.CALIB) + _ = self.qgelu_tanh(self.x) # collect stats + self.qgelu_tanh.freeze_qparams() + self.assertIs(self.qgelu_tanh._mode, Mode.QUANT) + + def test_quantised_output(self): + """ + Test that quantized output is acceptably close to FP32 reference. + After calibration and freeze, quantized output should: + - Differ from FP reference (quantization actually applied) + - Stay within reasonable error bounds + """ + self.qgelu_tanh.enable_calibration() + _ = self.qgelu_tanh(self.x) + self.qgelu_tanh.freeze_qparams() + + with torch.no_grad(): + q_out = self.qgelu_tanh(self.x) + fp_out = self.gelutanh()(self.x) + + diff = (q_out - fp_out).abs().mean().item() + self.assertGreater(diff, 0.0) # not identical (quantization applied) + self.assertLess(diff, 0.3) # acceptably close (same tolerance as SiLU) + + def test_dtype_override(self): + """ + PTQConfig overrides should propagate to observers created by QuantGELUTanh. + Test that different dtypes can be applied to intermediate activations. 
+ """ + cfg = PTQConfig( + default_dtype=DType.uint(8), + overrides={ + "tanh": {"dtype": DType.uint(4)}, + "mul": {"dtype": DType.uint(4)}, + }, + ) + qgelu_custom = QuantGELUTanh(self.fp_gelu_tanh, qcfg=cfg) + + # Check that overrides were applied + self.assertEqual(qgelu_custom.obs_tanh.dtype, DType.uint(4)) + self.assertEqual(qgelu_custom.obs_mul.dtype, DType.uint(4)) + + def test_activation_stats_collected(self): + """ + Test that activation statistics are properly collected during calibration. + All three observers (act_in, tanh, mul) should collect statistics. + """ + self.qgelu_tanh.enable_calibration() + + # Run forward pass to collect stats + _ = self.qgelu_tanh(self.x) + + # Check that activation observers have collected stats + self.assertTrue( + self.qgelu_tanh.obs_act_in.has_qparams + or self.qgelu_tanh.obs_act_in.min_val.numel() > 0 + ) + self.assertTrue( + self.qgelu_tanh.obs_tanh.has_qparams + or self.qgelu_tanh.obs_tanh.min_val.numel() > 0 + ) + self.assertTrue( + self.qgelu_tanh.obs_mul.has_qparams + or self.qgelu_tanh.obs_mul.min_val.numel() > 0 + ) + + # Freeze and check qparams exist + self.qgelu_tanh.freeze_qparams() + self.assertTrue(self.qgelu_tanh.obs_act_in.has_qparams) + self.assertTrue(self.qgelu_tanh.obs_tanh.has_qparams) + self.assertTrue(self.qgelu_tanh.obs_mul.has_qparams) + + def test_no_quant_matches_reference(self): + """ + In NO_QUANT mode, output should match FP32 reference exactly + (up to numerical tolerances). + """ + # Create fresh wrapper that stays in NO_QUANT mode + qgelu = QuantGELUTanh(self.fp_gelu_tanh) + + with torch.no_grad(): + q_out = qgelu(self.x) + fp_out = self.gelutanh()(self.x) + + self.assertIs(qgelu._mode, Mode.NO_QUANT) + self.assertTrue(torch.allclose(q_out, fp_out, atol=1e-6, rtol=1e-6)) + + def test_registration_in_registry(self): + """ + Test that GELUTanh is properly registered in the wrapper registry. + """ + from tico.quantization.wrapq.wrappers.nn.quant_gelutanh import QuantGELUTanh + from tico.quantization.wrapq.wrappers.registry import lookup + + # Verify GELUTanh maps to QuantGELUTanh + wrapper_cls = lookup(self.gelutanh) + self.assertIs(wrapper_cls, QuantGELUTanh) diff --git a/tico/quantization/wrapq/wrappers/nn/quant_gelutanh.py b/tico/quantization/wrapq/wrappers/nn/quant_gelutanh.py new file mode 100644 index 00000000..0988e1c2 --- /dev/null +++ b/tico/quantization/wrapq/wrappers/nn/quant_gelutanh.py @@ -0,0 +1,72 @@ +# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Optional + +import torch +import torch.nn as nn + +from tico.quantization.config.ptq import PTQConfig +from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase +from tico.quantization.wrapq.wrappers.registry import try_register + + +@try_register("transformers.activations.GELUTanh") +class QuantGELUTanh(QuantModuleBase): + """ + QuantGELUTanh — drop-in quantized implementation of the Tanh-based GELUTanh activation. 
+ + This module quantizes both intermediate tensors: + t = tanh(sqrt(2/π) * (x + 0.044715 * x^3)) (tanh) + y = x * 0.5 * (1 + t) (mul) + + GELUTanh formula: + GELUTanh(x) = x * 0.5 * (1 + tanh(sqrt(2/π) * (x + 0.044715 * x^3))) + """ + + def __init__( + self, + fp: nn.Module, + *, + qcfg: Optional[PTQConfig] = None, + fp_name: Optional[str] = None + ): + super().__init__(qcfg, fp_name=fp_name) + self.obs_act_in = self._make_obs("act_in") + self.obs_tanh = self._make_obs("tanh") + self.obs_mul = self._make_obs("mul") + self.module = fp + + def forward(self, x: torch.Tensor): + # Quantize input + x_q = self._fq(x, self.obs_act_in) + + # GELUTanh computation: x * 0.5 * (1 + tanh(sqrt(2/π) * (x + 0.044715 * x^3))) + x3 = x_q * x_q * x_q + inner = x_q + 0.044715 * x3 + t = torch.tanh(math.sqrt(2.0 / math.pi) * inner) + + # Quantize tanh output + t = self._fq(t, self.obs_tanh) + + y = x_q * 0.5 * (1 + t) + + # Quantize final output + y = self._fq(y, self.obs_mul) + + return y + + def _all_observers(self): + return (self.obs_act_in, self.obs_tanh, self.obs_mul) diff --git a/tico/quantization/wrapq/wrappers/registry.py b/tico/quantization/wrapq/wrappers/registry.py index 7e9f51e0..6fe00a1a 100644 --- a/tico/quantization/wrapq/wrappers/registry.py +++ b/tico/quantization/wrapq/wrappers/registry.py @@ -26,6 +26,7 @@ ## nn ## "tico.quantization.wrapq.wrappers.nn.quant_layernorm", "tico.quantization.wrapq.wrappers.nn.quant_linear", + "tico.quantization.wrapq.wrappers.nn.quant_gelutanh", # This includes not only `nn.SiLU` but also `SiLUActivation` from transformers # as they are same operation. "tico.quantization.wrapq.wrappers.nn.quant_silu", From 594fe9d1514a59ce8aa7faa6b3162ea09a398c85 Mon Sep 17 00:00:00 2001 From: "s.malakhov" Date: Wed, 11 Feb 2026 11:38:37 +0300 Subject: [PATCH 3/4] [quantization] Introduce Qwen3VLVisionMLP wrapper This commit introduces Qwen3VLVisionMLP wrapper. 
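
A condensed sketch of the calibrate-and-convert flow, following the bundled
quantize_qwen_vision_mlp.py example (random tensors stand in for real vision
features; the model id, shapes, and API calls are taken from that example):

    import torch
    from transformers import AutoModelForVision2Seq

    from tico.quantization import convert, prepare
    from tico.quantization.config.ptq import PTQConfig
    from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp import (
        QuantQwen3VLVisionMLP,
    )

    model = AutoModelForVision2Seq.from_pretrained(
        "Qwen/Qwen3-VL-2B-Instruct", device_map="cpu", trust_remote_code=True
    ).eval()

    orig_mlp = model.model.visual.blocks[0].mlp   # vision block 0 MLP
    mlp_q = prepare(orig_mlp, PTQConfig())        # wrapped module exposes QuantQwen3VLVisionMLP
    mlp_q.eval()
    assert isinstance(mlp_q.wrapped, QuantQwen3VLVisionMLP)

    with torch.no_grad():                         # calibration passes
        for _ in range(3):
            _ = mlp_q(torch.randn(orig_mlp.intermediate_size, orig_mlp.hidden_size))

    convert(mlp_q)                                # freeze qparams, enter QUANT mode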
--- .../wrappers/qwen_vl/test_quant_vision_mlp.py | 7 +++++-- .../examples/quantize_qwen_vision_mlp.py | 6 +++--- .../wrappers/qwen_vl/quant_vision_mlp.py | 20 +++++++++---------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py index 7bd0ca8e..8b58ae7f 100644 --- a/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py +++ b/test/quantization/wrapq/wrappers/qwen_vl/test_quant_vision_mlp.py @@ -21,9 +21,12 @@ import torch from tico.quantization.wrapq.mode import Mode -from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp import QuantQwen3VLVisionMLP +from tico.quantization.wrapq.wrappers.qwen_vl.quant_vision_mlp import ( + QuantQwen3VLVisionMLP, +) from transformers.activations import GELUTanh + class DummyMLP(torch.nn.Module): """Tiny stand-in for HF LlamaMLP (hidden=4, inter=8).""" @@ -31,7 +34,7 @@ def __init__(self): super().__init__() self.linear_fc1 = torch.nn.Linear(4, 8) self.linear_fc2 = torch.nn.Linear(8, 4) - self.act_fn = GELUTanh() #torch.nn.SiLU() + self.act_fn = GELUTanh() def forward(self, x): return self.linear_fc2(self.act_fn(self.linear_fc1(x))) diff --git a/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py b/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py index 3ad5ca0c..80c4b665 100644 --- a/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py +++ b/tico/quantization/wrapq/examples/quantize_qwen_vision_mlp.py @@ -35,7 +35,7 @@ name, device_map="cpu", trust_remote_code=True, - cache_dir="/mnt/storage/transformers_cache" + cache_dir="/mnt/storage/transformers_cache", ) model.eval() @@ -44,7 +44,7 @@ # ------------------------------------------------------------------------- orig_mlp = model.model.visual.blocks[0].mlp mlp_q = prepare(orig_mlp, PTQConfig()) - +mlp_q.eval() assert isinstance(mlp_q.wrapped, QuantQwen3VLVisionMLP) inp_shape = (orig_mlp.intermediate_size, orig_mlp.hidden_size) @@ -89,7 +89,7 @@ example = torch.randn(inp_shape) with SuppressWarning(UserWarning, ".*"): - cm = tico.convert(mlp_q, (example, )) + cm = tico.convert(mlp_q, (example,)) cm.save(save_path) print(f"Quantized Circle model saved to {save_path.resolve()}") diff --git a/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py b/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py index e7afc04e..19d619e0 100644 --- a/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +++ b/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py @@ -39,7 +39,7 @@ def __init__( linear_fc1_cfg = qcfg.child("linear_fc1") if qcfg else None linear_fc2_cfg = qcfg.child("linear_fc2") if qcfg else None act_cfg = qcfg.child("act_fn") if qcfg else None - + # ----- wrap three Linear layers ------------------------------- assert hasattr(mlp_fp, "linear_fc1") and isinstance( mlp_fp.linear_fc1, torch.nn.Module @@ -47,28 +47,26 @@ def __init__( assert hasattr(mlp_fp, "linear_fc2") and isinstance( mlp_fp.linear_fc2, torch.nn.Module ) - + self.linear_fc1 = PTQWrapper( mlp_fp.linear_fc1, qcfg=linear_fc1_cfg, fp_name=f"{fp_name}.linear_fc1" ) self.linear_fc2 = PTQWrapper( mlp_fp.linear_fc2, qcfg=linear_fc2_cfg, fp_name=f"{fp_name}.linear_fc2" ) - + # ----- activation --------------------------------------------- assert hasattr(mlp_fp, "act_fn") and isinstance(mlp_fp.act_fn, torch.nn.Module) - # self.act_fn = PTQWrapper( - # mlp_fp.act_fn, qcfg=act_cfg, fp_name=f"{fp_name}.act_fn" - # ) - self.act_fn = 
mlp_fp.act_fn + self.act_fn = PTQWrapper( + mlp_fp.act_fn, qcfg=act_cfg, fp_name=f"{fp_name}.act_fn" + ) # ----- local observers ---------------------------------------- self.obs_act_in = self._make_obs("act_in") self.obs_act_out = self._make_obs("act_out") - def forward(self, hidden_state): - + # self.linear_fc2(self.act_fn(self.linear_fc1(hidden_state))) # 1) quantize input once @@ -89,5 +87,5 @@ def _all_observers(self) -> Iterable: yield self.obs_act_in yield self.obs_act_out # recurse into children that are QuantModuleBase - for m in (self.linear_fc1, self.linear_fc2):#, self.act_fn): - yield from m._all_observers() \ No newline at end of file + for m in (self.linear_fc1, self.linear_fc2, self.act_fn): + yield from m._all_observers() From 4173a2662015411cdb0977dd7c1cddecc9a269f5 Mon Sep 17 00:00:00 2001 From: Stanislav Malakhov <112689352+stamalakhov@users.noreply.github.com> Date: Thu, 12 Feb 2026 07:33:43 +0300 Subject: [PATCH 4/4] Apply suggestions from code review Apply suggestions from code review Co-authored-by: Dayoung Lee --- test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py b/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py index cabd1722..3969b610 100644 --- a/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py +++ b/test/quantization/wrapq/wrappers/nn/test_quant_gelutanh.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved +# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.