From fbe84277cdea74a679ecc4d601d654f21aea753f Mon Sep 17 00:00:00 2001
From: Mergen Nachin
Date: Tue, 3 Feb 2026 17:42:58 -0500
Subject: [PATCH] Enable HQQ quantization algorithm by default

Add a configurable qparams_algorithm parameter to quantize_model_(), with
"hqq_scale_only" as the default. HQQ (Half-Quadratic Quantization) improves
accuracy by minimizing the weight reconstruction error during quantization.

The new parameter is applied to IntxWeightOnlyConfig and
Int8DynamicActivationIntxWeightConfig. Int4WeightOnlyConfig (4w with a
packing format) keeps the hardcoded "hqq" because it uses a different API,
and UIntxWeightOnlyConfig (fpa4w) is unchanged because it lacks this
parameter.

Users can override the default globally via:

    from optimum.exporters.executorch import quantization

    quantization.DEFAULT_QPARAMS_ALGORITHM = "affine"
---
 optimum/exporters/executorch/quantization.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/optimum/exporters/executorch/quantization.py b/optimum/exporters/executorch/quantization.py
index 7e32244..53c3d4e 100644
--- a/optimum/exporters/executorch/quantization.py
+++ b/optimum/exporters/executorch/quantization.py
@@ -18,6 +18,11 @@
 import torch
 
 
+# Applied to IntxWeightOnlyConfig and Int8DynamicActivationIntxWeightConfig.
+# Not applied to Int4WeightOnlyConfig (different API) or UIntxWeightOnlyConfig (no such param).
+DEFAULT_QPARAMS_ALGORITHM = "hqq_scale_only"
+
+
 def quantize_model_(
     eager_model: torch.nn.Module,
     qlinear_config: Optional[str] = None,
@@ -25,7 +30,12 @@ def quantize_model_(
     qlinear_packing_format: Optional[str] = None,
     qembedding_config: Optional[str] = None,
     qembedding_group_size: Optional[int] = 0,
+    qparams_algorithm: Optional[str] = None,
 ) -> torch.nn.Module:
+    # qparams_algorithm is applied to IntxWeightOnlyConfig and Int8DynamicActivationIntxWeightConfig.
+    # Not applied to Int4WeightOnlyConfig (different API) or UIntxWeightOnlyConfig (no such param).
+    if qparams_algorithm is None:
+        qparams_algorithm = DEFAULT_QPARAMS_ALGORITHM
     if not (qlinear_config or qembedding_config):
         return
 
@@ -54,10 +64,12 @@ def quantize_model_(
             "4w": IntxWeightOnlyConfig(
                 weight_dtype=torch.int4,
                 granularity=embedding_weight_granularity,
+                intx_choose_qparams_algorithm=qparams_algorithm,
             ),
             "8w": IntxWeightOnlyConfig(
                 weight_dtype=torch.int8,
                 granularity=embedding_weight_granularity,
+                intx_choose_qparams_algorithm=qparams_algorithm,
             ),
         }[qembedding_config]
 
@@ -75,6 +87,7 @@ def build_linear_config(quant_config_key: str, granularity: str, packing_format:
         return Int8DynamicActivationIntxWeightConfig(
             weight_dtype=torch.int4,
             weight_granularity=granularity,
+            intx_choose_qparams_algorithm=qparams_algorithm,
         )
     if quant_config_key == "4w":
         # Determine if we need to use Int4WeightOnlyConfig with int4_packing_format
@@ -88,16 +101,19 @@ def build_linear_config(quant_config_key: str, granularity: str, packing_format:
         return IntxWeightOnlyConfig(
             weight_dtype=torch.int4,
             granularity=granularity,
+            intx_choose_qparams_algorithm=qparams_algorithm,
         )
     if quant_config_key == "8w":
         return IntxWeightOnlyConfig(
             weight_dtype=torch.int8,
             granularity=granularity,
+            intx_choose_qparams_algorithm=qparams_algorithm,
         )
     if quant_config_key == "8da8w":
         return Int8DynamicActivationIntxWeightConfig(
             weight_dtype=torch.int8,
             weight_granularity=PerAxis(0),
+            intx_choose_qparams_algorithm=qparams_algorithm,
        )
     if quant_config_key == "fpa4w":
         # Need to import to load the ops
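
Usage sketch (not part of the patch): how a caller might exercise the new
qparams_algorithm parameter, either per call or via the module-level default.
The checkpoint name is illustrative; the "8da8w" and "8w" config keys are the
ones visible in the diff above, and quantize_model_ is assumed to quantize the
eager module in place.

    from transformers import AutoModelForCausalLM

    from optimum.exporters.executorch import quantization

    # Option 1: change the default for every subsequent quantize_model_ call.
    quantization.DEFAULT_QPARAMS_ALGORITHM = "affine"

    # Illustrative eager model; any torch.nn.Module works here.
    model = AutoModelForCausalLM.from_pretrained("my-org/my-small-llm")

    # Option 2: override per call; passing None falls back to the module default.
    quantization.quantize_model_(
        model,
        qlinear_config="8da8w",              # int8 dynamic activations, int8 weights
        qembedding_config="8w",              # int8 embedding weights
        qparams_algorithm="hqq_scale_only",  # or "affine" to opt out of HQQ
    )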