From 3646461761895708e8b18780a1a2705bcb526625 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Jun 2025 15:38:56 +0000
Subject: [PATCH 1/3] Initial plan for issue


From 8cc5271cfe7d91c7bc374f5cd69c9816f1faa71f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Jun 2025 15:49:09 +0000
Subject: [PATCH 2/3] Implement quantize_per_channel and dequantize_per_channel
 for torchlib

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 .../torch_lib/ops/quantized_decomposed.py | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py b/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
index 92962a9ea6..9dfd8a4da1 100644
--- a/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
+++ b/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
@@ -14,7 +14,9 @@
 from onnxscript.function_libs.torch_lib.ops import common
 from onnxscript.function_libs.torch_lib.registration import torch_op
 from onnxscript.onnx_opset import opset18 as op
+from onnxscript.onnx_opset import opset23 as op23
 from onnxscript.onnx_types import TensorType
+from typing import Optional
 
 
 @torch_op(
@@ -61,3 +63,61 @@ def quantized_decomposed_dequantize_per_tensor(
         return dequantized
     assert out_dtype > 0, f"out_dtype must be -1 or > 0 not {out_dtype}"
     return op.Cast(dequantized, to=out_dtype)
+
+
+@torch_op(
+    (
+        "quantized_decomposed::quantize_per_channel",
+        "quantized_decomposed::quantize_per_channel.tensor",
+        "quantized_decomposed::quantize_per_channel.tensor2",
+    ),
+    trace_only=True,
+)
+def quantized_decomposed_quantize_per_channel(
+    input: TensorType,
+    scales: TensorType,
+    zero_points: TensorType,
+    axis: int,
+    quant_min: int,
+    quant_max: int,
+    dtype: int,
+) -> TensorType:
+    """Affine per channel quantization for the Tensor using the same quantization
+    parameters for each channel/axis to map from floating point to quantized values.
+
+    Uses ONNX QuantizeLinear with per-axis quantization support.
+    """
+    # Use opset23 for per-axis quantization support
+    return op23.QuantizeLinear(input, scales, zero_points, axis=axis, output_dtype=dtype)
+
+
+@torch_op(
+    (
+        "quantized_decomposed::dequantize_per_channel",
+        "quantized_decomposed::dequantize_per_channel.tensor",
+        "quantized_decomposed::dequantize_per_channel.tensor2",
+    ),
+    trace_only=True,
+)
+def quantized_decomposed_dequantize_per_channel(
+    input: TensorType,
+    scales: TensorType,
+    zero_points: Optional[TensorType],
+    axis: int,
+    quant_min: int,
+    quant_max: int,
+    dtype: int,
+    out_dtype: int = -1,
+) -> TensorType:
+    """Affine per channel dequantization for the Tensor using the same quantization
+    parameters for each channel/axis to map from quantized values to floating point values.
+
+    Uses ONNX DequantizeLinear with per-axis quantization support.
+    """
+    # Use opset23 for per-axis quantization support with optional output_dtype
+    if out_dtype in (-1, None):
+        # Use default output type (same as scales type)
+        return op23.DequantizeLinear(input, scales, zero_points, axis=axis)
+    else:
+        assert out_dtype > 0, f"out_dtype must be -1 or > 0 not {out_dtype}"
+        return op23.DequantizeLinear(input, scales, zero_points, axis=axis, output_dtype=out_dtype)

From 82c8f9edfeaf57e9908cd0f98c2acac62e5aa050 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 18 Jun 2025 00:28:13 +0000
Subject: [PATCH 3/3] Format code using lintrunner

Co-authored-by: justinchuby <11205048+justinchuby@users.noreply.github.com>
---
 .../torch_lib/ops/quantized_decomposed.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py b/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
index 9dfd8a4da1..ae82f1572b 100644
--- a/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
+++ b/onnxscript/function_libs/torch_lib/ops/quantized_decomposed.py
@@ -11,12 +11,13 @@
 
 from __future__ import annotations
 
+from typing import Optional
+
 from onnxscript.function_libs.torch_lib.ops import common
 from onnxscript.function_libs.torch_lib.registration import torch_op
 from onnxscript.onnx_opset import opset18 as op
 from onnxscript.onnx_opset import opset23 as op23
 from onnxscript.onnx_types import TensorType
-from typing import Optional
 
 
 @torch_op(
@@ -84,7 +85,7 @@ def quantized_decomposed_quantize_per_channel(
 ) -> TensorType:
     """Affine per channel quantization for the Tensor using the same quantization
     parameters for each channel/axis to map from floating point to quantized values.
-    
+
     Uses ONNX QuantizeLinear with per-axis quantization support.
     """
     # Use opset23 for per-axis quantization support
@@ -111,7 +112,7 @@ def quantized_decomposed_dequantize_per_channel(
 ) -> TensorType:
     """Affine per channel dequantization for the Tensor using the same quantization
     parameters for each channel/axis to map from quantized values to floating point values.
-    
+
     Uses ONNX DequantizeLinear with per-axis quantization support.
     """
     # Use opset23 for per-axis quantization support with optional output_dtype
@@ -120,4 +121,6 @@ def quantized_decomposed_dequantize_per_channel(
         return op23.DequantizeLinear(input, scales, zero_points, axis=axis)
     else:
         assert out_dtype > 0, f"out_dtype must be -1 or > 0 not {out_dtype}"
-        return op23.DequantizeLinear(input, scales, zero_points, axis=axis, output_dtype=out_dtype)
+        return op23.DequantizeLinear(
+            input, scales, zero_points, axis=axis, output_dtype=out_dtype
+        )