Skip to content

Commit

Permalink
Quantization tool: Allow user to override calibrator's session EP (#23559)
Browse files Browse the repository at this point in the history

### Description
The quantization calibrators have `execution_providers` attributes but
there is no way for a user to provide their own providers when using the
`quantize` or `quantize_static` functions. This PR adds a
`calibration_providers` parameter to allow users to specify the
execution providers to use during calibration. It is helpful when
quantizing large models which are slow to calibrate on the CPU.
- Chose `calibration_providers` as the name because the existing
docstrings already refer to a different `execution_provider` parameter:
https://github.com/microsoft/onnxruntime/blob/169917b1e7f69daa687a5448526c189d1f7a4e2b/onnxruntime/python/tools/quantization/quantize.py#L204

https://github.com/microsoft/onnxruntime/blob/169917b1e7f69daa687a5448526c189d1f7a4e2b/onnxruntime/python/tools/quantization/quantize.py#L415
which are not present anywhere in the code.
- Can change the name to something else if needed like
calibrator_providers, and/or make it into a string instead of a
providers list.
  • Loading branch information
jambayk authored Feb 6, 2025
1 parent 649ced4 commit d1fb58b
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
5 changes: 4 additions & 1 deletion onnxruntime/python/tools/quantization/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ def add_reduce_min_max(tensor_name, reduce_op_name):
else:
raise ValueError(
f"Unable to guess tensor type for tensor {tensor_name!r}, "
f"running shape inference before quantization may resolve this issue."
"running shape inference before quantization may resolve this issue."
)

# Include axes in reduce_op when per_channel, always keeping axis=1
Expand Down Expand Up @@ -1177,6 +1177,7 @@ def create_calibrator(
augmented_model_path="augmented_model.onnx",
calibrate_method=CalibrationMethod.MinMax,
use_external_data_format=False,
providers=None,
extra_options={}, # noqa: B006
):
calibrator = None
Expand Down Expand Up @@ -1243,6 +1244,8 @@ def create_calibrator(

if calibrator:
calibrator.augment_graph()
if providers:
calibrator.execution_providers = providers
calibrator.create_inference_session()
return calibrator

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def get_qnn_qdq_config(
weight_symmetric: bool | None = None,
keep_removable_activations: bool = False,
stride: int | None = None,
calibration_providers: list[str] | None = None,
) -> StaticQuantConfig:
"""
Returns a static quantization configuration suitable for running QDQ models on QNN EP.
Expand Down Expand Up @@ -117,6 +118,8 @@ def get_qnn_qdq_config(
are automatically removed if activations are asymmetrically quantized. Keeping these activations
is necessary if optimizations or EP transformations will later remove
QuantizeLinear/DequantizeLinear operators from the model.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
Returns:
A StaticQuantConfig object
Expand Down Expand Up @@ -192,6 +195,7 @@ def get_qnn_qdq_config(
op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
per_channel=per_channel,
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
calibration_providers=calibration_providers,
extra_options=extra_options,
)

Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/python/tools/quantization/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __init__(
per_channel=False,
reduce_range=False,
use_external_data_format=False,
calibration_providers=None,
extra_options=None,
):
"""
Expand All @@ -112,6 +113,8 @@ def __init__(
quant_format: QuantFormat{QOperator, QDQ}.
QOperator format quantizes the model with quantized operators directly.
QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ].
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
Expand Down Expand Up @@ -219,6 +222,7 @@ def __init__(
self.calibration_data_reader = calibration_data_reader
self.calibrate_method = calibrate_method
self.quant_format = quant_format
self.calibration_providers = calibration_providers
self.extra_options = extra_options or {}


Expand Down Expand Up @@ -473,6 +477,7 @@ def quantize_static(
nodes_to_exclude=None,
use_external_data_format=False,
calibrate_method=CalibrationMethod.MinMax,
calibration_providers=None,
extra_options=None,
):
"""
Expand Down Expand Up @@ -520,6 +525,8 @@ def quantize_static(
List of nodes names to exclude. The nodes in this list will be excluded from quantization
when it is not None.
use_external_data_format: option used for large size (>2GB) model. Set to False by default.
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
[ "CPUExecutionProvider" ]
extra_options:
key value pair dictionary for various options in different case. Current used:
extra.Sigmoid.nnapi = True/False (Default is False)
Expand Down Expand Up @@ -697,6 +704,7 @@ def inc_dataloader():
augmented_model_path=Path(quant_tmp_dir).joinpath("augmented_model.onnx").as_posix(),
calibrate_method=calibrate_method,
use_external_data_format=use_external_data_format,
providers=calibration_providers,
extra_options=calib_extra_options,
)

Expand Down Expand Up @@ -890,6 +898,7 @@ def quantize(
per_channel=quant_config.per_channel,
reduce_range=quant_config.reduce_range,
use_external_data_format=quant_config.use_external_data_format,
calibration_providers=quant_config.calibration_providers,
extra_options=quant_config.extra_options,
)

Expand Down

0 comments on commit d1fb58b

Please sign in to comment.