diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 4d5dbcc06ddc6..26427366c1617 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -380,7 +380,7 @@ def add_reduce_min_max(tensor_name, reduce_op_name): else: raise ValueError( f"Unable to guess tensor type for tensor {tensor_name!r}, " - f"running shape inference before quantization may resolve this issue." + "running shape inference before quantization may resolve this issue." ) # Include axes in reduce_op when per_channel, always keeping axis=1 @@ -1177,6 +1177,7 @@ def create_calibrator( augmented_model_path="augmented_model.onnx", calibrate_method=CalibrationMethod.MinMax, use_external_data_format=False, + providers=None, extra_options={}, # noqa: B006 ): calibrator = None @@ -1243,6 +1244,8 @@ def create_calibrator( if calibrator: calibrator.augment_graph() + if providers: + calibrator.execution_providers = providers calibrator.create_inference_session() return calibrator diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index eac5b3b78690b..1f38adb483323 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -53,6 +53,7 @@ def get_qnn_qdq_config( weight_symmetric: bool | None = None, keep_removable_activations: bool = False, stride: int | None = None, + calibration_providers: list[str] | None = None, ) -> StaticQuantConfig: """ Returns a static quantization configuration suitable for running QDQ models on QNN EP. @@ -117,6 +118,8 @@ def get_qnn_qdq_config( are automatically removed if activations are asymmetrically quantized. 
Keeping these activations is necessary if optimizations or EP transformations will later remove QuantizeLinear/DequantizeLinear operators from the model. + calibration_providers: Execution providers to run the session during calibration. Default is None which uses + [ "CPUExecutionProvider" ]. Returns: A StaticQuantConfig object @@ -192,6 +195,7 @@ def get_qnn_qdq_config( op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)), per_channel=per_channel, use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD), + calibration_providers=calibration_providers, extra_options=extra_options, ) diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 27221f9445c30..fa468a9676a65 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -99,6 +99,7 @@ def __init__( per_channel=False, reduce_range=False, use_external_data_format=False, + calibration_providers=None, extra_options=None, ): """ @@ -112,6 +113,8 @@ def __init__( quant_format: QuantFormat{QOperator, QDQ}. QOperator format quantizes the model with quantized operators directly. QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor. + calibration_providers: Execution providers to run the session during calibration. Default is None which uses + [ "CPUExecutionProvider" ]. extra_options: key value pair dictionary for various options in different case. 
Current used: extra.Sigmoid.nnapi = True/False (Default is False) @@ -219,6 +222,7 @@ def __init__( self.calibration_data_reader = calibration_data_reader self.calibrate_method = calibrate_method self.quant_format = quant_format + self.calibration_providers = calibration_providers self.extra_options = extra_options or {} @@ -473,6 +477,7 @@ def quantize_static( nodes_to_exclude=None, use_external_data_format=False, calibrate_method=CalibrationMethod.MinMax, + calibration_providers=None, extra_options=None, ): """ @@ -520,6 +525,8 @@ def quantize_static( List of nodes names to exclude. The nodes in this list will be excluded from quantization when it is not None. use_external_data_format: option used for large size (>2GB) model. Set to False by default. + calibration_providers: Execution providers to run the session during calibration. Default is None which uses + [ "CPUExecutionProvider" ]. extra_options: key value pair dictionary for various options in different case. Current used: extra.Sigmoid.nnapi = True/False (Default is False) @@ -697,6 +704,7 @@ def inc_dataloader(): augmented_model_path=Path(quant_tmp_dir).joinpath("augmented_model.onnx").as_posix(), calibrate_method=calibrate_method, use_external_data_format=use_external_data_format, + providers=calibration_providers, extra_options=calib_extra_options, ) @@ -890,6 +898,7 @@ def quantize( per_channel=quant_config.per_channel, reduce_range=quant_config.reduce_range, use_external_data_format=quant_config.use_external_data_format, + calibration_providers=quant_config.calibration_providers, extra_options=quant_config.extra_options, )