From 5126f53d4c3dfcaadcc3a9a205e2d262b3d2ab37 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 21 Nov 2022 14:49:08 +0100 Subject: [PATCH 01/17] Add ort export in exporters for encoder-decoder models --- optimum/exporters/onnx/__main__.py | 79 +++++++++++++++++-------- optimum/exporters/onnx/base.py | 46 ++++++++++++++ optimum/exporters/onnx/convert.py | 5 +- optimum/exporters/onnx/model_configs.py | 54 ++++++++++------- optimum/utils/input_generators.py | 4 ++ 5 files changed, 143 insertions(+), 45 deletions(-) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 36c15fe94f..1e79e456e5 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -64,14 +64,21 @@ def main(): ), ) parser.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.") + parser.add_argument( + "--for-ort", + action="store_true", + help=( + "This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess" + " it." + ), + ) parser.add_argument("output", type=Path, help="Path indicating the directory where to store generated ONNX model.") # Retrieve CLI arguments args = parser.parse_args() - args.output = args.output.joinpath("model.onnx") - if not args.output.parent.exists(): - args.output.parent.mkdir(parents=True) + if not args.output.exists(): + args.output.mkdir(parents=True) # Infer the task task = args.task @@ -115,41 +122,65 @@ def main(): f"At least {onnx_config.DEFAULT_ONNX_OPSET} is required." ) - onnx_inputs, onnx_outputs = export( - model, - onnx_config, - args.opset, - args.output, - ) + if args.atol is None: + args.atol = onnx_config.ATOL_FOR_VALIDATION + if isinstance(args.atol, dict): + args.atol = args.atol[task.replace("-with-past", "")] + + if model.config.is_encoder_decoder and args.for_ort: + encoder_model = model.get_encoder() + encoder_onnx_config = onnx_config.get_encoder_onnx_config(encoder_model.config) + + decoder_model = model.get_decoder() + decoder_onnx_config = onnx_config.get_decoder_onnx_config( + decoder_model.config, task.replace("-with-past", ""), use_past=False + ) + + models_for_export = [ + (encoder_model, encoder_onnx_config, "encoder_model.onnx"), + (model, decoder_onnx_config, "decoder_model.onnx"), + ] + + if "-with-past" in task: + decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( + decoder_model.config, task.replace("-with-past", ""), use_past=True + ) + models_for_export.extend([(model, decoder_onnx_config_with_past, "decoder_with_past_model.onnx")]) + else: + models_for_export = [(model, onnx_config, "model.onnx")] + + for model_to_export, model_onnx_config, output_name in models_for_export: + save_path = args.output.joinpath(output_name) + onnx_inputs, onnx_outputs = export( + model_to_export, + model_onnx_config, + args.opset, + save_path, + ) + + try: + validate_model_outputs(model_onnx_config, model_to_export, save_path, onnx_outputs, args.atol) + except ValueError: + logger.error(f"An error occured, but the model was saved at: {save_path.as_posix()}") + return + logger.info(f"All good, model saved at: {save_path.as_posix()}") # Saving the model config as this is needed sometimes. - model.config.save_pretrained(args.output.parent) + model.config.save_pretrained(args.output) # Saving the tokenizer / feature extractor as well. 
try: tokenizer = AutoTokenizer.from_pretrained(args.model) - tokenizer.save_pretrained(args.output.parent) + tokenizer.save_pretrained(args.output) except Exception: pass try: feature_extractor = AutoFeatureExtractor.from_pretrained(args.model) - feature_extractor.save_pretrained(args.output.parent) + feature_extractor.save_pretrained(args.output) except Exception: pass - if args.atol is None: - args.atol = onnx_config.ATOL_FOR_VALIDATION - if isinstance(args.atol, dict): - args.atol = args.atol[task.replace("-with-past", "")] - - try: - validate_model_outputs(onnx_config, model, args.output, onnx_outputs, args.atol) - except ValueError: - logger.error(f"An error occured, but the model was saved at: {args.output.as_posix()}") - return - logger.info(f"All good, model saved at: {args.output.as_posix()}") - if __name__ == "__main__": main() diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 802a856003..7607c20204 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -303,6 +303,18 @@ def flatten_output_collection_property(cls, name: str, field: Iterable[Any]) -> """ return {f"{name}.{idx}": item for idx, item in enumerate(itertools.chain.from_iterable(field))} + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + """ + Generate inputs for ONNX Runtime using the reference model inputs. Override this to run inference with seq2seq + models which have the encoder and decoder exported as separate ONNX files. + Args: + reference_model_inputs ([`Mapping[str, Tensor]`): + Reference inputs for the model. + Returns: + `Mapping[str, Tensor]`: The mapping holding the kwargs to provide to the model's forward function + """ + return reference_model_inputs + class OnnxConfigWithPast(OnnxConfig, ABC): PAD_ATTENTION_MASK_TO_MATCH_TOTAL_SEQUENCE_LENGTH = True @@ -454,3 +466,37 @@ def flatten_past_key_values(self, flattened_output, name, idx, t): flattened_output[f"{name}.{idx}.decoder.value"] = t[1] flattened_output[f"{name}.{idx}.encoder.key"] = t[2] flattened_output[f"{name}.{idx}.encoder.value"] = t[3] + + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> OnnxConfig: + """ + Returns ONNX encoder config for `Seq2Seq` models. Implement the method to export the encoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The encoder model's configuration to use when exporting to ONNX. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. + """ + raise NotImplementedError(f"Implement the method to export encoder for {config.model_type}") + + def get_decoder_onnx_config( + self, config: "PretrainedConfig", task: str = "default", use_past: bool = False + ) -> OnnxConfig: + """ + Returns ONNX decoder config for `Seq2Seq` model. Implement the method to export the encoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The decoder model's configuration to use when exporting to ONNX. + task (`str`, defaults to `"default"`): + The task the model should be exported for. + use_past (`bool`, defaults to `False`): + Whether to export the model with past_key_values. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. 
+ """ + raise NotImplementedError(f"Implement the method to export decoder for {config.model_type}") diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index 306370d17d..76361e442b 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -115,9 +115,12 @@ def validate_model_outputs( else: ref_outputs_dict[name] = value + # Create onnxruntime inputs from the reference model inputs + reference_model_inputs_onnxruntime = config.generate_dummy_inputs_onnxruntime(reference_model_inputs) + # We flatten potential collection of inputs (i.e. past_keys) onnx_inputs = {} - for name, value in reference_model_inputs.items(): + for name, value in reference_model_inputs_onnxruntime.items(): if isinstance(value, (list, tuple)): value = config.flatten_output_collection_property(name, value) onnx_inputs.update({tensor_name: pt_tensor.numpy() for tensor_name, pt_tensor in value.items()}) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 46926118a3..2b46794043 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -639,26 +639,6 @@ def generate_dummy_inputs(self, framework: str = "pt"): return dummy_inputs -class WhisperOnnxConfig(TextAndAudioOnnxConfig): - NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig - ATOL_FOR_VALIDATION = 1e-3 - - @property - def inputs(self) -> Mapping[str, Mapping[int, str]]: - common_inputs = { - "input_features": {0: "batch_size", 1: "feature_size", 2: "encoder_sequence_length"}, - } - if self.use_past: - common_inputs["decoder_input_ids"] = {0: "batch_size"} - else: - common_inputs["decoder_input_ids"] = {0: "batch_size", 1: "decoder_sequence_length"} - - if self.use_past: - self.add_past_key_values(common_inputs, direction="inputs") - - return common_inputs - - class SpeechSeq2SeqEncoderOnnxConfig(AudioOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedConfig @@ -708,3 +688,37 @@ def values_override(self) -> Optional[Mapping[str, Any]]: return {"use_cache": True} return None + + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") + reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] + + return reference_model_inputs + + +class WhisperOnnxConfig(TextAndAudioOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig + ATOL_FOR_VALIDATION = 1e-3 + + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + common_inputs = { + "input_features": {0: "batch_size", 1: "feature_size", 2: "encoder_sequence_length"}, + } + if self.use_past: + common_inputs["decoder_input_ids"] = {0: "batch_size"} + else: + common_inputs["decoder_input_ids"] = {0: "batch_size", 1: "decoder_sequence_length"} + + if self.use_past: + self.add_past_key_values(common_inputs, direction="inputs") + + return common_inputs + + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> SpeechSeq2SeqEncoderOnnxConfig: + return SpeechSeq2SeqEncoderOnnxConfig(config, task="default") + + def get_decoder_onnx_config( + self, config: "PretrainedConfig", task: str = "default", use_past: bool = False + ) -> SpeechSeq2SeqDecoderOnnxConfig: + return SpeechSeq2SeqDecoderOnnxConfig(config, task, use_past=use_past) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 56e7ba8109..246f8867ad 100644 --- 
a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -27,9 +27,13 @@ if is_torch_available(): import torch + torch.manual_seed(0) + if is_tf_available(): import tensorflow as tf + tf.random.set_seed(0) + def check_framework_is_available(func): @functools.wraps(func) From efcdeb0822149ff42c8c218890d8c1c8d314f646 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 21 Nov 2022 15:04:01 +0100 Subject: [PATCH 02/17] Updated error docstring --- optimum/exporters/onnx/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 7607c20204..e1e6e95acf 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -479,7 +479,10 @@ def get_encoder_onnx_config(self, config: "PretrainedConfig") -> OnnxConfig: Returns: `OnnxConfig`: An instance of the ONNX configuration object. """ - raise NotImplementedError(f"Implement the method to export encoder for {config.model_type}") + raise NotImplementedError( + f"{config.model_type} encoder export is not supported yet. ", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) def get_decoder_onnx_config( self, config: "PretrainedConfig", task: str = "default", use_past: bool = False @@ -499,4 +502,7 @@ def get_decoder_onnx_config( Returns: `OnnxConfig`: An instance of the ONNX configuration object. """ - raise NotImplementedError(f"Implement the method to export decoder for {config.model_type}") + raise NotImplementedError( + f"{config.model_type} decoder export is not supported yet. ", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) From 2dd03396c993ee0c63e30b026978ae5302b6560c Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 21 Nov 2022 15:35:21 +0100 Subject: [PATCH 03/17] Update encoder decoder config location --- optimum/exporters/onnx/base.py | 78 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index e1e6e95acf..a5346f4a1a 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -315,6 +315,44 @@ def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, """ return reference_model_inputs + def get_encoder_onnx_config(self, config: "PretrainedConfig"): + """ + Returns ONNX encoder config for `Seq2Seq` models. Implement the method to export the encoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The encoder model's configuration to use when exporting to ONNX. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. + """ + raise NotImplementedError( + f"{config.model_type} encoder export is not supported yet. ", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) + + def get_decoder_onnx_config(self, config: "PretrainedConfig", task: str = "default", use_past: bool = False): + """ + Returns ONNX decoder config for `Seq2Seq` model. Implement the method to export the encoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The decoder model's configuration to use when exporting to ONNX. + task (`str`, defaults to `"default"`): + The task the model should be exported for. + use_past (`bool`, defaults to `False`): + Whether to export the model with past_key_values. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. 
+ """ + raise NotImplementedError( + f"{config.model_type} decoder export is not supported yet. ", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) + class OnnxConfigWithPast(OnnxConfig, ABC): PAD_ATTENTION_MASK_TO_MATCH_TOTAL_SEQUENCE_LENGTH = True @@ -466,43 +504,3 @@ def flatten_past_key_values(self, flattened_output, name, idx, t): flattened_output[f"{name}.{idx}.decoder.value"] = t[1] flattened_output[f"{name}.{idx}.encoder.key"] = t[2] flattened_output[f"{name}.{idx}.encoder.value"] = t[3] - - def get_encoder_onnx_config(self, config: "PretrainedConfig") -> OnnxConfig: - """ - Returns ONNX encoder config for `Seq2Seq` models. Implement the method to export the encoder - of the model separately. - - Args: - config (`PretrainedConfig`): - The encoder model's configuration to use when exporting to ONNX. - - Returns: - `OnnxConfig`: An instance of the ONNX configuration object. - """ - raise NotImplementedError( - f"{config.model_type} encoder export is not supported yet. ", - f"If you want to support {config.model_type} please propose a PR or open up an issue.", - ) - - def get_decoder_onnx_config( - self, config: "PretrainedConfig", task: str = "default", use_past: bool = False - ) -> OnnxConfig: - """ - Returns ONNX decoder config for `Seq2Seq` model. Implement the method to export the encoder - of the model separately. - - Args: - config (`PretrainedConfig`): - The decoder model's configuration to use when exporting to ONNX. - task (`str`, defaults to `"default"`): - The task the model should be exported for. - use_past (`bool`, defaults to `False`): - Whether to export the model with past_key_values. - - Returns: - `OnnxConfig`: An instance of the ONNX configuration object. - """ - raise NotImplementedError( - f"{config.model_type} decoder export is not supported yet. 
", - f"If you want to support {config.model_type} please propose a PR or open up an issue.", - ) From 90b92712091e411a3c39eacb868eddc441f95c0b Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 22 Nov 2022 08:20:59 +0100 Subject: [PATCH 04/17] Added tests --- optimum/exporters/onnx/convert.py | 4 +- tests/exporters/test_onnx_export.py | 89 +++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index 76361e442b..2268f67a5d 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -226,7 +226,9 @@ def export_pytorch( device = torch.device(device) if device.type == "cuda" and torch.cuda.is_available(): model.to(device) - dummy_inputs = tree_map(lambda value: value.to(device), dummy_inputs) + dummy_inputs = tree_map( + lambda value: value.to(device) if isinstance(value, torch.Tensor) else None, dummy_inputs + ) check_dummy_inputs_are_allowed(model, dummy_inputs) inputs = config.ordered_inputs(model) input_names = list(inputs.keys()) diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 81f5063c1b..0f3f562c4e 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -108,6 +108,10 @@ ("roberta", "roberta-base"), } +PYTORCH_ENCODER_DECODER_MODELS = { + ("whisper", "openai/whisper-tiny.en"), +} + @require_onnx class OnnxUtilsTestCase(TestCase): @@ -266,6 +270,69 @@ def _onnx_export(self, test_name, name, model_name, task, onnx_config_class_cons except (RuntimeError, ValueError) as e: self.fail(f"{name}, {task} -> {e}") + def _onnx_export_for_encoder_decoder_models_for_ort( + self, test_name, name, model_name, task, onnx_config_class_constructor, device="cpu" + ): + model_class = TasksManager.get_model_class_for_task(task) + config = AutoConfig.from_pretrained(model_name) + model = model_class.from_config(config) + + onnx_config = onnx_config_class_constructor(model.config) + + if is_torch_available(): + from optimum.exporters.onnx.utils import TORCH_VERSION + + if not onnx_config.is_torch_support_available: + pytest.skip( + "Skipping due to incompatible PyTorch version. 
Minimum required is" + f" {onnx_config.MIN_TORCH_VERSION}, got: {TORCH_VERSION}" + ) + + encoder_model = model.get_encoder() + encoder_onnx_config = onnx_config.get_encoder_onnx_config(encoder_model.config) + + decoder_model = model.get_decoder() + decoder_onnx_config = onnx_config.get_decoder_onnx_config( + decoder_model.config, task.replace("-with-past", ""), use_past=False + ) + + models_for_export = [ + ( + encoder_model, + encoder_onnx_config, + ), + (model, decoder_onnx_config), + ] + + if "-with-past" in task: + decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( + decoder_model.config, task.replace("-with-past", ""), use_past=True + ) + models_for_export.extend([(model, decoder_onnx_config_with_past)]) + + for model_to_export, model_onnx_config in models_for_export: + with NamedTemporaryFile("w") as output: + try: + onnx_inputs, onnx_outputs = export( + model_to_export, + model_onnx_config, + onnx_config.DEFAULT_ONNX_OPSET, + Path(output.name), + device=device, + ) + atol = model_onnx_config.ATOL_FOR_VALIDATION + if isinstance(atol, dict): + atol = atol[task.replace("-with-past", "")] + validate_model_outputs( + model_onnx_config, + model_to_export, + Path(output.name), + onnx_outputs, + atol, + ) + except (RuntimeError, ValueError) as e: + self.fail(f"{name}, {task} -> {e}") + @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) @slow @require_torch @@ -280,6 +347,28 @@ def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_cla def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor, device="cuda") + @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) + @slow + @require_torch + @require_vision + def test_pytorch_export_for_encoder_decoder_models_for_ort( + self, test_name, name, model_name, task, onnx_config_class_constructor + ): + self._onnx_export_for_encoder_decoder_models_for_ort( + test_name, name, model_name, task, onnx_config_class_constructor + ) + + @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) + @slow + @require_torch + @require_vision + def test_pytorch_export_for_encoder_decoder_models_for_ort_on_cuda( + self, test_name, name, model_name, task, onnx_config_class_constructor + ): + self._onnx_export_for_encoder_decoder_models_for_ort( + test_name, name, model_name, task, onnx_config_class_constructor, device="cuda" + ) + @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) @slow @require_tf From 90153cfcc1104da088e95c3b619600ce4e71a55c Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 22 Nov 2022 08:43:09 +0100 Subject: [PATCH 05/17] Update arguments help --- optimum/exporters/onnx/__main__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index 1e79e456e5..b916ea49eb 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -68,8 +68,8 @@ def main(): "--for-ort", action="store_true", help=( - "This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess" - " it." + "This generates ONNX models to run inference with ONNX Runtime ORTModelXXX for encoder-decoder models." + " If enabled the encoder and decoder of the model are exported separately." 
), ) parser.add_argument("output", type=Path, help="Path indicating the directory where to store generated ONNX model.") @@ -145,7 +145,7 @@ def main(): decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( decoder_model.config, task.replace("-with-past", ""), use_past=True ) - models_for_export.extend([(model, decoder_onnx_config_with_past, "decoder_with_past_model.onnx")]) + models_for_export.append((model, decoder_onnx_config_with_past, "decoder_with_past_model.onnx")) else: models_for_export = [(model, onnx_config, "model.onnx")] From bbb89e1262984702f0b99c6caf18addbdea6461f Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 22 Nov 2022 08:57:56 +0100 Subject: [PATCH 06/17] Updated docstring and removed redundant code --- optimum/exporters/onnx/base.py | 2 +- optimum/utils/input_generators.py | 4 ---- tests/exporters/test_onnx_export.py | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index a5346f4a1a..be5eaee99b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -334,7 +334,7 @@ def get_encoder_onnx_config(self, config: "PretrainedConfig"): def get_decoder_onnx_config(self, config: "PretrainedConfig", task: str = "default", use_past: bool = False): """ - Returns ONNX decoder config for `Seq2Seq` model. Implement the method to export the encoder + Returns ONNX decoder config for `Seq2Seq` models. Implement the method to export the decoder of the model separately. Args: diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 246f8867ad..56e7ba8109 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -27,13 +27,9 @@ if is_torch_available(): import torch - torch.manual_seed(0) - if is_tf_available(): import tensorflow as tf - tf.random.set_seed(0) - def check_framework_is_available(func): @functools.wraps(func) diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 0f3f562c4e..b146263447 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -308,7 +308,7 @@ def _onnx_export_for_encoder_decoder_models_for_ort( decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( decoder_model.config, task.replace("-with-past", ""), use_past=True ) - models_for_export.extend([(model, decoder_onnx_config_with_past)]) + models_for_export.append((model, decoder_onnx_config_with_past)) for model_to_export, model_onnx_config in models_for_export: with NamedTemporaryFile("w") as output: From fc97e78d298f178cae79807c19a81a38757d0904 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Nov 2022 10:12:10 +0100 Subject: [PATCH 07/17] Updated config location --- optimum/exporters/onnx/__main__.py | 2 +- optimum/exporters/onnx/base.py | 78 +++++++++++++++--------------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index b916ea49eb..d728a5ed14 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -68,7 +68,7 @@ def main(): "--for-ort", action="store_true", help=( - "This generates ONNX models to run inference with ONNX Runtime ORTModelXXX for encoder-decoder models." + "This exports models ready to be run with optimum.onnxruntime ORTModelXXX. Useful for encoder-decoder models." " If enabled the encoder and decoder of the model are exported separately." 
), ) diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index be5eaee99b..d59d6f687e 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -315,44 +315,6 @@ def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, """ return reference_model_inputs - def get_encoder_onnx_config(self, config: "PretrainedConfig"): - """ - Returns ONNX encoder config for `Seq2Seq` models. Implement the method to export the encoder - of the model separately. - - Args: - config (`PretrainedConfig`): - The encoder model's configuration to use when exporting to ONNX. - - Returns: - `OnnxConfig`: An instance of the ONNX configuration object. - """ - raise NotImplementedError( - f"{config.model_type} encoder export is not supported yet. ", - f"If you want to support {config.model_type} please propose a PR or open up an issue.", - ) - - def get_decoder_onnx_config(self, config: "PretrainedConfig", task: str = "default", use_past: bool = False): - """ - Returns ONNX decoder config for `Seq2Seq` models. Implement the method to export the decoder - of the model separately. - - Args: - config (`PretrainedConfig`): - The decoder model's configuration to use when exporting to ONNX. - task (`str`, defaults to `"default"`): - The task the model should be exported for. - use_past (`bool`, defaults to `False`): - Whether to export the model with past_key_values. - - Returns: - `OnnxConfig`: An instance of the ONNX configuration object. - """ - raise NotImplementedError( - f"{config.model_type} decoder export is not supported yet. ", - f"If you want to support {config.model_type} please propose a PR or open up an issue.", - ) - class OnnxConfigWithPast(OnnxConfig, ABC): PAD_ATTENTION_MASK_TO_MATCH_TOTAL_SEQUENCE_LENGTH = True @@ -504,3 +466,43 @@ def flatten_past_key_values(self, flattened_output, name, idx, t): flattened_output[f"{name}.{idx}.decoder.value"] = t[1] flattened_output[f"{name}.{idx}.encoder.key"] = t[2] flattened_output[f"{name}.{idx}.encoder.value"] = t[3] + + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> OnnxConfig: + """ + Returns ONNX encoder config for `Seq2Seq` models. Implement the method to export the encoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The encoder model's configuration to use when exporting to ONNX. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. + """ + raise NotImplementedError( + f"{config.model_type} encoder export is not supported yet. ", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) + + def get_decoder_onnx_config( + self, config: "PretrainedConfig", task: str = "default", use_past: bool = False + ) -> OnnxConfig: + """ + Returns ONNX decoder config for `Seq2Seq` models. Implement the method to export the decoder + of the model separately. + + Args: + config (`PretrainedConfig`): + The decoder model's configuration to use when exporting to ONNX. + task (`str`, defaults to `"default"`): + The task the model should be exported for. + use_past (`bool`, defaults to `False`): + Whether to export the model with past_key_values. + + Returns: + `OnnxConfig`: An instance of the ONNX configuration object. + """ + raise NotImplementedError( + f"{config.model_type} decoder export is not supported yet. 
", + f"If you want to support {config.model_type} please propose a PR or open up an issue.", + ) From 7ec493d4d34d0c1f7abf8321deebb29c35620a01 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 28 Nov 2022 14:17:55 +0100 Subject: [PATCH 08/17] Added methods for ncoder/decoder onnx export and validation --- optimum/exporters/onnx/__init__.py | 7 +- optimum/exporters/onnx/__main__.py | 93 ++++++++++---------- optimum/exporters/onnx/convert.py | 124 ++++++++++++++++++++++++++- optimum/exporters/onnx/utils.py | 51 ++++++++++- tests/exporters/test_onnx_export.py | 127 ++++++++++++---------------- 5 files changed, 280 insertions(+), 122 deletions(-) diff --git a/optimum/exporters/onnx/__init__.py b/optimum/exporters/onnx/__init__.py index 74cad88256..b9dadaa5e6 100644 --- a/optimum/exporters/onnx/__init__.py +++ b/optimum/exporters/onnx/__init__.py @@ -15,4 +15,9 @@ from .base import OnnxConfig, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast # noqa from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig # noqa -from .convert import export, validate_model_outputs # noqa +from .convert import ( # noqa + export, + export_encoder_decoder_model, + validate_encoder_decoder_model_outputs, + validate_model_outputs, +) diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py index d728a5ed14..89c68b32d1 100644 --- a/optimum/exporters/onnx/__main__.py +++ b/optimum/exporters/onnx/__main__.py @@ -22,7 +22,12 @@ from ...utils import logging from ..tasks import TasksManager from .base import OnnxConfigWithPast -from .convert import export, validate_model_outputs +from .convert import ( + export, + export_encoder_decoder_model, + validate_encoder_decoder_model_outputs, + validate_model_outputs, +) logger = logging.get_logger() # pylint: disable=invalid-name @@ -76,9 +81,10 @@ def main(): # Retrieve CLI arguments args = parser.parse_args() + args.output = args.output.joinpath("model.onnx") - if not args.output.exists(): - args.output.mkdir(parents=True) + if not args.output.parent.exists(): + args.output.parent.mkdir(parents=True) # Infer the task task = args.task @@ -122,65 +128,62 @@ def main(): f"At least {onnx_config.DEFAULT_ONNX_OPSET} is required." 
) - if args.atol is None: - args.atol = onnx_config.ATOL_FOR_VALIDATION - if isinstance(args.atol, dict): - args.atol = args.atol[task.replace("-with-past", "")] - + use_past = True if "-with-past" in task else False if model.config.is_encoder_decoder and args.for_ort: - encoder_model = model.get_encoder() - encoder_onnx_config = onnx_config.get_encoder_onnx_config(encoder_model.config) - - decoder_model = model.get_decoder() - decoder_onnx_config = onnx_config.get_decoder_onnx_config( - decoder_model.config, task.replace("-with-past", ""), use_past=False - ) - - models_for_export = [ - (encoder_model, encoder_onnx_config, "encoder_model.onnx"), - (model, decoder_onnx_config, "decoder_model.onnx"), - ] - - if "-with-past" in task: - decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( - decoder_model.config, task.replace("-with-past", ""), use_past=True - ) - models_for_export.append((model, decoder_onnx_config_with_past, "decoder_with_past_model.onnx")) - else: - models_for_export = [(model, onnx_config, "model.onnx")] - - for model_to_export, model_onnx_config, output_name in models_for_export: - save_path = args.output.joinpath(output_name) - onnx_inputs, onnx_outputs = export( - model_to_export, - model_onnx_config, + onnx_inputs, onnx_outputs = export_encoder_decoder_model( + model, + onnx_config, args.opset, - save_path, + task, + use_past, + args.output.parent.joinpath("encoder_model.onnx"), + args.output.parent.joinpath("decoder_model.onnx"), + args.output.parent.joinpath("decoder_with_past_model.onnx"), ) - - try: - validate_model_outputs(model_onnx_config, model_to_export, save_path, onnx_outputs, args.atol) - except ValueError: - logger.error(f"An error occured, but the model was saved at: {save_path.as_posix()}") - return - logger.info(f"All good, model saved at: {save_path.as_posix()}") + else: + onnx_inputs, onnx_outputs = export(model, onnx_config, args.opset, args.output) # Saving the model config as this is needed sometimes. - model.config.save_pretrained(args.output) + model.config.save_pretrained(args.output.parent) # Saving the tokenizer / feature extractor as well. 
 try:
 tokenizer = AutoTokenizer.from_pretrained(args.model)
- tokenizer.save_pretrained(args.output)
+ tokenizer.save_pretrained(args.output.parent)
 except Exception:
 pass

 try:
 feature_extractor = AutoFeatureExtractor.from_pretrained(args.model)
- feature_extractor.save_pretrained(args.output)
+ feature_extractor.save_pretrained(args.output.parent)
 except Exception:
 pass

+ if args.atol is None:
+ args.atol = onnx_config.ATOL_FOR_VALIDATION
+ if isinstance(args.atol, dict):
+ args.atol = args.atol[task.replace("-with-past", "")]
+
+ try:
+ if model.config.is_encoder_decoder and args.for_ort:
+ validate_encoder_decoder_model_outputs(
+ onnx_config,
+ model,
+ onnx_outputs,
+ args.atol,
+ task,
+ use_past,
+ args.output.parent.joinpath("encoder_model.onnx"),
+ args.output.parent.joinpath("decoder_model.onnx"),
+ args.output.parent.joinpath("decoder_with_past_model.onnx"),
+ )
+ else:
+ validate_model_outputs(onnx_config, model, args.output, onnx_outputs, args.atol)
+ except ValueError:
+ logger.error(f"An error occurred, but the model was saved at: {args.output.parent.as_posix()}")
+ return
+ logger.info(f"All good, model saved at: {args.output.parent.as_posix()}")
+

 if __name__ == "__main__":
 main()

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index 2268f67a5d..a93fb54e46 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -24,7 +24,7 @@
 from ...utils import logging
 from .base import OnnxConfig
-from .utils import MIN_TORCH_VERSION, is_torch_onnx_support_available
+from .utils import MIN_TORCH_VERSION, get_encoder_decoder_models_for_export, is_torch_onnx_support_available

 if is_torch_available():
@@ -61,6 +61,66 @@ def check_dummy_inputs_are_allowed(
 )

+def validate_encoder_decoder_model_outputs(
+ config: OnnxConfig,
+ reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
+ onnx_named_outputs: List[str],
+ atol: float,
+ task: str,
+ use_past: bool,
+ encoder_onnx_model: Path,
+ decoder_onnx_model: Path,
+ decoder_with_past_onnx_model: Path = None,
+):
+ """
+ Validates the export by checking that the outputs from both the reference and the exported model match.
+ This function validates the ONNX models exported using the `export_encoder_decoder_model` method.
+
+ Args:
+ config ([`~OnnxConfig`]):
+ The configuration used to export the model.
+ reference_model ([`~PreTrainedModel`] or [`~TFPreTrainedModel`]):
+ The model used for the export.
+ onnx_named_outputs (`List[str]`):
+ The names of the outputs to check.
+ atol (`float`):
+ The absolute tolerance in terms of outputs difference between the reference and the exported model.
+ task (`str`):
+ The type of task to export the model with.
+ use_past (`bool`):
+ Whether to export the model with past_key_values.
+ encoder_onnx_model (`Path`):
+ The path to the exported encoder ONNX model.
+ decoder_onnx_model (`Path`):
+ The path to the exported decoder ONNX model.
+ decoder_with_past_onnx_model (`Path`, *optional*, defaults to `None`):
+ The path to the exported decoder with past ONNX model. Required when `use_past` is True.
+ Raises:
+ ValueError: If the outputs shapes or values do not match between the reference and the exported model.
+ """
+ task = task.replace("-with-past", "")
+
+ models_for_validation = get_encoder_decoder_models_for_export(reference_model, config, task, use_past)
+
+ if len(onnx_named_outputs) != len(models_for_validation.keys()):
+ raise ValueError(
+ f"Invalid number of ONNX named outputs. 
Required {len(models_for_validation.keys())}, Provided {len(onnx_named_outputs)}"
+ )
+
+ # Validate encoder
+ model, onnx_config = models_for_validation["encoder"]
+ validate_model_outputs(onnx_config, model, encoder_onnx_model, onnx_named_outputs[0], atol)
+
+ # Validate decoder
+ model, onnx_config = models_for_validation["decoder"]
+ validate_model_outputs(onnx_config, model, decoder_onnx_model, onnx_named_outputs[1], atol)
+
+ if use_past:
+ # Validate decoder with past
+ model, onnx_config = models_for_validation["decoder_with_past"]
+ validate_model_outputs(onnx_config, model, decoder_with_past_onnx_model, onnx_named_outputs[2], atol)
+
+
 def validate_model_outputs(
 config: OnnxConfig,
 reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
@@ -326,6 +386,68 @@ def export_tensorflow(
 return input_names, output_names

+def export_encoder_decoder_model(
+ model: Union["PreTrainedModel", "TFPreTrainedModel"],
+ config: OnnxConfig,
+ opset: int,
+ task: str,
+ use_past: bool,
+ encoder_output: Path,
+ decoder_output: Path,
+ decoder_with_past_output: Path = None,
+ device: str = "cpu",
+) -> Tuple[List[List[str]], List[List[str]]]:
+ """
+ Exports a PyTorch or TensorFlow encoder-decoder model to an ONNX Intermediate Representation.
+ This function exports the encoder and decoder components of the model as separate
+ ONNX files.
+
+ Args:
+ model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
+ The model to export.
+ config ([`~exporters.onnx.config.OnnxConfig`]):
+ The ONNX configuration associated with the exported model.
+ opset (`int`):
+ The version of the ONNX operator set to use.
+ task (`str`):
+ The type of task to export the model with.
+ use_past (`bool`):
+ Whether to export the model with past_key_values.
+ encoder_output (`Path`):
+ Directory to store the exported encoder ONNX model.
+ decoder_output (`Path`):
+ Directory to store the exported decoder ONNX model.
+ decoder_with_past_output (`Path`, *optional*, defaults to `None`):
+ Directory to store the exported decoder with past ONNX model. Required when `use_past` is True.
+ device (`str`, *optional*, defaults to `cpu`):
+ The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
+ export on CUDA devices.
+ Returns:
+ `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named
+ outputs from the ONNX configuration. 
+ """ + task = task.replace("-with-past", "") + + models_for_export = get_encoder_decoder_models_for_export(model, config, task, use_past) + outputs = [] + + # export encoder + model, onnx_config = models_for_export["encoder"] + outputs.append(export(model, onnx_config, opset, encoder_output, device=device)) + + # export decoder + model, onnx_config = models_for_export["decoder"] + outputs.append(export(model, onnx_config, opset, decoder_output, device=device)) + + if use_past: + # export decoder with past + model, onnx_config = models_for_export["decoder_with_past"] + outputs.append(export(model, onnx_config, opset, decoder_with_past_output, device=device)) + + outputs = list(map(list, zip(*outputs))) + return outputs + + def export( model: Union["PreTrainedModel", "TFPreTrainedModel"], config: OnnxConfig, diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index 87c2acc4ef..a0031f0be3 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -16,11 +16,23 @@ from ctypes import c_float, sizeof from enum import Enum +from typing import TYPE_CHECKING, Dict, Tuple, Union import packaging -from transformers.utils import is_torch_available +from transformers.utils import is_tf_available, is_torch_available +if TYPE_CHECKING: + from pathlib import Path + + from .base import OnnxConfig + + if is_torch_available(): + from transformers.modeling_utils import PreTrainedModel + + if is_tf_available(): + from transformers.modeling_tf_utils import TFPreTrainedModel + MIN_TORCH_VERSION = packaging.version.parse("1.11.0") TORCH_VERSION = None if is_torch_available(): @@ -69,3 +81,40 @@ def check_onnxruntime_requirements(minimum_version: packaging.version.Version): f"but we require the version to be >= {minimum_version} to enable all the conversions options.\n" "Please update ONNX Runtime by running `pip install --upgrade onnxruntime`" ) + + +def get_encoder_decoder_models_for_export( + model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "OnnxConfig", task: str, use_past: bool +) -> Dict[str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel"], "OnnxConfig"]]: + """ + Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation. + + Args: + model ([`PreTrainedModel`] or [`TFPreTrainedModel`]): + The model to export. + config ([`~exporters.onnx.config.OnnxConfig`]): + The ONNX configuration associated with the exported model. + task (`str`) + The type of task to export the model with. + use_past (`bool`): + Whether to export the model with past_key_values. + + Returns: + `Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]: A Dict containing the model and + onnx configs for the encoder and decoder parts of the model. 
+ """ + models_for_export = dict() + + encoder_model = model.get_encoder() + encoder_onnx_config = config.get_encoder_onnx_config(encoder_model.config) + models_for_export["encoder"] = (encoder_model, encoder_onnx_config) + + decoder_model = model.get_decoder() + decoder_onnx_config = config.get_decoder_onnx_config(decoder_model.config, task, use_past=False) + models_for_export["decoder"] = (model, decoder_onnx_config) + + if use_past: + decoder_onnx_config_with_past = config.get_decoder_onnx_config(decoder_model.config, task, use_past=True) + models_for_export["decoder_with_past"] = (model, decoder_onnx_config_with_past) + + return models_for_export diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index b146263447..412349f58a 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -21,7 +21,14 @@ from transformers import AutoConfig, is_tf_available, is_torch_available from transformers.testing_utils import require_onnx, require_tf, require_torch, require_vision, slow -from optimum.exporters.onnx import OnnxConfig, OnnxConfigWithPast, export, validate_model_outputs +from optimum.exporters.onnx import ( + OnnxConfig, + OnnxConfigWithPast, + export, + export_encoder_decoder_model, + validate_encoder_decoder_model_outputs, + validate_model_outputs, +) from parameterized import parameterized @@ -223,7 +230,9 @@ class OnnxExportTestCase(TestCase): Integration tests ensuring supported models are correctly exported. """ - def _onnx_export(self, test_name, name, model_name, task, onnx_config_class_constructor, device="cpu"): + def _onnx_export( + self, test_name, name, model_name, task, onnx_config_class_constructor, device="cpu", for_ort=False + ): model_class = TasksManager.get_model_class_for_task(task) config = AutoConfig.from_pretrained(model_name) model = model_class.from_config(config) @@ -252,80 +261,50 @@ def _onnx_export(self, test_name, name, model_name, task, onnx_config_class_cons f" {onnx_config.MIN_TORCH_VERSION}, got: {TORCH_VERSION}" ) - with NamedTemporaryFile("w") as output: - try: - onnx_inputs, onnx_outputs = export( - model, onnx_config, onnx_config.DEFAULT_ONNX_OPSET, Path(output.name), device=device - ) - atol = onnx_config.ATOL_FOR_VALIDATION - if isinstance(atol, dict): - atol = atol[task.replace("-with-past", "")] - validate_model_outputs( - onnx_config, - model, - Path(output.name), - onnx_outputs, - atol, - ) - except (RuntimeError, ValueError) as e: - self.fail(f"{name}, {task} -> {e}") - - def _onnx_export_for_encoder_decoder_models_for_ort( - self, test_name, name, model_name, task, onnx_config_class_constructor, device="cpu" - ): - model_class = TasksManager.get_model_class_for_task(task) - config = AutoConfig.from_pretrained(model_name) - model = model_class.from_config(config) - - onnx_config = onnx_config_class_constructor(model.config) + atol = onnx_config.ATOL_FOR_VALIDATION + if isinstance(atol, dict): + atol = atol[task.replace("-with-past", "")] - if is_torch_available(): - from optimum.exporters.onnx.utils import TORCH_VERSION - - if not onnx_config.is_torch_support_available: - pytest.skip( - "Skipping due to incompatible PyTorch version. 
Minimum required is" - f" {onnx_config.MIN_TORCH_VERSION}, got: {TORCH_VERSION}" - ) - - encoder_model = model.get_encoder() - encoder_onnx_config = onnx_config.get_encoder_onnx_config(encoder_model.config) + if for_ort: + with NamedTemporaryFile("w") as encoder_output, NamedTemporaryFile( + "w" + ) as decoder_output, NamedTemporaryFile("w") as decoder_with_past_output: + try: + onnx_inputs, onnx_outputs = export_encoder_decoder_model( + model, + onnx_config, + onnx_config.DEFAULT_ONNX_OPSET, + task, + True, + Path(encoder_output.name), + Path(decoder_output.name), + Path(decoder_with_past_output.name), + device=device, + ) - decoder_model = model.get_decoder() - decoder_onnx_config = onnx_config.get_decoder_onnx_config( - decoder_model.config, task.replace("-with-past", ""), use_past=False - ) + validate_encoder_decoder_model_outputs( + onnx_config, + model, + onnx_outputs, + atol, + task, + True, + Path(encoder_output.name), + Path(decoder_output.name), + Path(decoder_with_past_output.name), + ) + except (RuntimeError, ValueError) as e: + self.fail(f"{name}, {task} -> {e}") - models_for_export = [ - ( - encoder_model, - encoder_onnx_config, - ), - (model, decoder_onnx_config), - ] - - if "-with-past" in task: - decoder_onnx_config_with_past = onnx_config.get_decoder_onnx_config( - decoder_model.config, task.replace("-with-past", ""), use_past=True - ) - models_for_export.append((model, decoder_onnx_config_with_past)) - - for model_to_export, model_onnx_config in models_for_export: + else: with NamedTemporaryFile("w") as output: try: onnx_inputs, onnx_outputs = export( - model_to_export, - model_onnx_config, - onnx_config.DEFAULT_ONNX_OPSET, - Path(output.name), - device=device, + model, onnx_config, onnx_config.DEFAULT_ONNX_OPSET, Path(output.name), device=device ) - atol = model_onnx_config.ATOL_FOR_VALIDATION - if isinstance(atol, dict): - atol = atol[task.replace("-with-past", "")] validate_model_outputs( - model_onnx_config, - model_to_export, + onnx_config, + model, Path(output.name), onnx_outputs, atol, @@ -345,7 +324,9 @@ def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_cla @require_torch @require_vision def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): - self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor, device="cuda") + self._onnx_export( + test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True + ) @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) @slow @@ -354,8 +335,8 @@ def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_co def test_pytorch_export_for_encoder_decoder_models_for_ort( self, test_name, name, model_name, task, onnx_config_class_constructor ): - self._onnx_export_for_encoder_decoder_models_for_ort( - test_name, name, model_name, task, onnx_config_class_constructor + self._onnx_export( + test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True ) @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) @@ -365,9 +346,7 @@ def test_pytorch_export_for_encoder_decoder_models_for_ort( def test_pytorch_export_for_encoder_decoder_models_for_ort_on_cuda( self, test_name, name, model_name, task, onnx_config_class_constructor ): - self._onnx_export_for_encoder_decoder_models_for_ort( - test_name, name, model_name, task, onnx_config_class_constructor, device="cuda" - ) + self._onnx_export(test_name, name, model_name, 
task, onnx_config_class_constructor) @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) @slow From 634c682c5993ca8ec2cab05bb2142ee312c43857 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 29 Nov 2022 08:41:30 +0100 Subject: [PATCH 09/17] Added Seq2Seq-lm encoder-decoder configs --- optimum/exporters/onnx/model_configs.py | 139 ++++++++++++++++++++---- optimum/utils/input_generators.py | 7 ++ tests/exporters/test_onnx_export.py | 77 ++++++++----- 3 files changed, 172 insertions(+), 51 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 2b46794043..99cfa1ebc4 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -49,6 +49,70 @@ from .base import PatchingSpec +class Seq2SeqEncoderOnnxConfig(TextEncoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + return { + "input_ids": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "batch_size", 1: "sequence_length"}, + } + + +class Seq2SeqDecoderOnnxConfig(TextSeq2SeqOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig + + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummySeq2SeqDecoderTextInputGenerator, + DummyDecoderTextInputGenerator, + DummySeq2SeqPastKeyValuesGenerator, + ) + + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + common_inputs = { + "decoder_input_ids": {0: "batch_size", 1: "past_decoder_sequence_length + sequence_length"}, + "encoder_outputs": {0: "batch_size", 1: "encoder_sequence_length"}, + "attention_mask": {0: "batch_size", 1: "encoder_sequence_length"}, + } + + if self.use_past: + self.add_past_key_values(common_inputs, direction="inputs") + + return common_inputs + + @property + def torch_to_onnx_input_map(self) -> Mapping[str, str]: + return { + "decoder_input_ids": "input_ids", + "encoder_outputs": "encoder_hidden_states", + "attention_mask": "encoder_attention_mask", + } + + @property + def outputs(self) -> Mapping[str, Mapping[int, str]]: + common_outputs = super().outputs + self.add_past_key_values(common_outputs, direction="outputs") + return common_outputs + + @property + def values_override(self) -> Optional[Mapping[str, Any]]: + # Needed here because the configuration will actually be used with both use_past = True and use_past = False, + # but the cache must always be used regardless. 
+ if hasattr(self._config, "use_cache"): + return {"use_cache": True} + + return None + + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") + reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] + reference_model_inputs["encoder_attention_mask"] = reference_model_inputs.pop("attention_mask") + + return reference_model_inputs + + class BertOnnxConfig(TextEncoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedTextConfig ATOL_FOR_VALIDATION = 1e-4 @@ -224,6 +288,23 @@ def generate(self, input_name: str, framework: str = "pt"): ] +class T5DecoderOnnxConfig(Seq2SeqDecoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args( + hidden_size="d_model", + num_attention_heads="num_heads", + encoder_num_layers="num_layers", + decoder_num_layers="num_decoder_layers", + key_value_dim="d_kv", + allow_new=True, + ) + + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummySeq2SeqDecoderTextInputGenerator, + DummyDecoderTextInputGenerator, + T5DummySeq2SeqPastKeyValuesGenerator, + ) + + class T5OnnxConfig(TextSeq2SeqOnnxConfig): DEFAULT_ONNX_OPSET = 13 DUMMY_INPUT_GENERATOR_CLASSES = TextSeq2SeqOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES[:-1] + ( @@ -238,6 +319,14 @@ class T5OnnxConfig(TextSeq2SeqOnnxConfig): allow_new=True, ) + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> Seq2SeqEncoderOnnxConfig: + return Seq2SeqEncoderOnnxConfig(config, task="default") + + def get_decoder_onnx_config( + self, config: "PretrainedConfig", task: str = "default", use_past: bool = False + ) -> T5DecoderOnnxConfig: + return T5DecoderOnnxConfig(config, task, use_past=use_past) + class MT5OnnxConfig(T5OnnxConfig): ATOL_FOR_VALIDATION = 1e-4 @@ -286,6 +375,17 @@ def generate(self, input_name: str, framework: str = "pt"): return int_tensor +class BartDecoderOnnxConfig(Seq2SeqDecoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args( + encoder_num_layers="encoder_layers", + decoder_num_layers="decoder_layers", + num_layers="decoder_layers", # Used for the causal-lm task past key values input generation. 
+ encoder_num_attention_heads="encoder_attention_heads", + decoder_num_attention_heads="decoder_attention_heads", + eos_token_id="eos_token_id", + ) + + class BartOnnxConfig(TextSeq2SeqOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args( encoder_num_layers="encoder_layers", @@ -425,6 +525,14 @@ def flatten_past_key_values(self, flattened_output, name, idx, t): flattened_output, name, idx, t ) + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> Seq2SeqEncoderOnnxConfig: + return Seq2SeqEncoderOnnxConfig(config, task="default") + + def get_decoder_onnx_config( + self, config: "PretrainedConfig", task: str = "default", use_past: bool = False + ) -> BartDecoderOnnxConfig: + return BartDecoderOnnxConfig(config, task, use_past=use_past) + class MBartOnnxConfig(BartOnnxConfig): pass @@ -442,8 +550,16 @@ class BlenderbotSmallOnnxConfig(BartOnnxConfig): pass +class BigBirdPegasusEncoderOnnxConfig(Seq2SeqEncoderOnnxConfig): + def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + # TODO: check why the attention mask is not present in the exported model + reference_model_inputs.pop("attention_mask") + return reference_model_inputs + + class BigBirdPegasusOnnxConfig(BartOnnxConfig): - pass + def get_encoder_onnx_config(self, config: "PretrainedConfig") -> BigBirdPegasusEncoderOnnxConfig: + return BigBirdPegasusEncoderOnnxConfig(config, task="default") class MarianOnnxConfig(BartOnnxConfig): @@ -649,7 +765,7 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]: } -class SpeechSeq2SeqDecoderOnnxConfig(OnnxSeq2SeqConfigWithPast): +class SpeechSeq2SeqDecoderOnnxConfig(Seq2SeqDecoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig DUMMY_INPUT_GENERATOR_CLASSES = ( @@ -670,25 +786,6 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]: return common_inputs - @property - def torch_to_onnx_input_map(self) -> Mapping[str, str]: - return {"decoder_input_ids": "input_ids", "encoder_outputs": "encoder_hidden_states"} - - @property - def outputs(self) -> Mapping[str, Mapping[int, str]]: - common_outputs = super().outputs - self.add_past_key_values(common_outputs, direction="outputs") - return common_outputs - - @property - def values_override(self) -> Optional[Mapping[str, Any]]: - # Needed here because the configuration will actually be used with both use_past = True and use_past = False, - # but the cache must always be used regardless. 
- if hasattr(self._config, "use_cache"): - return {"use_cache": True} - - return None - def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids") reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0] diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 56e7ba8109..8a263be080 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -296,6 +296,7 @@ class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): "decoder_input_ids", "decoder_attention_mask", "encoder_outputs", + "attention_mask", ) def __init__( @@ -327,6 +328,12 @@ def generate(self, input_name: str, framework: str = "pt"): shape = (self.batch_size, self.sequence_length, self.hidden_size) return (self.random_float_tensor(shape, min_value=0, max_value=1, framework=framework), None, None) + if input_name == "attention_mask": + min_value = 0 + max_value = 2 + shape = (self.batch_size, self.sequence_length) + return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework) + return super().generate(input_name, framework=framework) diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 412349f58a..8fdecbe284 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -115,7 +115,24 @@ ("roberta", "roberta-base"), } -PYTORCH_ENCODER_DECODER_MODELS = { +PYTORCH_ENCODER_DECODER_MODELS_FOR_CONDITIONAL_GENERATION = { + ("bart", "facebook/bart-base", ("seq2seq-lm", "seq2seq-lm-with-past")), + ("mbart", "sshleifer/tiny-mbart", ("seq2seq-lm", "seq2seq-lm-with-past")), + ("t5", "t5-small"), + ("marian", "Helsinki-NLP/opus-mt-en-de", ("seq2seq-lm", "seq2seq-lm-with-past")), + # Not using google/mt5-small because it takes too much time for testing. + ("mt5", "lewtun/tiny-random-mt5"), + # Not using facebook/m2m100_418M because it takes too much time for testing. + ( + "m2m-100", + "hf-internal-testing/tiny-random-m2m_100", + ), + # Not using google/bigbird-pegasus-large-arxiv because it takes too much time for testing. 
+ ( + "bigbird-pegasus", + "hf-internal-testing/tiny-random-bigbird_pegasus", + ("seq2seq-lm", "seq2seq-lm-with-past"), + ), ("whisper", "openai/whisper-tiny.en"), } @@ -312,45 +329,45 @@ def _onnx_export( except (RuntimeError, ValueError) as e: self.fail(f"{name}, {task} -> {e}") - @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) + # @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) + # @slow + # @require_torch + # @require_vision + # def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_class_constructor): + # self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) + + # @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) + # @slow + # @require_torch + # @require_vision + # def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): + # self._onnx_export( + # test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True + # ) + + @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS_FOR_CONDITIONAL_GENERATION)) @slow @require_torch @require_vision - def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_class_constructor): - self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) - - @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) - @slow - @require_torch - @require_vision - def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): - self._onnx_export( - test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True - ) + def test_pytorch_export_for_encoder_decoder_models_for_conditional_generation( + self, test_name, name, model_name, task, onnx_config_class_constructor + ): + self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor, for_ort=True) - @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) + @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS_FOR_CONDITIONAL_GENERATION)) @slow @require_torch @require_vision - def test_pytorch_export_for_encoder_decoder_models_for_ort( + def test_pytorch_export_for_encoder_decoder_models_for_conditional_generation_on_cuda( self, test_name, name, model_name, task, onnx_config_class_constructor ): self._onnx_export( test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True ) - @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS)) - @slow - @require_torch - @require_vision - def test_pytorch_export_for_encoder_decoder_models_for_ort_on_cuda( - self, test_name, name, model_name, task, onnx_config_class_constructor - ): - self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) - - @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) - @slow - @require_tf - @require_vision - def test_tensorflow_export(self, test_name, name, model_name, task, onnx_config_class_constructor): - self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) + # @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) + # @slow + # @require_tf + # @require_vision + # def test_tensorflow_export(self, test_name, name, model_name, task, onnx_config_class_constructor): + # self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) From db97f8d6cc98ff751be0bc82076bf716046daa28 Mon Sep 17 00:00:00 2001 From: Mohit 
Sharma Date: Tue, 29 Nov 2022 08:46:30 +0100 Subject: [PATCH 10/17] Uncommented tests --- optimum/exporters/onnx/utils.py | 2 -- tests/exporters/test_onnx_export.py | 42 ++++++++++++++--------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index a0031f0be3..a51c1fdea4 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -23,8 +23,6 @@ if TYPE_CHECKING: - from pathlib import Path - from .base import OnnxConfig if is_torch_available(): diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 8fdecbe284..d65ff90b11 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -329,21 +329,21 @@ def _onnx_export( except (RuntimeError, ValueError) as e: self.fail(f"{name}, {task} -> {e}") - # @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) - # @slow - # @require_torch - # @require_vision - # def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_class_constructor): - # self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) - - # @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) - # @slow - # @require_torch - # @require_vision - # def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): - # self._onnx_export( - # test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True - # ) + @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) + @slow + @require_torch + @require_vision + def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_class_constructor): + self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) + + @parameterized.expand(_get_models_to_test(PYTORCH_EXPORT_MODELS)) + @slow + @require_torch + @require_vision + def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor): + self._onnx_export( + test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True + ) @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS_FOR_CONDITIONAL_GENERATION)) @slow @@ -365,9 +365,9 @@ def test_pytorch_export_for_encoder_decoder_models_for_conditional_generation_on test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True ) - # @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) - # @slow - # @require_tf - # @require_vision - # def test_tensorflow_export(self, test_name, name, model_name, task, onnx_config_class_constructor): - # self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) + @parameterized.expand(_get_models_to_test(TENSORFLOW_EXPORT_MODELS)) + @slow + @require_tf + @require_vision + def test_tensorflow_export(self, test_name, name, model_name, task, onnx_config_class_constructor): + self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor) From bc81a4145c93d7fa26a639645a3da77315377283 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 29 Nov 2022 08:47:30 +0100 Subject: [PATCH 11/17] Fixed test --- tests/exporters/test_onnx_export.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index d65ff90b11..01d7366212 100644 --- a/tests/exporters/test_onnx_export.py +++ 
b/tests/exporters/test_onnx_export.py
@@ -341,9 +341,7 @@ def test_pytorch_export(self, test_name, name, model_name, task, onnx_config_cla
     @require_torch
     @require_vision
     def test_pytorch_export_on_cuda(self, test_name, name, model_name, task, onnx_config_class_constructor):
-        self._onnx_export(
-            test_name, name, model_name, task, onnx_config_class_constructor, device="cuda", for_ort=True
-        )
+        self._onnx_export(test_name, name, model_name, task, onnx_config_class_constructor, device="cuda")

     @parameterized.expand(_get_models_to_test(PYTORCH_ENCODER_DECODER_MODELS_FOR_CONDITIONAL_GENERATION))
     @slow

From bbd201c54721e2d8601d3bfcb0c416bb7593f9fa Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Tue, 29 Nov 2022 08:56:59 +0100
Subject: [PATCH 12/17] Updated argument help

---
 optimum/exporters/onnx/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
index 89c68b32d1..9a30c29bb9 100644
--- a/optimum/exporters/onnx/__main__.py
+++ b/optimum/exporters/onnx/__main__.py
@@ -73,8 +73,8 @@ def main():
         "--for-ort",
         action="store_true",
         help=(
-            "This exports models ready to be run with optimum.onnxruntime ORTModelXXX. Useful for encoder-decoder models."
-            " If enabled the encoder and decoder of the model are exported separately."
+            "This exports models ready to be run with optimum.onnxruntime ORTModelXXX. Useful for encoder-decoder models for "
+            "conditional generation. If enabled, the encoder and decoder of the model are exported separately."
         ),
     )
     parser.add_argument("output", type=Path, help="Path indicating the directory where to store generated ONNX model.")

From f9713dc271a99be506caba3fee106d2fa7d9fdb7 Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Tue, 29 Nov 2022 12:35:56 +0100
Subject: [PATCH 13/17] Removed use-past and updated docstring

---
 optimum/exporters/onnx/__main__.py | 5 +----
 optimum/exporters/onnx/base.py | 2 +-
 optimum/exporters/onnx/convert.py | 22 ++++++++--------------
 optimum/exporters/onnx/model_configs.py | 6 +++---
 optimum/exporters/onnx/utils.py | 8 +++-----
 tests/exporters/test_onnx_export.py | 2 --
 6 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
index 9a30c29bb9..646218ddcc 100644
--- a/optimum/exporters/onnx/__main__.py
+++ b/optimum/exporters/onnx/__main__.py
@@ -73,7 +73,7 @@ def main():
         "--for-ort",
         action="store_true",
         help=(
-            "This exports models ready to be run with optimum.onnxruntime ORTModelXXX. Useful for encoder-decoder models for "
+            "This exports models ready to be run with optimum.onnxruntime. Useful for encoder-decoder models for "
             "conditional generation. If enabled, the encoder and decoder of the model are exported separately."
         ),
     )
     parser.add_argument("output", type=Path, help="Path indicating the directory where to store generated ONNX model.")
@@ -128,14 +128,12 @@ def main():
             f"At least {onnx_config.DEFAULT_ONNX_OPSET} is required."
) - use_past = True if "-with-past" in task else False if model.config.is_encoder_decoder and args.for_ort: onnx_inputs, onnx_outputs = export_encoder_decoder_model( model, onnx_config, args.opset, task, - use_past, args.output.parent.joinpath("encoder_model.onnx"), args.output.parent.joinpath("decoder_model.onnx"), args.output.parent.joinpath("decoder_with_past_model.onnx"), @@ -172,7 +170,6 @@ def main(): onnx_outputs, args.atol, task, - use_past, args.output.parent.joinpath("encoder_model.onnx"), args.output.parent.joinpath("decoder_model.onnx"), args.output.parent.joinpath("decoder_with_past_model.onnx"), diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index d59d6f687e..f69bad215b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -303,7 +303,7 @@ def flatten_output_collection_property(cls, name: str, field: Iterable[Any]) -> """ return {f"{name}.{idx}": item for idx, item in enumerate(itertools.chain.from_iterable(field))} - def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: + def generate_dummy_inputs_for_validation(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]: """ Generate inputs for ONNX Runtime using the reference model inputs. Override this to run inference with seq2seq models which have the encoder and decoder exported as separate ONNX files. diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index a93fb54e46..a871dc92d0 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -67,7 +67,6 @@ def validate_encoder_decoder_model_outputs( onnx_named_outputs: List[str], atol: float, task: str, - use_past: bool, encoder_onnx_model: Path, decoder_onnx_model: Path, decoder_with_past_onnx_model: Path = None, @@ -87,20 +86,18 @@ def validate_encoder_decoder_model_outputs( The absolute tolerance in terms of outputs difference between the reference and the exported model. task (`str`) The type of task to export the model with. - use_past (`bool`, *optional*, defaults to `None`): - Whether to export the model with past_key_values. encoder_onnx_model (`Path`): The path to the exported encoder ONNX model. decoder_onnx_model (`Path`): The path to the exported decoder ONNX model. decoder_with_past_onnx_model (`Path`, *optional*, defaults to `None`): - The path to the exported decoder with past ONNX model. Required when `use_past` is True. + The path to the exported decoder with past ONNX model. Required when `past_key_values` are exported. Raises: ValueError: If the outputs shapes or values do not match between the reference and the exported model. 
""" task = task.replace("-with-past", "") - models_for_validation = get_encoder_decoder_models_for_export(reference_model, config, task, use_past) + models_for_validation = get_encoder_decoder_models_for_export(reference_model, config, task) if len(onnx_named_outputs) != len(models_for_validation.keys()): raise ValueError( @@ -115,7 +112,7 @@ def validate_encoder_decoder_model_outputs( model, onnx_config = models_for_validation["decoder"] validate_model_outputs(onnx_config, model, decoder_onnx_model, onnx_named_outputs[1], atol) - if use_past: + if config.use_past: # Validate decoder with past model, onnx_config = models_for_validation["decoder_with_past"] validate_model_outputs(onnx_config, model, decoder_with_past_onnx_model, onnx_named_outputs[2], atol) @@ -176,11 +173,11 @@ def validate_model_outputs( ref_outputs_dict[name] = value # Create onnxruntime inputs from the reference model inputs - reference_model_inputs_onnxruntime = config.generate_dummy_inputs_onnxruntime(reference_model_inputs) + reference_model_inputs_for_validation = config.generate_dummy_inputs_for_validation(reference_model_inputs) # We flatten potential collection of inputs (i.e. past_keys) onnx_inputs = {} - for name, value in reference_model_inputs_onnxruntime.items(): + for name, value in reference_model_inputs_for_validation.items(): if isinstance(value, (list, tuple)): value = config.flatten_output_collection_property(name, value) onnx_inputs.update({tensor_name: pt_tensor.numpy() for tensor_name, pt_tensor in value.items()}) @@ -391,7 +388,6 @@ def export_encoder_decoder_model( config: OnnxConfig, opset: int, task: str, - use_past: bool, encoder_output: Path, decoder_output: Path, decoder_with_past_output: Path = None, @@ -411,14 +407,12 @@ def export_encoder_decoder_model( The version of the ONNX operator set to use. task (`str`) The type of task to export the model with. - use_past (`bool`): - Whether to export the model with past_key_values. encoder_output (`Path`): Directory to store the exported encoder ONNX model. decoder_output (`Path`): Directory to store the exported decoder ONNX model. decoder_with_past_output (`Path`, *optional*, defaults to `None`): - Directory to store the exported decoder with past ONNX model. Required when `use_past` is True. + Directory to store the exported decoder with past ONNX model. Required when `past_key_values` are exported. device (`str`, *optional*, defaults to `cpu`): The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for export on CUDA devices. 
@@ -428,7 +422,7 @@ def export_encoder_decoder_model(
     """
     task = task.replace("-with-past", "")

-    models_for_export = get_encoder_decoder_models_for_export(model, config, task, use_past)
+    models_for_export = get_encoder_decoder_models_for_export(model, config, task)

     outputs = []
     # export encoder
@@ -439,7 +433,7 @@ def export_encoder_decoder_model(
     model, onnx_config = models_for_export["decoder"]
     outputs.append(export(model, onnx_config, opset, decoder_output, device=device))

-    if use_past:
+    if config.use_past:
         # export decoder with past
         model, onnx_config = models_for_export["decoder_with_past"]
         outputs.append(export(model, onnx_config, opset, decoder_with_past_output, device=device))
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 99cfa1ebc4..640d881c86 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -105,7 +105,7 @@ def values_override(self) -> Optional[Mapping[str, Any]]:

         return None

-    def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
+    def generate_dummy_inputs_for_validation(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
         reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids")
         reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0]
         reference_model_inputs["encoder_attention_mask"] = reference_model_inputs.pop("attention_mask")
@@ -551,7 +551,7 @@ class BlenderbotSmallOnnxConfig(BartOnnxConfig):


 class BigBirdPegasusEncoderOnnxConfig(Seq2SeqEncoderOnnxConfig):
-    def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
+    def generate_dummy_inputs_for_validation(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
         # TODO: check why the attention mask is not present in the exported model
         reference_model_inputs.pop("attention_mask")
         return reference_model_inputs
@@ -786,7 +786,7 @@ def inputs(self) -> Mapping[str, Mapping[int, str]]:

         return common_inputs

-    def generate_dummy_inputs_onnxruntime(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
+    def generate_dummy_inputs_for_validation(self, reference_model_inputs: Mapping[str, Any]) -> Mapping[str, Any]:
         reference_model_inputs["input_ids"] = reference_model_inputs.pop("decoder_input_ids")
         reference_model_inputs["encoder_hidden_states"] = reference_model_inputs.pop("encoder_outputs")[0]

diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py
index a51c1fdea4..9d56aca8d2 100644
--- a/optimum/exporters/onnx/utils.py
+++ b/optimum/exporters/onnx/utils.py
@@ -82,10 +82,10 @@ def check_onnxruntime_requirements(minimum_version: packaging.version.Version):


 def get_encoder_decoder_models_for_export(
-    model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "OnnxConfig", task: str, use_past: bool
+    model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "OnnxConfig", task: str
 ) -> Dict[str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel"], "OnnxConfig"]]:
     """
-    Exports a Pytorch or TensorFlow model to an ONNX Intermediate Representation.
+    Returns the encoder and decoder parts of the model and their corresponding ONNX configs.

     Args:
         model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
             The model to export.
         config ([`~exporters.onnx.config.OnnxConfig`]):
             The ONNX configuration associated with the exported model.
         task (`str`)
             The type of task to export the model with.
- use_past (`bool`): - Whether to export the model with past_key_values. Returns: `Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]: A Dict containing the model and @@ -111,7 +109,7 @@ def get_encoder_decoder_models_for_export( decoder_onnx_config = config.get_decoder_onnx_config(decoder_model.config, task, use_past=False) models_for_export["decoder"] = (model, decoder_onnx_config) - if use_past: + if config.use_past: decoder_onnx_config_with_past = config.get_decoder_onnx_config(decoder_model.config, task, use_past=True) models_for_export["decoder_with_past"] = (model, decoder_onnx_config_with_past) diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 01d7366212..1ff4db8f09 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -292,7 +292,6 @@ def _onnx_export( onnx_config, onnx_config.DEFAULT_ONNX_OPSET, task, - True, Path(encoder_output.name), Path(decoder_output.name), Path(decoder_with_past_output.name), @@ -305,7 +304,6 @@ def _onnx_export( onnx_outputs, atol, task, - True, Path(encoder_output.name), Path(decoder_output.name), Path(decoder_with_past_output.name), From 936b114b7272975bc04172991b5f202da1659fbe Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 29 Nov 2022 13:42:12 +0100 Subject: [PATCH 14/17] Updated input generator Seq2SeqDecoderConfig --- optimum/exporters/onnx/model_configs.py | 6 +++--- optimum/utils/input_generators.py | 9 +-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 640d881c86..af0c6c88ef 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -64,8 +64,8 @@ class Seq2SeqDecoderOnnxConfig(TextSeq2SeqOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTextInputGenerator, DummySeq2SeqDecoderTextInputGenerator, - DummyDecoderTextInputGenerator, DummySeq2SeqPastKeyValuesGenerator, ) @@ -299,8 +299,8 @@ class T5DecoderOnnxConfig(Seq2SeqDecoderOnnxConfig): ) DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTextInputGenerator, DummySeq2SeqDecoderTextInputGenerator, - DummyDecoderTextInputGenerator, T5DummySeq2SeqPastKeyValuesGenerator, ) @@ -769,8 +769,8 @@ class SpeechSeq2SeqDecoderOnnxConfig(Seq2SeqDecoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTextInputGenerator, DummySeq2SeqDecoderTextInputGenerator, - DummyDecoderTextInputGenerator, DummySeq2SeqPastKeyValuesGenerator, ) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 8a263be080..f1592144d0 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -296,7 +296,6 @@ class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): "decoder_input_ids", "decoder_attention_mask", "encoder_outputs", - "attention_mask", ) def __init__( @@ -326,13 +325,7 @@ def __init__( def generate(self, input_name: str, framework: str = "pt"): if input_name == "encoder_outputs": shape = (self.batch_size, self.sequence_length, self.hidden_size) - return (self.random_float_tensor(shape, min_value=0, max_value=1, framework=framework), None, None) - - if input_name == "attention_mask": - min_value = 0 - max_value = 2 - shape = (self.batch_size, self.sequence_length) - return self.random_int_tensor(shape, max_value, min_value=min_value, framework=framework) + return 
(self.random_float_tensor(shape, framework=framework), None, None) return super().generate(input_name, framework=framework) From 56db9c57091c91a61d0370b7b270e3dff86b71b7 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 29 Nov 2022 18:16:22 +0530 Subject: [PATCH 15/17] Update docstrings to use Optional Co-authored-by: Michael Benayoun --- optimum/exporters/onnx/convert.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index a871dc92d0..8a0f0a26a8 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -69,7 +69,7 @@ def validate_encoder_decoder_model_outputs( task: str, encoder_onnx_model: Path, decoder_onnx_model: Path, - decoder_with_past_onnx_model: Path = None, + decoder_with_past_onnx_model: Optional[Path] = None, ): """ Validates the export by checking that the outputs from both the reference and the exported model match. @@ -90,7 +90,7 @@ def validate_encoder_decoder_model_outputs( The path to the exported encoder ONNX model. decoder_onnx_model (`Path`): The path to the exported decoder ONNX model. - decoder_with_past_onnx_model (`Path`, *optional*, defaults to `None`): + decoder_with_past_onnx_model (`Optional[Path]`, defaults to `None`): The path to the exported decoder with past ONNX model. Required when `past_key_values` are exported. Raises: ValueError: If the outputs shapes or values do not match between the reference and the exported model. @@ -284,7 +284,7 @@ def export_pytorch( if device.type == "cuda" and torch.cuda.is_available(): model.to(device) dummy_inputs = tree_map( - lambda value: value.to(device) if isinstance(value, torch.Tensor) else None, dummy_inputs + lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs ) check_dummy_inputs_are_allowed(model, dummy_inputs) inputs = config.ordered_inputs(model) @@ -390,7 +390,7 @@ def export_encoder_decoder_model( task: str, encoder_output: Path, decoder_output: Path, - decoder_with_past_output: Path = None, + decoder_with_past_output: Optional[Path] = None, device: str = "cpu", ) -> Tuple[List[List[str]], List[List[str]]]: """ @@ -411,7 +411,7 @@ def export_encoder_decoder_model( Directory to store the exported encoder ONNX model. decoder_output (`Path`): Directory to store the exported decoder ONNX model. - decoder_with_past_output (`Path`, *optional*, defaults to `None`): + decoder_with_past_output (`Optional[Path]`, defaults to `None`): Directory to store the exported decoder with past ONNX model. Required when `past_key_values` are exported. device (`str`, *optional*, defaults to `cpu`): The device on which the ONNX model will be exported. Either `cpu` or `cuda`. 
Only PyTorch is supported for
            export on CUDA devices.

From 224f79c26eb4c4407d57a07af9e12a1c3f845b37 Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Tue, 29 Nov 2022 14:02:19 +0100
Subject: [PATCH 16/17] Added optional import

---
 optimum/exporters/onnx/convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index 8a0f0a26a8..a1a7a76363 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -17,7 +17,7 @@
 from inspect import signature
 from itertools import chain
 from pathlib import Path
-from typing import Iterable, List, Tuple, Union
+from typing import Iterable, List, Optional, Tuple, Union

 import numpy as np
 from transformers.utils import is_tf_available, is_torch_available

From eef106c1074af3b3ead8b27b2a8850508a429d6a Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Wed, 30 Nov 2022 11:00:03 +0100
Subject: [PATCH 17/17] Remove redundant task from the export function

---
 optimum/exporters/onnx/__main__.py | 2 --
 optimum/exporters/onnx/convert.py | 14 ++------------
 optimum/exporters/onnx/utils.py | 10 +++++-----
 tests/exporters/test_onnx_export.py | 2 --
 4 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/optimum/exporters/onnx/__main__.py b/optimum/exporters/onnx/__main__.py
index 646218ddcc..5b1c1d4239 100644
--- a/optimum/exporters/onnx/__main__.py
+++ b/optimum/exporters/onnx/__main__.py
@@ -133,7 +133,6 @@ def main():
             model,
             onnx_config,
             args.opset,
-            task,
             args.output.parent.joinpath("encoder_model.onnx"),
             args.output.parent.joinpath("decoder_model.onnx"),
             args.output.parent.joinpath("decoder_with_past_model.onnx"),
@@ -169,7 +168,6 @@ def main():
             model,
             onnx_outputs,
             args.atol,
-            task,
             args.output.parent.joinpath("encoder_model.onnx"),
             args.output.parent.joinpath("decoder_model.onnx"),
             args.output.parent.joinpath("decoder_with_past_model.onnx"),
diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index a1a7a76363..b01a946cd9 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -66,7 +66,6 @@ def validate_encoder_decoder_model_outputs(
     reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
     onnx_named_outputs: List[str],
     atol: float,
-    task: str,
     encoder_onnx_model: Path,
     decoder_onnx_model: Path,
     decoder_with_past_onnx_model: Optional[Path] = None,
@@ -84,8 +83,6 @@
         The names of the outputs to check.
     atol (`float`):
         The absolute tolerance in terms of outputs difference between the reference and the exported model.
-    task (`str`)
-        The type of task to export the model with.
     encoder_onnx_model (`Path`):
         The path to the exported encoder ONNX model.
     decoder_onnx_model (`Path`):
@@ -95,9 +92,7 @@
     Raises:
         ValueError: If the outputs shapes or values do not match between the reference and the exported model.
""" - task = task.replace("-with-past", "") - - models_for_validation = get_encoder_decoder_models_for_export(reference_model, config, task) + models_for_validation = get_encoder_decoder_models_for_export(reference_model, config) if len(onnx_named_outputs) != len(models_for_validation.keys()): raise ValueError( @@ -387,7 +382,6 @@ def export_encoder_decoder_model( model: Union["PreTrainedModel", "TFPreTrainedModel"], config: OnnxConfig, opset: int, - task: str, encoder_output: Path, decoder_output: Path, decoder_with_past_output: Optional[Path] = None, @@ -405,8 +399,6 @@ def export_encoder_decoder_model( The ONNX configuration associated with the exported model. opset (`int`): The version of the ONNX operator set to use. - task (`str`) - The type of task to export the model with. encoder_output (`Path`): Directory to store the exported encoder ONNX model. decoder_output (`Path`): @@ -420,9 +412,7 @@ def export_encoder_decoder_model( `Tuple[List[List[str]], List[List[str]]]`: A tuple with an ordered list of the model's inputs, and the named inputs from the ONNX configuration. """ - task = task.replace("-with-past", "") - - models_for_export = get_encoder_decoder_models_for_export(model, config, task) + models_for_export = get_encoder_decoder_models_for_export(model, config) outputs = [] # export encoder diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py index 9d56aca8d2..247f1e6729 100644 --- a/optimum/exporters/onnx/utils.py +++ b/optimum/exporters/onnx/utils.py @@ -82,7 +82,7 @@ def check_onnxruntime_requirements(minimum_version: packaging.version.Version): def get_encoder_decoder_models_for_export( - model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "OnnxConfig", task: str + model: Union["PreTrainedModel", "TFPreTrainedModel"], config: "OnnxConfig" ) -> Dict[str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel"], "OnnxConfig"]]: """ Returns the encoder and decoder parts of the model and their subsequent onnx configs. @@ -92,8 +92,6 @@ def get_encoder_decoder_models_for_export( The model to export. config ([`~exporters.onnx.config.OnnxConfig`]): The ONNX configuration associated with the exported model. - task (`str`) - The type of task to export the model with. 
Returns: `Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `OnnxConfig`]: A Dict containing the model and @@ -106,11 +104,13 @@ def get_encoder_decoder_models_for_export( models_for_export["encoder"] = (encoder_model, encoder_onnx_config) decoder_model = model.get_decoder() - decoder_onnx_config = config.get_decoder_onnx_config(decoder_model.config, task, use_past=False) + decoder_onnx_config = config.get_decoder_onnx_config(decoder_model.config, config.task, use_past=False) models_for_export["decoder"] = (model, decoder_onnx_config) if config.use_past: - decoder_onnx_config_with_past = config.get_decoder_onnx_config(decoder_model.config, task, use_past=True) + decoder_onnx_config_with_past = config.get_decoder_onnx_config( + decoder_model.config, config.task, use_past=True + ) models_for_export["decoder_with_past"] = (model, decoder_onnx_config_with_past) return models_for_export diff --git a/tests/exporters/test_onnx_export.py b/tests/exporters/test_onnx_export.py index 1ff4db8f09..ee3a8b6307 100644 --- a/tests/exporters/test_onnx_export.py +++ b/tests/exporters/test_onnx_export.py @@ -291,7 +291,6 @@ def _onnx_export( model, onnx_config, onnx_config.DEFAULT_ONNX_OPSET, - task, Path(encoder_output.name), Path(decoder_output.name), Path(decoder_with_past_output.name), @@ -303,7 +302,6 @@ def _onnx_export( model, onnx_outputs, atol, - task, Path(encoder_output.name), Path(decoder_output.name), Path(decoder_with_past_output.name),
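
For reference, the end state of this series can be driven programmatically roughly as follows. This is a minimal sketch, not part of the diff: the `BartOnnxConfig(model.config, task=..., use_past=...)` construction and the hard-coded `1e-4` tolerance are assumptions (the CLI resolves the tolerance from the config's `ATOL_FOR_VALIDATION`), while the two `convert` helpers and their argument order follow the post-[PATCH 17/17] signatures shown above.

```python
# Hypothetical driver for the final-state API in this series; assumptions are
# marked inline, everything else mirrors the signatures visible in the diffs.
from pathlib import Path

from transformers import AutoModelForSeq2SeqLM

from optimum.exporters.onnx.convert import (
    export_encoder_decoder_model,
    validate_encoder_decoder_model_outputs,
)
from optimum.exporters.onnx.model_configs import BartOnnxConfig

model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")

# Assumption: OnnxConfigWithPast subclasses take (config, task, use_past).
# use_past=True makes the helpers also emit and validate
# decoder_with_past_model.onnx, mirroring the "-with-past" CLI tasks.
onnx_config = BartOnnxConfig(model.config, task="seq2seq-lm", use_past=True)

output = Path("bart_onnx")
output.mkdir(parents=True, exist_ok=True)
encoder_path = output / "encoder_model.onnx"
decoder_path = output / "decoder_model.onnx"
decoder_with_past_path = output / "decoder_with_past_model.onnx"

# Exports the encoder, the decoder, and (because use_past=True) the
# decoder-with-past as three separate ONNX files.
onnx_inputs, onnx_outputs = export_encoder_decoder_model(
    model,
    onnx_config,
    onnx_config.DEFAULT_ONNX_OPSET,
    encoder_path,
    decoder_path,
    decoder_with_past_path,
)

# Runs each exported file under ONNX Runtime and compares its outputs against
# the reference model. 1e-4 is a placeholder tolerance; the CLI uses the
# config's ATOL_FOR_VALIDATION instead.
validate_encoder_decoder_model_outputs(
    onnx_config,
    model,
    onnx_outputs,
    1e-4,
    encoder_path,
    decoder_path,
    decoder_with_past_path,
)
```

With `use_past=True`, the three files produced match what `--for-ort` writes for a `*-with-past` task, which is what the `ORTModel` classes in `optimum.onnxruntime` expect to load.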