Merge branch 'master' into support-onnx-export-float16
fxmarty committed Feb 7, 2023
2 parents c13b017 + a65191e commit 2d210a6
Showing 22 changed files with 507 additions and 212 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/test_exporters_slow.yml
@@ -0,0 +1,33 @@
name: Exporters slow / Python - Test

on:
  workflow_dispatch:
  schedule:
    - cron: 0 7 * * * # every day at 7am

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.8, 3.9]
        os: [ubuntu-20.04]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install .[tests,exporters-tf]
      - name: Test with unittest
        working-directory: tests
        run: |
          RUN_SLOW=1 pytest exporters -s --durations=0
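The same slow suite can be reproduced locally with something like the following (a sketch, assuming a checkout of the repository root):

```bash
pip install .[tests,exporters-tf]
cd tests
RUN_SLOW=1 pytest exporters -s --durations=0
```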
25 changes: 14 additions & 11 deletions docs/source/exporters/onnx/usage_guides/export_a_model.mdx
@@ -70,13 +70,16 @@ The Optimum ONNX export can be used through Optimum command-line:
```bash
optimum-cli export onnx --help

usage: optimum-cli <command> [<args>] export onnx [-h] -m MODEL [--task TASK] [--for-ort] [--device DEVICE] [--opset OPSET] [--atol ATOL]
[--framework {pt,tf}] [--pad_token_id PAD_TOKEN_ID] [--cache_dir CACHE_DIR] [--batch_size BATCH_SIZE]
[--sequence_length SEQUENCE_LENGTH] [--num_choices NUM_CHOICES] [--width WIDTH] [--height HEIGHT]
[--num_channels NUM_CHANNELS] [--feature_size FEATURE_SIZE] [--nb_max_frames NB_MAX_FRAMES]
[--audio_sequence_length AUDIO_SEQUENCE_LENGTH]
usage: optimum-cli <command> [<args>] export onnx [-h] -m MODEL [--task TASK] [--monolith] [--device DEVICE] [--opset OPSET] [--atol ATOL]
[--framework {pt,tf}] [--pad_token_id PAD_TOKEN_ID] [--cache_dir CACHE_DIR] [--trust-remote-code]
[--batch_size BATCH_SIZE] [--sequence_length SEQUENCE_LENGTH] [--num_choices NUM_CHOICES] [--width WIDTH]
[--height HEIGHT] [--num_channels NUM_CHANNELS] [--feature_size FEATURE_SIZE]
[--nb_max_frames NB_MAX_FRAMES] [--audio_sequence_length AUDIO_SEQUENCE_LENGTH]
output

optional arguments:
-h, --help show this help message and exit

Required arguments:
-m MODEL, --model MODEL
Model ID on huggingface.co or path on disk to load model from.
Expand All @@ -86,10 +89,10 @@ Optional arguments:
--task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on
the model, but are among: ['default', 'masked-lm', 'causal-lm', 'seq2seq-lm', 'sequence-classification', 'token-classification',
'multiple-choice', 'object-detection', 'question-answering', 'image-classification', 'image-segmentation', 'masked-im', 'semantic-
segmentation', 'speech2seq-lm', 'stable-diffusion']. For decoder models, use `xxx-with-past` to export the model using past key
values in the decoder.
--for-ort This exports models ready to be run with Optimum's ORTModel. Useful for encoder-decoder models for conditional generation. If
enabled the encoder and decoder of the model are exported separately.
segmentation', 'speech2seq-lm', 'audio-classification', 'audio-frame-classification', 'audio-ctc', 'audio-xvector', 'vision2seq-
lm', 'stable-diffusion']. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder.
--monolith Force to export the model as a single ONNX file. By default, the ONNX exporter may break the model in several ONNX files, for
example for encoder-decoder models where the encoder should be run only once while the decoder is looped over.
--device DEVICE The device to use to do the export. Defaults to "cpu".
--opset OPSET If specified, ONNX opset version to export the model with. Otherwise, the default opset will be used.
--atol ATOL If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will be used.
@@ -187,7 +190,7 @@ optimum-cli export onnx --model keras-io/transformers-qa distilbert_base_cased_s

### Exporting a model to be used with Optimum's ORTModel

Models exported through `optimum-cli export onnx` can be used directly in [`~onnxruntime.ORTModel`] by passing the parameter `--for-ort`. This is especially useful for encoder-decoder models, where the export splits the encoder and decoder into two `.onnx` files, as the encoder is usually run only once while the decoder may be run several times during autoregressive generation.
Models exported through `optimum-cli export onnx` can be used directly in [`~onnxruntime.ORTModel`]. This is especially useful for encoder-decoder models, where the export splits the encoder and decoder into two `.onnx` files, as the encoder is usually run only once while the decoder may be run several times during autoregressive generation.
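For instance, a seq2seq export can be loaded back through Optimum's ONNX Runtime classes (a minimal sketch; `t5_onnx/` is a hypothetical directory containing a seq2seq ONNX export):

```python
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

# Load the encoder/decoder ONNX files produced by `optimum-cli export onnx`.
tokenizer = AutoTokenizer.from_pretrained("t5_onnx/")
model = ORTModelForSeq2SeqLM.from_pretrained("t5_onnx/")

inputs = tokenizer("Translate English to French: Hello!", return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```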

### Exporting a model using past keys/values in the decoder

@@ -198,7 +201,7 @@ In the ONNX export, the past keys/values are reused by default. This behavior co
A model exported using past keys/values can be used directly with Optimum's [`~onnxruntime.ORTModel`]:

```bash
optimum-cli export onnx --model gpt2 --for-ort --task causal-lm-with-past gpt2_onnx/
optimum-cli export onnx --model gpt2 --task causal-lm-with-past gpt2_onnx/
```

and
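then loaded back for generation. A minimal sketch of the loading side (assuming `ORTModelForCausalLM` from `optimum.onnxruntime` and the `gpt2_onnx/` directory produced above):

```python
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForCausalLM

# Load the ONNX files exported with `--task causal-lm-with-past`.
tokenizer = AutoTokenizer.from_pretrained("gpt2_onnx/")
model = ORTModelForCausalLM.from_pretrained("gpt2_onnx/")

inputs = tokenizer("My name is Arthur and I live in", return_tensors="pt")
gen_tokens = model.generate(**inputs)
print(tokenizer.batch_decode(gen_tokens, skip_special_tokens=True))
```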
5 changes: 5 additions & 0 deletions optimum/bettertransformer/transformation.py
@@ -215,6 +215,11 @@ def transform(
        else:
            load_accelerate = False

        if hasattr(model, "use_bettertransformer") and model.use_bettertransformer is True:
            raise Exception(
                "`BetterTransformer.transform()` was called on a model already using Better Transformer modeling."
            )

        if BetterTransformerManager.cannot_support(model.config.model_type):
            raise ValueError(
                f"The model type {model.config.model_type} can not be supported to be used with BetterTransformer. The identified reason is:"
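With this guard, calling `transform` twice on the same model now fails fast instead of silently re-converting. A minimal sketch of the behavior (assuming any BetterTransformer-supported checkpoint, e.g. `bert-base-uncased`):

```python
from transformers import AutoModel
from optimum.bettertransformer import BetterTransformer

model = AutoModel.from_pretrained("bert-base-uncased")
model = BetterTransformer.transform(model)  # first call converts the model

try:
    BetterTransformer.transform(model)  # second call now raises
except Exception as e:
    print(e)
```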
11 changes: 8 additions & 3 deletions optimum/commands/export/onnx.py
@@ -13,6 +13,7 @@
# limitations under the License.
"""Defines the command line for the export with ONNX."""

import argparse
import subprocess
from pathlib import Path

@@ -39,11 +40,12 @@ def parse_args_onnx(parser):
        ),
    )
    optional_group.add_argument(
        "--for-ort",
        "--monolith",
        action="store_true",
        help=(
            "This exports models ready to be run with Optimum's ORTModel. Useful for encoder-decoder models for"
            " conditional generation. If enabled the encoder and decoder of the model are exported separately."
            "Force to export the model as a single ONNX file. By default, the ONNX exporter may break the model in several"
            " ONNX files, for example for encoder-decoder models where the encoder should be run only once while the"
            " decoder is looped over."
        ),
    )
    optional_group.add_argument(
@@ -157,6 +159,9 @@
        help=f"Audio tasks only. Audio sequence length {doc_input}",
    )

    # deprecated argument
    parser.add_argument("--for-ort", action="store_true", help=argparse.SUPPRESS)


class ONNXExportCommand:
    def __init__(self, args_string):
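Passing `help=argparse.SUPPRESS` keeps the deprecated `--for-ort` flag parseable while hiding it from the help text. A self-contained sketch of the pattern (a hypothetical parser, not Optimum's actual CLI wiring):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--monolith", action="store_true", help="Export as a single ONNX file.")
# Deprecated flag: still accepted so existing scripts keep working, but hidden from --help.
parser.add_argument("--for-ort", action="store_true", help=argparse.SUPPRESS)

args = parser.parse_args(["--for-ort"])
print(args.for_ort)  # True, even though --help does not list the flag
```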
10 changes: 8 additions & 2 deletions optimum/exporters/onnx/__main__.py
@@ -50,6 +50,12 @@ def main():
    if not args.output.parent.exists():
        args.output.parent.mkdir(parents=True)

    if args.for_ort:
        logger.warning(
            "The option --for-ort was passed, but its behavior is now the default in the ONNX exporter"
            " and passing it is not required anymore."
        )

    # Infer the task
    task = args.task
    if task == "auto":
@@ -123,7 +129,7 @@
    maybe_save_preprocessors(args.model, args.output.parent)

    if task == "stable-diffusion" or (
        args.for_ort and (model.config.is_encoder_decoder or task.startswith("causal-lm"))
        task.startswith(("causal-lm", "seq2seq-lm", "speech2seq-lm", "vision2seq-lm")) and not args.monolith
    ):
        if task == "stable-diffusion":
            output_names = [
@@ -173,7 +179,7 @@

    try:
        if task == "stable-diffusion" or (
            args.for_ort and (model.config.is_encoder_decoder or task.startswith("causal-lm"))
            task.startswith(("causal-lm", "seq2seq-lm", "speech2seq-lm", "vision2seq-lm")) and not args.monolith
        ):
            validate_models_outputs(
                models_and_onnx_configs=models_and_onnx_configs,
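In practice, the new default means a generative-task export now produces several ONNX files unless `--monolith` is passed. A hypothetical invocation pair (the split file names `encoder_model.onnx`, `decoder_model.onnx`, and `decoder_with_past_model.onnx` are the exporter's usual outputs):

```bash
# Default: encoder and decoder are exported as separate ONNX files.
optimum-cli export onnx --model t5-small --task seq2seq-lm-with-past t5_onnx/

# Force a single ONNX file instead.
optimum-cli export onnx --model t5-small --monolith t5_onnx_single/
```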