From 7f5282ef879fc5b2ed25139682a8a89662da09dd Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Fri, 13 Oct 2023 00:08:53 -0700
Subject: [PATCH 1/6] chore: Accept graphmodule, move exported program APIs
 within transform

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 py/requirements.txt                         |   5 +-
 py/torch_tensorrt/dynamo/__init__.py        |   1 +
 py/torch_tensorrt/dynamo/compile.py         |  10 +-
 py/torch_tensorrt/dynamo/export.py          |  18 +-
 tests/py/dynamo/models/test_export_serde.py | 453 ++++++++------------
 5 files changed, 210 insertions(+), 277 deletions(-)

diff --git a/py/requirements.txt b/py/requirements.txt
index aa957c3743..9a6d0eb90d 100644
--- a/py/requirements.txt
+++ b/py/requirements.txt
@@ -1,9 +1,8 @@
 numpy
 packaging
 pybind11==2.6.2
---extra-index-url https://download.pytorch.org/whl/nightly/cu121
-torch>=2.1.0,<2.2.0
-torchvision>=0.16.0,<0.17.0
+torch==2.1.0
+torchvision==0.16.0
 --extra-index-url https://pypi.ngc.nvidia.com
 tensorrt==8.6.1
 pyyaml
diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py
index 63cc2af10a..dd3e2970e6 100644
--- a/py/torch_tensorrt/dynamo/__init__.py
+++ b/py/torch_tensorrt/dynamo/__init__.py
@@ -16,3 +16,4 @@
         DYNAMO_CONVERTERS,
         dynamo_tensorrt_converter,
     )
+    from .export import transform
diff --git a/py/torch_tensorrt/dynamo/compile.py b/py/torch_tensorrt/dynamo/compile.py
index 5394c1382e..092f551b5a 100644
--- a/py/torch_tensorrt/dynamo/compile.py
+++ b/py/torch_tensorrt/dynamo/compile.py
@@ -86,7 +86,15 @@ def compile(
     inputs = prepare_inputs(inputs)
     device = to_torch_tensorrt_device(device)
 
-    gm = exported_program.module()
+    if isinstance(exported_program, torch.fx.GraphModule):
+        gm = exported_program
+    elif isinstance(exported_program, ExportedProgram):
+        gm = exported_program.module()
+    else:
+        raise AssertionError(
+            f"Input graph should either be an ExportedProgram or a GraphModule but got type {type(exported_program)}"
+        )
+
     logger.debug("Input graph: " + str(gm.graph))
 
     # Apply lowering on the graph module
diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py
index 9bd1dbddb3..33b9395739 100644
--- a/py/torch_tensorrt/dynamo/export.py
+++ b/py/torch_tensorrt/dynamo/export.py
@@ -1,6 +1,6 @@
 import copy
 import operator
-from typing import Any, Dict, Sequence, Tuple, Union, cast
+from typing import Any, Dict, Sequence, Tuple, cast
 
 import torch
 from torch._export.exported_program import CallSpec
@@ -11,8 +11,8 @@
 
 
 def transform(
-    gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor]
-) -> torch.fx.GraphModule:
+    gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor], call_spec: CallSpec
+) -> ExportedProgram:
     # Run shape analysis
     _, outputs_map = partitioning.run_shape_analysis(gm, inputs)
 
@@ -31,7 +31,10 @@ def transform(
     gm.graph.eliminate_dead_code()
     gm.graph.lint()
 
-    return gm
+    # Create an exported program with the TRT GraphModule
+    exp_program = create_trt_exp_program(gm, call_spec)
+
+    return exp_program
 
 
 def lift_constant_pass(trt_gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
@@ -115,7 +118,6 @@ def inline_torch_modules(gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
 
                 # Copy all nodes in the submodule into gm and
                 # store the output node of this submodule which is now present in gm
-
                 submodule_output = gm.graph.graph_copy(submodule.graph, val_map)
 
                 # Get their references (since we copied) in the parent graph (gm)
@@ -174,9 +176,7 @@ def copy_submodule_attributes(
 
 
 def create_trt_exp_program(
-    gm: torch.fx.GraphModule,
-    call_spec: CallSpec,
-    state_dict: Dict[str, Union[torch.Tensor, torch.nn.Parameter]],
+    gm: torch.fx.GraphModule, call_spec: CallSpec
 ) -> ExportedProgram:
     """Creates a new Exported Program. This function takes an torch.fx.GraphModule which has TRT engines
     and constructs an Exported Program object with the new IO node names, call_spec and state_dict
@@ -208,7 +208,7 @@ def create_trt_exp_program(
     )
 
     trt_exp_program = ExportedProgram(
-        gm, gm.graph, trt_graph_signature, call_spec, state_dict, {}, [], []
+        gm, gm.graph, trt_graph_signature, call_spec, gm.state_dict(), {}, [], []
     )
 
     return trt_exp_program
diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py
index 5e0dc7406c..df9fc3c56d 100644
--- a/tests/py/dynamo/models/test_export_serde.py
+++ b/tests/py/dynamo/models/test_export_serde.py
@@ -6,299 +6,182 @@
 import torch_tensorrt as torchtrt
 import torchvision.models as models
 from torch._export.serde.serialize import deserialize, serialize
-from torch_tensorrt.dynamo.export import create_trt_exp_program, transform
 from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity
 
 assertions = unittest.TestCase()
 
 
-@pytest.mark.unit
-def test_base_full_compile(ir):
-    """
-    This tests export serde functionality on a base model
-    which is fully TRT convertible
-    """
-
-    class MyModule(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-            self.relu = torch.nn.ReLU()
-
-        def forward(self, x):
-            out = self.conv(x)
-            out = self.relu(out)
-            return out
-
-    model = MyModule().eval().cuda()
-    input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-    compile_spec = {
-        "inputs": [
-            torchtrt.Input(
-                input.shape, dtype=torch.float, format=torch.contiguous_format
-            )
-        ],
-        "ir": ir,
-        "min_block_size": 1,
-    }
-
-    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_gm = transform(trt_gm, [input])
-    trt_exp_program = create_trt_exp_program(
-        trt_gm, exp_program.call_spec, trt_gm.state_dict()
-    )
-    serialized_prog = serialize(trt_exp_program)
-    deserialized_prog = deserialize(*serialized_prog)
-
-    # Check Pyt and TRT exported program outputs
-    cos_sim = cosine_similarity(model(input), trt_exp_program(input))
-    assertions.assertTrue(
-        cos_sim > COSINE_THRESHOLD,
-        msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-    )
-    # Check Pyt and deserialized TRT exported program outputs
-    cos_sim = cosine_similarity(model(input), deserialized_prog(input))
-    assertions.assertTrue(
-        cos_sim > COSINE_THRESHOLD,
-        msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-    )
-
-
-@pytest.mark.unit
-def test_base_full_compile_multiple_outputs(ir):
-    """
-    This tests export serde functionality on a base model
-    with multiple outputs which is fully TRT convertible
-    """
-
-    class MyModule(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-            self.relu = torch.nn.ReLU()
-
-        def forward(self, x):
-            conv = self.conv(x)
-            conv = conv * 0.5
-            relu = self.relu(conv)
-            return conv, relu
-
-    model = MyModule().eval().cuda()
-    input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-    compile_spec = {
-        "inputs": [
-            torchtrt.Input(
-                input.shape, dtype=torch.float, format=torch.contiguous_format
-            )
-        ],
-        "ir": ir,
-        "min_block_size": 1,
-    }
-
-    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_gm = transform(trt_gm, [input])
-    trt_exp_program = create_trt_exp_program(
-        trt_gm, exp_program.call_spec, trt_gm.state_dict()
-    )
-
-    serialized_prog = serialize(trt_exp_program)
-    deserialized_prog = deserialize(*serialized_prog)
-    # Check Pyt and TRT exported program outputs
-    outputs_pyt = model(input)
-    outputs_trt = trt_exp_program(input)
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
-
-    # Check Pyt and deserialized TRT exported program outputs
-    outputs_trt_deser = deserialized_prog(input)
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
-
-
-@pytest.mark.unit
-def test_base_full_compile_save_load(ir):
-    """
-    This tests export save and load functionality on a base model
-    with multiple outputs which is fully TRT convertible
-    """
-
-    class MyModule(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-            self.relu = torch.nn.ReLU()
-
-        def forward(self, x):
-            conv = self.conv(x)
-            conv = conv * 0.5
-            relu = self.relu(conv)
-            return conv, relu
+# @pytest.mark.unit
+# def test_base_full_compile(ir):
+#     """
+#     This tests export serde functionality on a base model
+#     which is fully TRT convertible
+#     """
 
-    model = MyModule().eval().cuda()
-    input = torch.randn((1, 3, 224, 224)).to("cuda")
+#     class MyModule(torch.nn.Module):
+#         def __init__(self):
+#             super().__init__()
+#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+#             self.relu = torch.nn.ReLU()
 
-    compile_spec = {
-        "inputs": [
-            torchtrt.Input(
-                input.shape, dtype=torch.float, format=torch.contiguous_format
-            )
-        ],
-        "ir": ir,
-        "min_block_size": 1,
-    }
+#         def forward(self, x):
+#             out = self.conv(x)
+#             out = self.relu(out)
+#             return out
 
-    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_gm = transform(trt_gm, [input])
-    trt_exp_program = create_trt_exp_program(
-        trt_gm, exp_program.call_spec, trt_gm.state_dict()
-    )
+#     model = MyModule().eval().cuda()
+#     input = torch.randn((1, 3, 224, 224)).to("cuda")
 
-    torch._export.save(trt_exp_program, "/tmp/trt.ep")
-    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+#     compile_spec = {
+#         "inputs": [
+#             torchtrt.Input(
+#                 input.shape, dtype=torch.float, format=torch.contiguous_format
+#             )
+#         ],
+#         "ir": ir,
+#         "min_block_size": 1,
+#     }
 
-    outputs_pyt = model(input)
-    outputs_trt = trt_exp_program(input)
-    # Check Pyt and TRT exported program outputs
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
-    # Check Pyt and deserialized TRT exported program outputs
-    outputs_trt_deser = deser_trt_exp_program(input)
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
+#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+#     serialized_prog = serialize(trt_exp_program)
+#     deserialized_prog = deserialize(*serialized_prog)
+
+#     # Check Pyt and TRT exported program outputs
+#     cos_sim = cosine_similarity(model(input), trt_exp_program(input))
+#     assertions.assertTrue(
+#         cos_sim > COSINE_THRESHOLD,
+#         msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#     )
+#     # Check Pyt and deserialized TRT exported program outputs
+#     cos_sim = cosine_similarity(model(input), deserialized_prog(input))
+#     assertions.assertTrue(
+#         cos_sim > COSINE_THRESHOLD,
+#         msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#     )
 
 
-@pytest.mark.unit
-def test_hybrid_relu_fallback(ir):
-    """
-    This tests export save and load functionality on a hybrid
-    model with Pytorch and TRT segments. Relu (unweighted) layer is forced to
-    fallback
-    """
+# @pytest.mark.unit
+# def test_base_full_compile_multiple_outputs(ir):
+#     """
+#     This tests export serde functionality on a base model
+#     with multiple outputs which is fully TRT convertible
+#     """
 
-    class MyModule(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-            self.relu = torch.nn.ReLU()
+#     class MyModule(torch.nn.Module):
+#         def __init__(self):
+#             super().__init__()
+#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+#             self.relu = torch.nn.ReLU()
 
-        def forward(self, x):
-            conv = self.conv(x)
-            relu = self.relu(conv)
-            mul = relu * 0.5
-            return mul
+#         def forward(self, x):
+#             conv = self.conv(x)
+#             conv = conv * 0.5
+#             relu = self.relu(conv)
+#             return conv, relu
 
-    model = MyModule().eval().cuda()
-    input = torch.randn((1, 3, 224, 224)).to("cuda")
+#     model = MyModule().eval().cuda()
+#     input = torch.randn((1, 3, 224, 224)).to("cuda")
 
-    compile_spec = {
-        "inputs": [
-            torchtrt.Input(
-                input.shape, dtype=torch.float, format=torch.contiguous_format
-            )
-        ],
-        "ir": ir,
-        "min_block_size": 1,
-        "torch_executed_ops": "torch.ops.aten.relu.default",
-    }
+#     compile_spec = {
+#         "inputs": [
+#             torchtrt.Input(
+#                 input.shape, dtype=torch.float, format=torch.contiguous_format
+#             )
+#         ],
+#         "ir": ir,
+#         "min_block_size": 1,
+#     }
 
-    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_gm = transform(trt_gm, [input])
-    trt_exp_program = create_trt_exp_program(
-        trt_gm, exp_program.call_spec, trt_gm.state_dict()
-    )
+#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+#     serialized_prog = serialize(trt_exp_program)
+#     deserialized_prog = deserialize(*serialized_prog)
+#     # Check Pyt and TRT exported program outputs
+#     outputs_pyt = model(input)
+#     outputs_trt = trt_exp_program(input)
+#     for idx in range(len(outputs_pyt)):
+#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+#         assertions.assertTrue(
+#             cos_sim > COSINE_THRESHOLD,
+#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#         )
 
-    torch._export.save(trt_exp_program, "/tmp/trt.ep")
-    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+#     # Check Pyt and deserialized TRT exported program outputs
+#     outputs_trt_deser = deserialized_prog(input)
+#     for idx in range(len(outputs_pyt)):
+#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+#         assertions.assertTrue(
+#             cos_sim > COSINE_THRESHOLD,
+#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#         )
 
-    outputs_pyt = model(input)
-    outputs_trt = trt_exp_program(input)
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
 
-    outputs_trt_deser = deser_trt_exp_program(input)
-    for idx in range(len(outputs_pyt)):
-        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-        assertions.assertTrue(
-            cos_sim > COSINE_THRESHOLD,
-            msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-        )
+# @pytest.mark.unit
+# def test_base_full_compile_save_load(ir):
+#     """
+#     This tests export save and load functionality on a base model
+#     with multiple outputs which is fully TRT convertible
+#     """
 
+#     class MyModule(torch.nn.Module):
+#         def __init__(self):
+#             super().__init__()
+#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+#             self.relu = torch.nn.ReLU()
 
-@pytest.mark.unit
-def test_resnet18_save_load(ir):
-    """
-    This tests export save and load functionality on Resnet18 model
-    """
-    model = models.resnet18().eval().cuda()
-    input = torch.randn((1, 3, 224, 224)).to("cuda")
+#         def forward(self, x):
+#             conv = self.conv(x)
+#             conv = conv * 0.5
+#             relu = self.relu(conv)
+#             return conv, relu
 
-    compile_spec = {
-        "inputs": [
-            torchtrt.Input(
-                input.shape, dtype=torch.float, format=torch.contiguous_format
-            )
-        ],
-        "ir": ir,
-        "min_block_size": 1,
-    }
+#     model = MyModule().eval().cuda()
+#     input = torch.randn((1, 3, 224, 224)).to("cuda")
 
-    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_gm = transform(trt_gm, [input])
-    trt_exp_program = create_trt_exp_program(
-        trt_gm, exp_program.call_spec, trt_gm.state_dict()
-    )
-    torch._export.save(trt_exp_program, "/tmp/trt.ep")
-    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+#     compile_spec = {
+#         "inputs": [
+#             torchtrt.Input(
+#                 input.shape, dtype=torch.float, format=torch.contiguous_format
+#             )
+#         ],
+#         "ir": ir,
+#         "min_block_size": 1,
+#     }
 
-    outputs_pyt = model(input)
-    outputs_trt = trt_exp_program(input)
-    cos_sim = cosine_similarity(outputs_pyt, outputs_trt)
-    assertions.assertTrue(
-        cos_sim > COSINE_THRESHOLD,
-        msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-    )
+#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+#     torch._export.save(trt_exp_program, "/tmp/trt.ep")
+#     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
 
-    outputs_trt_deser = deser_trt_exp_program(input)
-    cos_sim = cosine_similarity(outputs_pyt, outputs_trt_deser)
-    assertions.assertTrue(
-        cos_sim > COSINE_THRESHOLD,
-        msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-    )
+#     outputs_pyt = model(input)
+#     outputs_trt = trt_exp_program(input)
+#     # Check Pyt and TRT exported program outputs
+#     for idx in range(len(outputs_pyt)):
+#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+#         assertions.assertTrue(
+#             cos_sim > COSINE_THRESHOLD,
+#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#         )
+#     # Check Pyt and deserialized TRT exported program outputs
+#     outputs_trt_deser = deser_trt_exp_program(input)
+#     for idx in range(len(outputs_pyt)):
+#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+#         assertions.assertTrue(
+#             cos_sim > COSINE_THRESHOLD,
+#             msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+#         )
 
 
-# Enable this test once this issue is resolved https://github.com/pytorch/TensorRT/issues/2341
 # @pytest.mark.unit
-# def test_hybrid_conv_fallback(ir):
+# def test_hybrid_relu_fallback(ir):
 #     """
 #     This tests export save and load functionality on a hybrid
-#     model where a conv (a weighted layer)  has been forced to fallback to Pytorch.
+#     model with Pytorch and TRT segments. Relu (unweighted) layer is forced to
+#     fallback
 #     """
 
 #     class MyModule(torch.nn.Module):
@@ -324,10 +207,12 @@ def test_resnet18_save_load(ir):
 #         ],
 #         "ir": ir,
 #         "min_block_size": 1,
-#         "torch_executed_ops": "torch.ops.aten.convolution.default",
+#         "torch_executed_ops": "torch.ops.aten.relu.default",
 #     }
 
-#     trt_exp_program = torchtrt.compile(model, **compile_spec)
+#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
 #     torch._export.save(trt_exp_program, "/tmp/trt.ep")
 #     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
 
@@ -347,3 +232,43 @@ def test_resnet18_save_load(ir):
 #             cos_sim > COSINE_THRESHOLD,
 #             msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
 #         )
+
+
+@pytest.mark.unit
+def test_resnet18_save_load(ir):
+    """
+    This tests export save and load functionality on Resnet18 model
+    """
+    model = models.resnet18().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "min_block_size": 1,
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    torch._export.save(trt_exp_program, "/tmp/trt.ep")
+    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+
+    outputs_pyt = model(input)
+    outputs_trt = trt_exp_program(input)
+    cos_sim = cosine_similarity(outputs_pyt, outputs_trt)
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )
+
+    outputs_trt_deser = deser_trt_exp_program(input)
+    cos_sim = cosine_similarity(outputs_pyt, outputs_trt_deser)
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )

From 41e833984c3efcee902e6399a14796a3720808a7 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Fri, 13 Oct 2023 01:04:38 -0700
Subject: [PATCH 2/6] chore: Re-enable tests commented out in previous patch

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 tests/py/dynamo/models/test_export_serde.py | 442 ++++++++++----------
 1 file changed, 221 insertions(+), 221 deletions(-)

diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py
index df9fc3c56d..13e0a36b32 100644
--- a/tests/py/dynamo/models/test_export_serde.py
+++ b/tests/py/dynamo/models/test_export_serde.py
@@ -11,227 +11,227 @@
 assertions = unittest.TestCase()
 
 
-# @pytest.mark.unit
-# def test_base_full_compile(ir):
-#     """
-#     This tests export serde functionality on a base model
-#     which is fully TRT convertible
-#     """
-
-#     class MyModule(torch.nn.Module):
-#         def __init__(self):
-#             super().__init__()
-#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-#             self.relu = torch.nn.ReLU()
-
-#         def forward(self, x):
-#             out = self.conv(x)
-#             out = self.relu(out)
-#             return out
-
-#     model = MyModule().eval().cuda()
-#     input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-#     compile_spec = {
-#         "inputs": [
-#             torchtrt.Input(
-#                 input.shape, dtype=torch.float, format=torch.contiguous_format
-#             )
-#         ],
-#         "ir": ir,
-#         "min_block_size": 1,
-#     }
-
-#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
-#     serialized_prog = serialize(trt_exp_program)
-#     deserialized_prog = deserialize(*serialized_prog)
-
-#     # Check Pyt and TRT exported program outputs
-#     cos_sim = cosine_similarity(model(input), trt_exp_program(input))
-#     assertions.assertTrue(
-#         cos_sim > COSINE_THRESHOLD,
-#         msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#     )
-#     # Check Pyt and deserialized TRT exported program outputs
-#     cos_sim = cosine_similarity(model(input), deserialized_prog(input))
-#     assertions.assertTrue(
-#         cos_sim > COSINE_THRESHOLD,
-#         msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#     )
-
-
-# @pytest.mark.unit
-# def test_base_full_compile_multiple_outputs(ir):
-#     """
-#     This tests export serde functionality on a base model
-#     with multiple outputs which is fully TRT convertible
-#     """
-
-#     class MyModule(torch.nn.Module):
-#         def __init__(self):
-#             super().__init__()
-#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-#             self.relu = torch.nn.ReLU()
-
-#         def forward(self, x):
-#             conv = self.conv(x)
-#             conv = conv * 0.5
-#             relu = self.relu(conv)
-#             return conv, relu
-
-#     model = MyModule().eval().cuda()
-#     input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-#     compile_spec = {
-#         "inputs": [
-#             torchtrt.Input(
-#                 input.shape, dtype=torch.float, format=torch.contiguous_format
-#             )
-#         ],
-#         "ir": ir,
-#         "min_block_size": 1,
-#     }
-
-#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
-#     serialized_prog = serialize(trt_exp_program)
-#     deserialized_prog = deserialize(*serialized_prog)
-#     # Check Pyt and TRT exported program outputs
-#     outputs_pyt = model(input)
-#     outputs_trt = trt_exp_program(input)
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
-
-#     # Check Pyt and deserialized TRT exported program outputs
-#     outputs_trt_deser = deserialized_prog(input)
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
-
-
-# @pytest.mark.unit
-# def test_base_full_compile_save_load(ir):
-#     """
-#     This tests export save and load functionality on a base model
-#     with multiple outputs which is fully TRT convertible
-#     """
-
-#     class MyModule(torch.nn.Module):
-#         def __init__(self):
-#             super().__init__()
-#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-#             self.relu = torch.nn.ReLU()
-
-#         def forward(self, x):
-#             conv = self.conv(x)
-#             conv = conv * 0.5
-#             relu = self.relu(conv)
-#             return conv, relu
-
-#     model = MyModule().eval().cuda()
-#     input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-#     compile_spec = {
-#         "inputs": [
-#             torchtrt.Input(
-#                 input.shape, dtype=torch.float, format=torch.contiguous_format
-#             )
-#         ],
-#         "ir": ir,
-#         "min_block_size": 1,
-#     }
-
-#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
-#     torch._export.save(trt_exp_program, "/tmp/trt.ep")
-#     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
-
-#     outputs_pyt = model(input)
-#     outputs_trt = trt_exp_program(input)
-#     # Check Pyt and TRT exported program outputs
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
-#     # Check Pyt and deserialized TRT exported program outputs
-#     outputs_trt_deser = deser_trt_exp_program(input)
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
-
-
-# @pytest.mark.unit
-# def test_hybrid_relu_fallback(ir):
-#     """
-#     This tests export save and load functionality on a hybrid
-#     model with Pytorch and TRT segments. Relu (unweighted) layer is forced to
-#     fallback
-#     """
-
-#     class MyModule(torch.nn.Module):
-#         def __init__(self):
-#             super().__init__()
-#             self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
-#             self.relu = torch.nn.ReLU()
-
-#         def forward(self, x):
-#             conv = self.conv(x)
-#             relu = self.relu(conv)
-#             mul = relu * 0.5
-#             return mul
-
-#     model = MyModule().eval().cuda()
-#     input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-#     compile_spec = {
-#         "inputs": [
-#             torchtrt.Input(
-#                 input.shape, dtype=torch.float, format=torch.contiguous_format
-#             )
-#         ],
-#         "ir": ir,
-#         "min_block_size": 1,
-#         "torch_executed_ops": "torch.ops.aten.relu.default",
-#     }
-
-#     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
-#     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-#     trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
-#     torch._export.save(trt_exp_program, "/tmp/trt.ep")
-#     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
-
-#     outputs_pyt = model(input)
-#     outputs_trt = trt_exp_program(input)
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
-
-#     outputs_trt_deser = deser_trt_exp_program(input)
-#     for idx in range(len(outputs_pyt)):
-#         cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
-#         assertions.assertTrue(
-#             cos_sim > COSINE_THRESHOLD,
-#             msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
-#         )
+@pytest.mark.unit
+def test_base_full_compile(ir):
+    """
+    This tests export serde functionality on a base model
+    which is fully TRT convertible
+    """
+
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            out = self.conv(x)
+            out = self.relu(out)
+            return out
+
+    model = MyModule().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "min_block_size": 1,
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    serialized_prog = serialize(trt_exp_program)
+    deserialized_prog = deserialize(*serialized_prog)
+
+    # Check Pyt and TRT exported program outputs
+    cos_sim = cosine_similarity(model(input), trt_exp_program(input))
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )
+    # Check Pyt and deserialized TRT exported program outputs
+    cos_sim = cosine_similarity(model(input), deserialized_prog(input))
+    assertions.assertTrue(
+        cos_sim > COSINE_THRESHOLD,
+        msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+    )
+
+
+@pytest.mark.unit
+def test_base_full_compile_multiple_outputs(ir):
+    """
+    This tests export serde functionality on a base model
+    with multiple outputs which is fully TRT convertible
+    """
+
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            conv = self.conv(x)
+            conv = conv * 0.5
+            relu = self.relu(conv)
+            return conv, relu
+
+    model = MyModule().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "min_block_size": 1,
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    serialized_prog = serialize(trt_exp_program)
+    deserialized_prog = deserialize(*serialized_prog)
+    # Check Pyt and TRT exported program outputs
+    outputs_pyt = model(input)
+    outputs_trt = trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    # Check Pyt and deserialized TRT exported program outputs
+    outputs_trt_deser = deserialized_prog(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+
+@pytest.mark.unit
+def test_base_full_compile_save_load(ir):
+    """
+    This tests export save and load functionality on a base model
+    with multiple outputs which is fully TRT convertible
+    """
+
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            conv = self.conv(x)
+            conv = conv * 0.5
+            relu = self.relu(conv)
+            return conv, relu
+
+    model = MyModule().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "min_block_size": 1,
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    torch._export.save(trt_exp_program, "/tmp/trt.ep")
+    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+
+    outputs_pyt = model(input)
+    outputs_trt = trt_exp_program(input)
+    # Check Pyt and TRT exported program outputs
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+    # Check Pyt and deserialized TRT exported program outputs
+    outputs_trt_deser = deser_trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+
+@pytest.mark.unit
+def test_hybrid_relu_fallback(ir):
+    """
+    This tests export save and load functionality on a hybrid
+    model with Pytorch and TRT segments. Relu (unweighted) layer is forced to
+    fallback
+    """
+
+    class MyModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
+            self.relu = torch.nn.ReLU()
+
+        def forward(self, x):
+            conv = self.conv(x)
+            relu = self.relu(conv)
+            mul = relu * 0.5
+            return mul
+
+    model = MyModule().eval().cuda()
+    input = torch.randn((1, 3, 224, 224)).to("cuda")
+
+    compile_spec = {
+        "inputs": [
+            torchtrt.Input(
+                input.shape, dtype=torch.float, format=torch.contiguous_format
+            )
+        ],
+        "ir": ir,
+        "min_block_size": 1,
+        "torch_executed_ops": "torch.ops.aten.relu.default",
+    }
+
+    exp_program = torchtrt.dynamo.trace(model, **compile_spec)
+    trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
+    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    torch._export.save(trt_exp_program, "/tmp/trt.ep")
+    deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
+
+    outputs_pyt = model(input)
+    outputs_trt = trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_hybrid_relu_fallback TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
+
+    outputs_trt_deser = deser_trt_exp_program(input)
+    for idx in range(len(outputs_pyt)):
+        cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx])
+        assertions.assertTrue(
+            cos_sim > COSINE_THRESHOLD,
+            msg=f"test_hybrid_relu_fallback TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
+        )
 
 
 @pytest.mark.unit

From 0eace85c3733a1173832fb5d8efb91839d6e1a6f Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Fri, 13 Oct 2023 10:26:41 -0700
Subject: [PATCH 3/6] chore: fix types

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 py/torch_tensorrt/dynamo/compile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/torch_tensorrt/dynamo/compile.py b/py/torch_tensorrt/dynamo/compile.py
index 092f551b5a..7e6598b1ad 100644
--- a/py/torch_tensorrt/dynamo/compile.py
+++ b/py/torch_tensorrt/dynamo/compile.py
@@ -46,7 +46,7 @@
 
 
 def compile(
-    exported_program: ExportedProgram,
+    exported_program: Union[torch.fx.GraphModule, ExportedProgram],
     inputs: Any,
     *,
     device: Optional[Union[Device, torch.device, str]] = DEVICE,

From 358caa55595097ce986424befc4a2a95c9645c1f Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Fri, 13 Oct 2023 10:38:04 -0700
Subject: [PATCH 4/6] chore: update docs

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 docsrc/user_guide/saving_models.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/docsrc/user_guide/saving_models.rst b/docsrc/user_guide/saving_models.rst
index 46fadcb905..ca1fb6d548 100644
--- a/docsrc/user_guide/saving_models.rst
+++ b/docsrc/user_guide/saving_models.rst
@@ -37,21 +37,19 @@ b) ExportedProgram
 
     import torch
     import torch_tensorrt
-    from torch_tensorrt.dynamo.export import transform, create_exported_program
 
     model = MyModel().eval().cuda()
     inputs = torch.randn((1, 3, 224, 224)).cuda()
     trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule
     # Transform and create an exported program
-    trt_gm = transform(trt_gm, inputs)
-    trt_exp_program = create_exported_program(trt_gm, call_spec, trt_gm.state_dict())
-    torch._export.save(trt_exp_program, "trt_model.ep")
+    trt_exp_program = torch_tensorrt.dynamo.transform(trt_gm, inputs, call_spec)
+    torch.export.save(trt_exp_program, "trt_model.ep")
 
     # Later, you can load it and run inference 
-    model = torch._export.load("trt_model.ep")
+    model = torch.export.load("trt_model.ep")
     model(inputs)
 
-`torch_tensorrt.dynamo.export.transform` inlines the submodules within a GraphModule to their corresponding nodes and stiches all the nodes together. 
+`torch_tensorrt.dynamo.transform` inlines the submodules within a GraphModule to their corresponding nodes, stitches all the nodes together and creates an ExportedProgram. 
 This is needed as `torch._export` serialization cannot handle serializing and deserializing of submodules (`call_module` nodes). 
 
 NOTE: This way of saving the models using `ExportedProgram` is experimental. Here is a known issue : https://github.com/pytorch/TensorRT/issues/2341

From c5d2299e86dab034c16ec58d45b1b4ea3bdc318a Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Tue, 24 Oct 2023 13:43:17 -0700
Subject: [PATCH 5/6] chore: update exported_program serialization functions

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 docsrc/user_guide/saving_models.rst         |  6 +--
 py/torch_tensorrt/dynamo/__init__.py        |  2 +-
 py/torch_tensorrt/dynamo/export.py          | 59 ++++++++++++---------
 tests/py/dynamo/models/test_export_serde.py | 20 +++++--
 4 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/docsrc/user_guide/saving_models.rst b/docsrc/user_guide/saving_models.rst
index ca1fb6d548..6863f62317 100644
--- a/docsrc/user_guide/saving_models.rst
+++ b/docsrc/user_guide/saving_models.rst
@@ -22,8 +22,8 @@ The following code illustrates this approach.
     model = MyModel().eval().cuda()
     inputs = torch.randn((1, 3, 224, 224)).cuda()
     trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule
-    trt_script_model = torch.jit.trace(trt_gm, inputs)
-    torch.jit.save(trt_script_model, "trt_model.ts")
+    trt_traced_model = torch_tensorrt.dynamo.serialize(trt_gm, inputs)
+    torch.jit.save(trt_traced_model, "trt_model.ts")
 
     # Later, you can load it and run inference
     model = torch.jit.load("trt_model.ts").cuda()
@@ -42,7 +42,7 @@ b) ExportedProgram
     inputs = torch.randn((1, 3, 224, 224)).cuda()
     trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule
     # Transform and create an exported program
-    trt_exp_program = torch_tensorrt.dynamo.transform(trt_gm, inputs, call_spec)
+    trt_exp_program = torch_tensorrt.dynamo.serialize(trt_gm, inputs, call_spec, ir="exported_program")
     torch.export.save(trt_exp_program, "trt_model.ep")
 
     # Later, you can load it and run inference 
diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py
index dd3e2970e6..1e39ea7fb1 100644
--- a/py/torch_tensorrt/dynamo/__init__.py
+++ b/py/torch_tensorrt/dynamo/__init__.py
@@ -16,4 +16,4 @@
         DYNAMO_CONVERTERS,
         dynamo_tensorrt_converter,
     )
-    from .export import transform
+    from .export import serialize
diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py
index 33b9395739..6d2f0ae66e 100644
--- a/py/torch_tensorrt/dynamo/export.py
+++ b/py/torch_tensorrt/dynamo/export.py
@@ -10,31 +10,42 @@
 from torch_tensorrt.dynamo import partitioning
 
 
-def transform(
-    gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor], call_spec: CallSpec
+def serialize(
+    gm: torch.fx.GraphModule,
+    inputs: Sequence[torch.Tensor],
+    call_spec: CallSpec = None,
+    ir: str = "torchscript",
 ) -> ExportedProgram:
-    # Run shape analysis
-    _, outputs_map = partitioning.run_shape_analysis(gm, inputs)
-
-    # Inline TensorRT submodules
-    inline_trt_modules(gm, outputs_map)
-
-    # Inline pytorch submodules
-    inline_torch_modules(gm)
-
-    # Lift constant buffers and parameters in the graph
-    # torch.export serialization expects them to be lifted
-    lift_constant_pass(gm)
-
-    # Clean the graph
-    gm.delete_all_unused_submodules()
-    gm.graph.eliminate_dead_code()
-    gm.graph.lint()
-
-    # Create an exported program with the TRT GraphModule
-    exp_program = create_trt_exp_program(gm, call_spec)
-
-    return exp_program
+    if ir == "torchscript":
+        return torch.jit.trace(gm, inputs)
+    elif ir == "exported_program":
+        assert call_spec
+        # Run shape analysis
+        _, outputs_map = partitioning.run_shape_analysis(gm, inputs)
+
+        # Inline TensorRT submodules
+        inline_trt_modules(gm, outputs_map)
+
+        # Inline pytorch submodules
+        inline_torch_modules(gm)
+
+        # Lift constant buffers and parameters in the graph
+        # torch.export serialization expects them to be lifted
+        lift_constant_pass(gm)
+
+        # Clean the graph
+        gm.delete_all_unused_submodules()
+        gm.graph.eliminate_dead_code()
+        gm.graph.lint()
+
+        # Create an exported program with the TRT GraphModule
+        exp_program = create_trt_exp_program(gm, call_spec)
+
+        return exp_program
+    else:
+        raise ValueError(
+            "Invalid ir provided for serialization. Select among torchscript | exported_program"
+        )
 
 
 def lift_constant_pass(trt_gm: torch.fx.GraphModule) -> torch.fx.GraphModule:
diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py
index 13e0a36b32..122503bb00 100644
--- a/tests/py/dynamo/models/test_export_serde.py
+++ b/tests/py/dynamo/models/test_export_serde.py
@@ -44,7 +44,9 @@ def forward(self, x):
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    trt_exp_program = torchtrt.dynamo.serialize(
+        trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program"
+    )
     serialized_prog = serialize(trt_exp_program)
     deserialized_prog = deserialize(*serialized_prog)
 
@@ -96,7 +98,9 @@ def forward(self, x):
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    trt_exp_program = torchtrt.dynamo.serialize(
+        trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program"
+    )
     serialized_prog = serialize(trt_exp_program)
     deserialized_prog = deserialize(*serialized_prog)
     # Check Pyt and TRT exported program outputs
@@ -153,7 +157,9 @@ def forward(self, x):
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    trt_exp_program = torchtrt.dynamo.serialize(
+        trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program"
+    )
     torch._export.save(trt_exp_program, "/tmp/trt.ep")
     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
 
@@ -212,7 +218,9 @@ def forward(self, x):
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    trt_exp_program = torchtrt.dynamo.serialize(
+        trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program"
+    )
     torch._export.save(trt_exp_program, "/tmp/trt.ep")
     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
 
@@ -254,7 +262,9 @@ def test_resnet18_save_load(ir):
 
     exp_program = torchtrt.dynamo.trace(model, **compile_spec)
     trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec)
-    trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec)
+    trt_exp_program = torchtrt.dynamo.serialize(
+        trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program"
+    )
     torch._export.save(trt_exp_program, "/tmp/trt.ep")
     deser_trt_exp_program = torch._export.load("/tmp/trt.ep")
 

From 5238698bff944fefa03fb758163c6b4ec0d4a4e0 Mon Sep 17 00:00:00 2001
From: Dheeraj Peri <peri.dheeraj@gmail.com>
Date: Wed, 25 Oct 2023 12:54:02 -0700
Subject: [PATCH 6/6] chore: minor updates

Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
---
 py/torch_tensorrt/dynamo/export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py
index 6d2f0ae66e..91573f4491 100644
--- a/py/torch_tensorrt/dynamo/export.py
+++ b/py/torch_tensorrt/dynamo/export.py
@@ -44,7 +44,7 @@ def serialize(
         return exp_program
     else:
         raise ValueError(
-            "Invalid ir provided for serialization. Select among torchscript | exported_program"
+            f"Invalid ir : {ir} provided for serialization. Options include torchscript | exported_program"
         )