From 7f5282ef879fc5b2ed25139682a8a89662da09dd Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Fri, 13 Oct 2023 00:08:53 -0700 Subject: [PATCH 1/6] chore: Accept graphmodule, move exported program APIs within transform Signed-off-by: Dheeraj Peri --- py/requirements.txt | 5 +- py/torch_tensorrt/dynamo/__init__.py | 1 + py/torch_tensorrt/dynamo/compile.py | 10 +- py/torch_tensorrt/dynamo/export.py | 18 +- tests/py/dynamo/models/test_export_serde.py | 453 ++++++++------------ 5 files changed, 210 insertions(+), 277 deletions(-) diff --git a/py/requirements.txt b/py/requirements.txt index aa957c3743..9a6d0eb90d 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -1,9 +1,8 @@ numpy packaging pybind11==2.6.2 ---extra-index-url https://download.pytorch.org/whl/nightly/cu121 -torch>=2.1.0,<2.2.0 -torchvision>=0.16.0,<0.17.0 +torch==2.1.0 +torchvision==0.16.0 --extra-index-url https://pypi.ngc.nvidia.com tensorrt==8.6.1 pyyaml diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py index 63cc2af10a..dd3e2970e6 100644 --- a/py/torch_tensorrt/dynamo/__init__.py +++ b/py/torch_tensorrt/dynamo/__init__.py @@ -16,3 +16,4 @@ DYNAMO_CONVERTERS, dynamo_tensorrt_converter, ) + from .export import transform diff --git a/py/torch_tensorrt/dynamo/compile.py b/py/torch_tensorrt/dynamo/compile.py index 5394c1382e..092f551b5a 100644 --- a/py/torch_tensorrt/dynamo/compile.py +++ b/py/torch_tensorrt/dynamo/compile.py @@ -86,7 +86,15 @@ def compile( inputs = prepare_inputs(inputs) device = to_torch_tensorrt_device(device) - gm = exported_program.module() + if isinstance(exported_program, torch.fx.GraphModule): + gm = exported_program + elif isinstance(exported_program, ExportedProgram): + gm = exported_program.module() + else: + raise AssertionError( + f"Input graph should either be an ExportedProgram or a GraphModule but got type {type(exported_program)}" + ) + logger.debug("Input graph: " + str(gm.graph)) # Apply lowering on the graph module 
diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py index 9bd1dbddb3..33b9395739 100644 --- a/py/torch_tensorrt/dynamo/export.py +++ b/py/torch_tensorrt/dynamo/export.py @@ -1,6 +1,6 @@ import copy import operator -from typing import Any, Dict, Sequence, Tuple, Union, cast +from typing import Any, Dict, Sequence, Tuple, cast import torch from torch._export.exported_program import CallSpec @@ -11,8 +11,8 @@ def transform( - gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor] -) -> torch.fx.GraphModule: + gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor], call_spec: CallSpec +) -> ExportedProgram: # Run shape analysis _, outputs_map = partitioning.run_shape_analysis(gm, inputs) @@ -31,7 +31,10 @@ def transform( gm.graph.eliminate_dead_code() gm.graph.lint() - return gm + # Create an exported program with the TRT GraphModule + exp_program = create_trt_exp_program(gm, call_spec) + + return exp_program def lift_constant_pass(trt_gm: torch.fx.GraphModule) -> torch.fx.GraphModule: @@ -115,7 +118,6 @@ def inline_torch_modules(gm: torch.fx.GraphModule) -> torch.fx.GraphModule: # Copy all nodes in the submodule into gm and # store the output node of this submodule which is now present in gm - submodule_output = gm.graph.graph_copy(submodule.graph, val_map) # Get their references (since we copied) in the parent graph (gm) @@ -174,9 +176,7 @@ def copy_submodule_attributes( def create_trt_exp_program( - gm: torch.fx.GraphModule, - call_spec: CallSpec, - state_dict: Dict[str, Union[torch.Tensor, torch.nn.Parameter]], + gm: torch.fx.GraphModule, call_spec: CallSpec ) -> ExportedProgram: """Creates a new Exported Program. 
This function takes an torch.fx.GraphModule which has TRT engines and constructs an Exported Program object with the new IO node names, call_spec and state_dict @@ -208,7 +208,7 @@ def create_trt_exp_program( ) trt_exp_program = ExportedProgram( - gm, gm.graph, trt_graph_signature, call_spec, state_dict, {}, [], [] + gm, gm.graph, trt_graph_signature, call_spec, gm.state_dict(), {}, [], [] ) return trt_exp_program diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index 5e0dc7406c..df9fc3c56d 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -6,299 +6,182 @@ import torch_tensorrt as torchtrt import torchvision.models as models from torch._export.serde.serialize import deserialize, serialize -from torch_tensorrt.dynamo.export import create_trt_exp_program, transform from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity assertions = unittest.TestCase() -@pytest.mark.unit -def test_base_full_compile(ir): - """ - This tests export serde functionality on a base model - which is fully TRT convertible - """ - - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) - self.relu = torch.nn.ReLU() - - def forward(self, x): - out = self.conv(x) - out = self.relu(out) - return out - - model = MyModule().eval().cuda() - input = torch.randn((1, 3, 224, 224)).to("cuda") - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "ir": ir, - "min_block_size": 1, - } - - exp_program = torchtrt.dynamo.trace(model, **compile_spec) - trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_gm = transform(trt_gm, [input]) - trt_exp_program = create_trt_exp_program( - trt_gm, exp_program.call_spec, trt_gm.state_dict() - ) - serialized_prog = serialize(trt_exp_program) - deserialized_prog = 
deserialize(*serialized_prog) - - # Check Pyt and TRT exported program outputs - cos_sim = cosine_similarity(model(input), trt_exp_program(input)) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - # Check Pyt and deserialized TRT exported program outputs - cos_sim = cosine_similarity(model(input), deserialized_prog(input)) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - -@pytest.mark.unit -def test_base_full_compile_multiple_outputs(ir): - """ - This tests export serde functionality on a base model - with multiple outputs which is fully TRT convertible - """ - - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) - self.relu = torch.nn.ReLU() - - def forward(self, x): - conv = self.conv(x) - conv = conv * 0.5 - relu = self.relu(conv) - return conv, relu - - model = MyModule().eval().cuda() - input = torch.randn((1, 3, 224, 224)).to("cuda") - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "ir": ir, - "min_block_size": 1, - } - - exp_program = torchtrt.dynamo.trace(model, **compile_spec) - trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_gm = transform(trt_gm, [input]) - trt_exp_program = create_trt_exp_program( - trt_gm, exp_program.call_spec, trt_gm.state_dict() - ) - - serialized_prog = serialize(trt_exp_program) - deserialized_prog = deserialize(*serialized_prog) - # Check Pyt and TRT exported program outputs - outputs_pyt = model(input) - outputs_trt = trt_exp_program(input) - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], 
outputs_trt[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Check Pyt and deserialized TRT exported program outputs - outputs_trt_deser = deserialized_prog(input) - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - -@pytest.mark.unit -def test_base_full_compile_save_load(ir): - """ - This tests export save and load functionality on a base model - with multiple outputs which is fully TRT convertible - """ - - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) - self.relu = torch.nn.ReLU() - - def forward(self, x): - conv = self.conv(x) - conv = conv * 0.5 - relu = self.relu(conv) - return conv, relu +# @pytest.mark.unit +# def test_base_full_compile(ir): +# """ +# This tests export serde functionality on a base model +# which is fully TRT convertible +# """ - model = MyModule().eval().cuda() - input = torch.randn((1, 3, 224, 224)).to("cuda") +# class MyModule(torch.nn.Module): +# def __init__(self): +# super().__init__() +# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) +# self.relu = torch.nn.ReLU() - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "ir": ir, - "min_block_size": 1, - } +# def forward(self, x): +# out = self.conv(x) +# out = self.relu(out) +# return out - exp_program = torchtrt.dynamo.trace(model, **compile_spec) - trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_gm = transform(trt_gm, [input]) - 
trt_exp_program = create_trt_exp_program( - trt_gm, exp_program.call_spec, trt_gm.state_dict() - ) +# model = MyModule().eval().cuda() +# input = torch.randn((1, 3, 224, 224)).to("cuda") - torch._export.save(trt_exp_program, "/tmp/trt.ep") - deser_trt_exp_program = torch._export.load("/tmp/trt.ep") +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.float, format=torch.contiguous_format +# ) +# ], +# "ir": ir, +# "min_block_size": 1, +# } - outputs_pyt = model(input) - outputs_trt = trt_exp_program(input) - # Check Pyt and TRT exported program outputs - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - # Check Pyt and deserialized TRT exported program outputs - outputs_trt_deser = deser_trt_exp_program(input) - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) +# exp_program = torchtrt.dynamo.trace(model, **compile_spec) +# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) +# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) +# serialized_prog = serialize(trt_exp_program) +# deserialized_prog = deserialize(*serialized_prog) + +# # Check Pyt and TRT exported program outputs +# cos_sim = cosine_similarity(model(input), trt_exp_program(input)) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_model_full_compile TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) +# # Check Pyt and deserialized TRT exported program outputs +# cos_sim = cosine_similarity(model(input), deserialized_prog(input)) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) -@pytest.mark.unit -def test_hybrid_relu_fallback(ir): - """ - This tests export save and load functionality on a hybrid - model with Pytorch and TRT segments. Relu (unweighted) layer is forced to - fallback - """ +# @pytest.mark.unit +# def test_base_full_compile_multiple_outputs(ir): +# """ +# This tests export serde functionality on a base model +# with multiple outputs which is fully TRT convertible +# """ - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) - self.relu = torch.nn.ReLU() +# class MyModule(torch.nn.Module): +# def __init__(self): +# super().__init__() +# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) +# self.relu = torch.nn.ReLU() - def forward(self, x): - conv = self.conv(x) - relu = self.relu(conv) - mul = relu * 0.5 - return mul +# def forward(self, x): +# conv = self.conv(x) +# conv = conv * 0.5 +# relu = self.relu(conv) +# return conv, relu - model = MyModule().eval().cuda() - input = torch.randn((1, 3, 224, 224)).to("cuda") +# model = MyModule().eval().cuda() +# input = torch.randn((1, 3, 224, 224)).to("cuda") - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "ir": ir, - "min_block_size": 1, - "torch_executed_ops": "torch.ops.aten.relu.default", - } +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.float, format=torch.contiguous_format +# ) +# ], +# "ir": ir, +# "min_block_size": 1, +# } - exp_program = torchtrt.dynamo.trace(model, 
**compile_spec) - trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_gm = transform(trt_gm, [input]) - trt_exp_program = create_trt_exp_program( - trt_gm, exp_program.call_spec, trt_gm.state_dict() - ) +# exp_program = torchtrt.dynamo.trace(model, **compile_spec) +# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) +# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) +# serialized_prog = serialize(trt_exp_program) +# deserialized_prog = deserialize(*serialized_prog) +# # Check Pyt and TRT exported program outputs +# outputs_pyt = model(input) +# outputs_trt = trt_exp_program(input) +# for idx in range(len(outputs_pyt)): +# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) - torch._export.save(trt_exp_program, "/tmp/trt.ep") - deser_trt_exp_program = torch._export.load("/tmp/trt.ep") +# # Check Pyt and deserialized TRT exported program outputs +# outputs_trt_deser = deserialized_prog(input) +# for idx in range(len(outputs_pyt)): +# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) - outputs_pyt = model(input) - outputs_trt = trt_exp_program(input) - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - outputs_trt_deser = deser_trt_exp_program(input) - for idx in range(len(outputs_pyt)): - cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) +# @pytest.mark.unit +# def test_base_full_compile_save_load(ir): +# """ +# This tests export save and load functionality on a base model +# with multiple outputs which is fully TRT convertible +# """ +# class MyModule(torch.nn.Module): +# def __init__(self): +# super().__init__() +# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) +# self.relu = torch.nn.ReLU() -@pytest.mark.unit -def test_resnet18_save_load(ir): - """ - This tests export save and load functionality on Resnet18 model - """ - model = models.resnet18().eval().cuda() - input = torch.randn((1, 3, 224, 224)).to("cuda") +# def forward(self, x): +# conv = self.conv(x) +# conv = conv * 0.5 +# relu = self.relu(conv) +# return conv, relu - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "ir": ir, - "min_block_size": 1, - } +# model = MyModule().eval().cuda() +# input = torch.randn((1, 3, 224, 224)).to("cuda") - exp_program = torchtrt.dynamo.trace(model, **compile_spec) - trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_gm = transform(trt_gm, [input]) - trt_exp_program = create_trt_exp_program( - trt_gm, exp_program.call_spec, trt_gm.state_dict() - ) - torch._export.save(trt_exp_program, "/tmp/trt.ep") - deser_trt_exp_program = torch._export.load("/tmp/trt.ep") +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.float, format=torch.contiguous_format +# ) +# ], +# "ir": ir, +# "min_block_size": 1, +# } - outputs_pyt = model(input) - outputs_trt = 
trt_exp_program(input) - cos_sim = cosine_similarity(outputs_pyt, outputs_trt) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) +# exp_program = torchtrt.dynamo.trace(model, **compile_spec) +# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) +# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) +# torch._export.save(trt_exp_program, "/tmp/trt.ep") +# deser_trt_exp_program = torch._export.load("/tmp/trt.ep") - outputs_trt_deser = deser_trt_exp_program(input) - cos_sim = cosine_similarity(outputs_pyt, outputs_trt_deser) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) +# outputs_pyt = model(input) +# outputs_trt = trt_exp_program(input) +# # Check Pyt and TRT exported program outputs +# for idx in range(len(outputs_pyt)): +# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) +# # Check Pyt and deserialized TRT exported program outputs +# outputs_trt_deser = deser_trt_exp_program(input) +# for idx in range(len(outputs_pyt)): +# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) -# Enable this test once this issue is resolved https://github.com/pytorch/TensorRT/issues/2341 # @pytest.mark.unit -# def test_hybrid_conv_fallback(ir): +# def test_hybrid_relu_fallback(ir): # """ # This tests export save and load functionality on a hybrid -# model where a conv (a weighted layer) has been forced to fallback to Pytorch. +# model with Pytorch and TRT segments. Relu (unweighted) layer is forced to +# fallback # """ # class MyModule(torch.nn.Module): @@ -324,10 +207,12 @@ def test_resnet18_save_load(ir): # ], # "ir": ir, # "min_block_size": 1, -# "torch_executed_ops": "torch.ops.aten.convolution.default", +# "torch_executed_ops": "torch.ops.aten.relu.default", # } -# trt_exp_program = torchtrt.compile(model, **compile_spec) +# exp_program = torchtrt.dynamo.trace(model, **compile_spec) +# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) +# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) # torch._export.save(trt_exp_program, "/tmp/trt.ep") # deser_trt_exp_program = torch._export.load("/tmp/trt.ep") @@ -347,3 +232,43 @@ def test_resnet18_save_load(ir): # cos_sim > COSINE_THRESHOLD, # msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", # ) + + +@pytest.mark.unit +def test_resnet18_save_load(ir): + """ + This tests export save and load functionality on Resnet18 model + """ + model = models.resnet18().eval().cuda() + input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "ir": ir, + "min_block_size": 1, + } + + exp_program = torchtrt.dynamo.trace(model, **compile_spec) + trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) + trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + torch._export.save(trt_exp_program, "/tmp/trt.ep") + deser_trt_exp_program = torch._export.load("/tmp/trt.ep") + + outputs_pyt = model(input) + outputs_trt = trt_exp_program(input) + cos_sim = cosine_similarity(outputs_pyt, outputs_trt) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_resnet18_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + outputs_trt_deser = deser_trt_exp_program(input) + cos_sim = cosine_similarity(outputs_pyt, outputs_trt_deser) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_resnet18_save_load TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) From 41e833984c3efcee902e6399a14796a3720808a7 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Fri, 13 Oct 2023 01:04:38 -0700 Subject: [PATCH 2/6] chore: Fix uncommented tests Signed-off-by: Dheeraj Peri --- tests/py/dynamo/models/test_export_serde.py | 442 ++++++++++---------- 1 file changed, 221 insertions(+), 221 deletions(-) diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index df9fc3c56d..13e0a36b32 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -11,227 +11,227 @@ assertions = unittest.TestCase() -# @pytest.mark.unit -# def test_base_full_compile(ir): -# """ -# This tests export serde functionality on a base model -# which is fully TRT convertible -# """ - -# class MyModule(torch.nn.Module): -# def __init__(self): -# super().__init__() -# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) -# self.relu = torch.nn.ReLU() - -# def forward(self, x): -# out = self.conv(x) -# out = self.relu(out) -# return out - -# model = MyModule().eval().cuda() -# input = torch.randn((1, 3, 224, 224)).to("cuda") - -# compile_spec = { -# "inputs": [ -# torchtrt.Input( -# input.shape, dtype=torch.float, format=torch.contiguous_format -# ) -# ], -# "ir": ir, -# "min_block_size": 1, -# } - -# exp_program = torchtrt.dynamo.trace(model, **compile_spec) -# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) -# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) -# serialized_prog = serialize(trt_exp_program) -# deserialized_prog = deserialize(*serialized_prog) - -# # Check Pyt and TRT exported program outputs -# cos_sim = cosine_similarity(model(input), trt_exp_program(input)) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_model_full_compile TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) -# # Check Pyt and deserialized TRT exported program outputs -# cos_sim = cosine_similarity(model(input), deserialized_prog(input)) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) - - -# @pytest.mark.unit -# def test_base_full_compile_multiple_outputs(ir): -# """ -# This tests export serde functionality on a base model -# with multiple outputs which is fully TRT convertible -# """ - -# class MyModule(torch.nn.Module): -# def __init__(self): -# super().__init__() -# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) -# self.relu = torch.nn.ReLU() - -# def forward(self, x): -# conv = self.conv(x) -# conv = conv * 0.5 -# relu = self.relu(conv) -# return conv, relu - -# model = MyModule().eval().cuda() -# input = torch.randn((1, 3, 224, 224)).to("cuda") - -# compile_spec = { -# "inputs": [ -# torchtrt.Input( -# input.shape, dtype=torch.float, format=torch.contiguous_format -# ) -# ], -# "ir": ir, -# "min_block_size": 1, -# } - -# exp_program = torchtrt.dynamo.trace(model, **compile_spec) -# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) -# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) -# serialized_prog = serialize(trt_exp_program) -# deserialized_prog = deserialize(*serialized_prog) -# # Check Pyt and TRT exported program outputs -# outputs_pyt = model(input) -# outputs_trt = trt_exp_program(input) -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) - -# # Check Pyt and deserialized TRT exported program outputs -# outputs_trt_deser = deserialized_prog(input) -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) - - -# @pytest.mark.unit -# def test_base_full_compile_save_load(ir): -# """ -# This tests export save and load functionality on a base model -# with multiple outputs which is fully TRT convertible -# """ - -# class MyModule(torch.nn.Module): -# def __init__(self): -# super().__init__() -# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) -# self.relu = torch.nn.ReLU() - -# def forward(self, x): -# conv = self.conv(x) -# conv = conv * 0.5 -# relu = self.relu(conv) -# return conv, relu - -# model = MyModule().eval().cuda() -# input = torch.randn((1, 3, 224, 224)).to("cuda") - -# compile_spec = { -# "inputs": [ -# torchtrt.Input( -# input.shape, dtype=torch.float, format=torch.contiguous_format -# ) -# ], -# "ir": ir, -# "min_block_size": 1, -# } - -# exp_program = torchtrt.dynamo.trace(model, **compile_spec) -# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) -# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) -# torch._export.save(trt_exp_program, "/tmp/trt.ep") -# deser_trt_exp_program = torch._export.load("/tmp/trt.ep") - -# outputs_pyt = model(input) -# outputs_trt = trt_exp_program(input) -# # Check Pyt and TRT exported program outputs -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) -# # Check Pyt and deserialized TRT exported program outputs -# outputs_trt_deser = deser_trt_exp_program(input) -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) - - -# @pytest.mark.unit -# def test_hybrid_relu_fallback(ir): -# """ -# This tests export save and load functionality on a hybrid -# model with Pytorch and TRT segments. Relu (unweighted) layer is forced to -# fallback -# """ - -# class MyModule(torch.nn.Module): -# def __init__(self): -# super().__init__() -# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) -# self.relu = torch.nn.ReLU() - -# def forward(self, x): -# conv = self.conv(x) -# relu = self.relu(conv) -# mul = relu * 0.5 -# return mul - -# model = MyModule().eval().cuda() -# input = torch.randn((1, 3, 224, 224)).to("cuda") - -# compile_spec = { -# "inputs": [ -# torchtrt.Input( -# input.shape, dtype=torch.float, format=torch.contiguous_format -# ) -# ], -# "ir": ir, -# "min_block_size": 1, -# "torch_executed_ops": "torch.ops.aten.relu.default", -# } - -# exp_program = torchtrt.dynamo.trace(model, **compile_spec) -# trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) -# trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) -# torch._export.save(trt_exp_program, "/tmp/trt.ep") -# deser_trt_exp_program = torch._export.load("/tmp/trt.ep") - -# outputs_pyt = model(input) -# outputs_trt = trt_exp_program(input) -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original 
model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) - -# outputs_trt_deser = deser_trt_exp_program(input) -# for idx in range(len(outputs_pyt)): -# cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) -# assertions.assertTrue( -# cos_sim > COSINE_THRESHOLD, -# msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", -# ) +@pytest.mark.unit +def test_base_full_compile(ir): + """ + This tests export serde functionality on a base model + which is fully TRT convertible + """ + + class MyModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + out = self.conv(x) + out = self.relu(out) + return out + + model = MyModule().eval().cuda() + input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "ir": ir, + "min_block_size": 1, + } + + exp_program = torchtrt.dynamo.trace(model, **compile_spec) + trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) + trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + serialized_prog = serialize(trt_exp_program) + deserialized_prog = deserialize(*serialized_prog) + + # Check Pyt and TRT exported program outputs + cos_sim = cosine_similarity(model(input), trt_exp_program(input)) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_model_full_compile TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + # Check Pyt and deserialized TRT exported program outputs + cos_sim = cosine_similarity(model(input), deserialized_prog(input)) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_model_full_compile TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + +@pytest.mark.unit +def test_base_full_compile_multiple_outputs(ir): + """ + This tests export serde functionality on a base model + with multiple outputs which is fully TRT convertible + """ + + class MyModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + conv = self.conv(x) + conv = conv * 0.5 + relu = self.relu(conv) + return conv, relu + + model = MyModule().eval().cuda() + input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "ir": ir, + "min_block_size": 1, + } + + exp_program = torchtrt.dynamo.trace(model, **compile_spec) + trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) + trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + serialized_prog = serialize(trt_exp_program) + deserialized_prog = deserialize(*serialized_prog) + # Check Pyt and TRT exported program outputs + outputs_pyt = model(input) + outputs_trt = trt_exp_program(input) + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + # Check Pyt and deserialized TRT exported program outputs + outputs_trt_deser = deserialized_prog(input) + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + +@pytest.mark.unit +def test_base_full_compile_save_load(ir): + """ + This tests export save and load functionality on a base model + with multiple outputs which is fully TRT convertible + """ + + class MyModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + conv = self.conv(x) + conv = conv * 0.5 + relu = self.relu(conv) + return conv, relu + + model = MyModule().eval().cuda() + input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "ir": ir, + "min_block_size": 1, + } + + exp_program = torchtrt.dynamo.trace(model, **compile_spec) + trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) + trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + torch._export.save(trt_exp_program, "/tmp/trt.ep") + deser_trt_exp_program = torch._export.load("/tmp/trt.ep") + + outputs_pyt = model(input) + outputs_trt = trt_exp_program(input) + # Check Pyt and TRT exported program outputs + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + # Check Pyt and deserialized TRT exported program outputs + outputs_trt_deser = deser_trt_exp_program(input) + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + +@pytest.mark.unit +def test_hybrid_relu_fallback(ir): + """ + This tests export save and load functionality on a hybrid + model with Pytorch and TRT segments. Relu (unweighted) layer is forced to + fallback + """ + + class MyModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + conv = self.conv(x) + relu = self.relu(conv) + mul = relu * 0.5 + return mul + + model = MyModule().eval().cuda() + input = torch.randn((1, 3, 224, 224)).to("cuda") + + compile_spec = { + "inputs": [ + torchtrt.Input( + input.shape, dtype=torch.float, format=torch.contiguous_format + ) + ], + "ir": ir, + "min_block_size": 1, + "torch_executed_ops": "torch.ops.aten.relu.default", + } + + exp_program = torchtrt.dynamo.trace(model, **compile_spec) + trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) + trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + torch._export.save(trt_exp_program, "/tmp/trt.ep") + deser_trt_exp_program = torch._export.load("/tmp/trt.ep") + + outputs_pyt = model(input) + outputs_trt = trt_exp_program(input) + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_multiple_outputs TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + outputs_trt_deser = deser_trt_exp_program(input) + for idx in range(len(outputs_pyt)): + cos_sim = cosine_similarity(outputs_pyt[idx], outputs_trt_deser[idx]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"test_base_full_compile_save_load TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) @pytest.mark.unit From 0eace85c3733a1173832fb5d8efb91839d6e1a6f Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Fri, 13 Oct 2023 10:26:41 -0700 Subject: [PATCH 3/6] chore: fix types Signed-off-by: Dheeraj Peri --- py/torch_tensorrt/dynamo/compile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/torch_tensorrt/dynamo/compile.py b/py/torch_tensorrt/dynamo/compile.py index 092f551b5a..7e6598b1ad 100644 --- a/py/torch_tensorrt/dynamo/compile.py +++ b/py/torch_tensorrt/dynamo/compile.py @@ -46,7 +46,7 @@ def compile( - exported_program: ExportedProgram, + exported_program: Union[torch.fx.GraphModule, ExportedProgram], inputs: Any, *, device: Optional[Union[Device, torch.device, str]] = DEVICE, From 358caa55595097ce986424befc4a2a95c9645c1f Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Fri, 13 Oct 2023 10:38:04 -0700 Subject: [PATCH 4/6] chore: update docs Signed-off-by: Dheeraj Peri --- docsrc/user_guide/saving_models.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docsrc/user_guide/saving_models.rst b/docsrc/user_guide/saving_models.rst index 46fadcb905..ca1fb6d548 100644 --- a/docsrc/user_guide/saving_models.rst +++ b/docsrc/user_guide/saving_models.rst @@ -37,21 +37,19 @@ b) ExportedProgram import torch import torch_tensorrt - from torch_tensorrt.dynamo.export import transform, create_exported_program model = MyModel().eval().cuda() inputs = torch.randn((1, 3, 224, 224)).cuda() trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule # 
Transform and create an exported program - trt_gm = transform(trt_gm, inputs) - trt_exp_program = create_exported_program(trt_gm, call_spec, trt_gm.state_dict()) - torch._export.save(trt_exp_program, "trt_model.ep") + trt_exp_program = torch_tensorrt.dynamo.transform(trt_gm, inputs, call_spec) + torch.export.save(trt_exp_program, "trt_model.ep") # Later, you can load it and run inference - model = torch._export.load("trt_model.ep") + model = torch.export.load("trt_model.ep") model(inputs) -`torch_tensorrt.dynamo.export.transform` inlines the submodules within a GraphModule to their corresponding nodes and stiches all the nodes together. +`torch_tensorrt.dynamo.transform` inlines the submodules within a GraphModule to their corresponding nodes, stitches all the nodes together and creates an ExportedProgram. This is needed as `torch._export` serialization cannot handle serializing and deserializing of submodules (`call_module` nodes). NOTE: This way of saving the models using `ExportedProgram` is experimental. Here is a known issue : https://github.com/pytorch/TensorRT/issues/2341 From c5d2299e86dab034c16ec58d45b1b4ea3bdc318a Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Tue, 24 Oct 2023 13:43:17 -0700 Subject: [PATCH 5/6] chore: update exported_program serialization functions Signed-off-by: Dheeraj Peri --- docsrc/user_guide/saving_models.rst | 6 +-- py/torch_tensorrt/dynamo/__init__.py | 2 +- py/torch_tensorrt/dynamo/export.py | 59 ++++++++++++--------- tests/py/dynamo/models/test_export_serde.py | 20 +++++-- 4 files changed, 54 insertions(+), 33 deletions(-) diff --git a/docsrc/user_guide/saving_models.rst b/docsrc/user_guide/saving_models.rst index ca1fb6d548..6863f62317 100644 --- a/docsrc/user_guide/saving_models.rst +++ b/docsrc/user_guide/saving_models.rst @@ -22,8 +22,8 @@ The following code illustrates this approach. 
model = MyModel().eval().cuda() inputs = torch.randn((1, 3, 224, 224)).cuda() trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule - trt_script_model = torch.jit.trace(trt_gm, inputs) - torch.jit.save(trt_script_model, "trt_model.ts") + trt_traced_model = torch_tensorrt.dynamo.serialize(trt_gm, inputs) + torch.jit.save(trt_traced_model, "trt_model.ts") # Later, you can load it and run inference model = torch.jit.load("trt_model.ts").cuda() @@ -42,7 +42,7 @@ b) ExportedProgram inputs = torch.randn((1, 3, 224, 224)).cuda() trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs) # Output is a torch.fx.GraphModule # Transform and create an exported program - trt_exp_program = torch_tensorrt.dynamo.transform(trt_gm, inputs, call_spec) + trt_exp_program = torch_tensorrt.dynamo.serialize(trt_gm, inputs, call_spec, ir="exported_program") torch.export.save(trt_exp_program, "trt_model.ep") # Later, you can load it and run inference diff --git a/py/torch_tensorrt/dynamo/__init__.py b/py/torch_tensorrt/dynamo/__init__.py index dd3e2970e6..1e39ea7fb1 100644 --- a/py/torch_tensorrt/dynamo/__init__.py +++ b/py/torch_tensorrt/dynamo/__init__.py @@ -16,4 +16,4 @@ DYNAMO_CONVERTERS, dynamo_tensorrt_converter, ) - from .export import transform + from .export import serialize diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py index 33b9395739..6d2f0ae66e 100644 --- a/py/torch_tensorrt/dynamo/export.py +++ b/py/torch_tensorrt/dynamo/export.py @@ -10,31 +10,42 @@ from torch_tensorrt.dynamo import partitioning -def transform( - gm: torch.fx.GraphModule, inputs: Sequence[torch.Tensor], call_spec: CallSpec +def serialize( + gm: torch.fx.GraphModule, + inputs: Sequence[torch.Tensor], + call_spec: CallSpec = None, + ir: str = "torchscript", ) -> ExportedProgram: - # Run shape analysis - _, outputs_map = partitioning.run_shape_analysis(gm, inputs) - - # Inline TensorRT submodules - inline_trt_modules(gm, outputs_map) - 
- # Inline pytorch submodules - inline_torch_modules(gm) - - # Lift constant buffers and parameters in the graph - # torch.export serialization expects them to be lifted - lift_constant_pass(gm) - - # Clean the graph - gm.delete_all_unused_submodules() - gm.graph.eliminate_dead_code() - gm.graph.lint() - - # Create an exported program with the TRT GraphModule - exp_program = create_trt_exp_program(gm, call_spec) - - return exp_program + if ir == "torchscript": + return torch.jit.trace(gm, inputs) + elif ir == "exported_program": + assert call_spec + # Run shape analysis + _, outputs_map = partitioning.run_shape_analysis(gm, inputs) + + # Inline TensorRT submodules + inline_trt_modules(gm, outputs_map) + + # Inline pytorch submodules + inline_torch_modules(gm) + + # Lift constant buffers and parameters in the graph + # torch.export serialization expects them to be lifted + lift_constant_pass(gm) + + # Clean the graph + gm.delete_all_unused_submodules() + gm.graph.eliminate_dead_code() + gm.graph.lint() + + # Create an exported program with the TRT GraphModule + exp_program = create_trt_exp_program(gm, call_spec) + + return exp_program + else: + raise ValueError( + "Invalid ir provided for serialization. 
Select among torchscript | exported_program" + ) def lift_constant_pass(trt_gm: torch.fx.GraphModule) -> torch.fx.GraphModule: diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index 13e0a36b32..122503bb00 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -44,7 +44,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + trt_exp_program = torchtrt.dynamo.serialize( + trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program" + ) serialized_prog = serialize(trt_exp_program) deserialized_prog = deserialize(*serialized_prog) @@ -96,7 +98,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + trt_exp_program = torchtrt.dynamo.serialize( + trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program" + ) serialized_prog = serialize(trt_exp_program) deserialized_prog = deserialize(*serialized_prog) # Check Pyt and TRT exported program outputs @@ -153,7 +157,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + trt_exp_program = torchtrt.dynamo.serialize( + trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program" + ) torch._export.save(trt_exp_program, "/tmp/trt.ep") deser_trt_exp_program = torch._export.load("/tmp/trt.ep") @@ -212,7 +218,9 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - 
trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + trt_exp_program = torchtrt.dynamo.serialize( + trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program" + ) torch._export.save(trt_exp_program, "/tmp/trt.ep") deser_trt_exp_program = torch._export.load("/tmp/trt.ep") @@ -254,7 +262,9 @@ def test_resnet18_save_load(ir): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_gm = torchtrt.dynamo.compile(exp_program, **compile_spec) - trt_exp_program = torchtrt.dynamo.transform(trt_gm, [input], exp_program.call_spec) + trt_exp_program = torchtrt.dynamo.serialize( + trt_gm, [input], call_spec=exp_program.call_spec, ir="exported_program" + ) torch._export.save(trt_exp_program, "/tmp/trt.ep") deser_trt_exp_program = torch._export.load("/tmp/trt.ep") From 5238698bff944fefa03fb758163c6b4ec0d4a4e0 Mon Sep 17 00:00:00 2001 From: Dheeraj Peri Date: Wed, 25 Oct 2023 12:54:02 -0700 Subject: [PATCH 6/6] chore: minor updates Signed-off-by: Dheeraj Peri --- py/torch_tensorrt/dynamo/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/torch_tensorrt/dynamo/export.py b/py/torch_tensorrt/dynamo/export.py index 6d2f0ae66e..91573f4491 100644 --- a/py/torch_tensorrt/dynamo/export.py +++ b/py/torch_tensorrt/dynamo/export.py @@ -44,7 +44,7 @@ def serialize( return exp_program else: raise ValueError( - "Invalid ir provided for serialization. Select among torchscript | exported_program" + "Invalid ir : {ir} provided for serialization. Options include torchscript | exported_program" )