diff --git a/examples/dynamo/refit_engine_example.py b/examples/dynamo/refit_engine_example.py
index 1feb033a3a..9503dc2a8b 100644
--- a/examples/dynamo/refit_engine_example.py
+++ b/examples/dynamo/refit_engine_example.py
@@ -70,6 +70,7 @@
     min_block_size=min_block_size,
     torch_executed_ops=torch_executed_ops,
     make_refitable=True,
+    reuse_cached_engines=False,
 )  # Output is a torch.fx.GraphModule
 
 # Save the graph module as an exported program
diff --git a/tests/py/dynamo/models/test_model_refit.py b/tests/py/dynamo/models/test_model_refit.py
index 9782cd829c..c2de5f5472 100644
--- a/tests/py/dynamo/models/test_model_refit.py
+++ b/tests/py/dynamo/models/test_model_refit.py
@@ -35,8 +35,8 @@
 @pytest.mark.unit
 def test_mapping():
 
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    model2 = models.resnet18(pretrained=False).eval().to("cuda")
+    model = models.resnet18(pretrained=False).eval().to("cuda")
+    model2 = models.resnet18(pretrained=True).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     trt_input = [
         torchtrt.Input(i.shape, dtype=torch.float, format=torch.contiguous_format)
@@ -58,6 +58,7 @@ def test_mapping():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
     settings = trt_gm._run_on_acc_0.settings
     runtime = trt.Runtime(TRT_LOGGER)
@@ -110,6 +111,7 @@ def test_refit_one_engine_with_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -141,8 +143,8 @@
 @pytest.mark.unit
 def test_refit_one_engine_no_map_with_weightmap():
 
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    model2 = models.resnet18(pretrained=False).eval().to("cuda")
+    model = models.resnet18(pretrained=False).eval().to("cuda")
+    model2 = models.resnet18(pretrained=True).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -160,6 +162,7 @@ def test_refit_one_engine_no_map_with_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
     trt_gm._run_on_acc_0.weight_name_map = None
 
@@ -192,8 +195,8 @@
 @pytest.mark.unit
 def test_refit_one_engine_with_wrong_weightmap():
 
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    model2 = models.resnet18(pretrained=False).eval().to("cuda")
+    model = models.resnet18(pretrained=False).eval().to("cuda")
+    model2 = models.resnet18(pretrained=True).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -211,6 +214,7 @@ def test_refit_one_engine_with_wrong_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
     # Manually Deleted all batch norm layer. This suppose to fail the fast refit
     trt_gm._run_on_acc_0.weight_name_map = {
@@ -268,6 +272,7 @@ def test_refit_one_engine_bert_with_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -302,8 +307,8 @@
 @pytest.mark.unit
 def test_refit_one_engine_inline_runtime__with_weightmap():
     trt_ep_path = os.path.join(tempfile.gettempdir(), "compiled.ep")
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    model2 = models.resnet18(pretrained=False).eval().to("cuda")
+    model = models.resnet18(pretrained=False).eval().to("cuda")
+    model2 = models.resnet18(pretrained=True).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -321,6 +326,7 @@ def test_refit_one_engine_inline_runtime__with_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
     torchtrt.save(trt_gm, trt_ep_path, inputs=inputs)
     trt_gm = torch.export.load(trt_ep_path)
@@ -348,8 +354,8 @@
 @pytest.mark.unit
 def test_refit_one_engine_python_runtime_with_weightmap():
 
-    model = models.resnet18(pretrained=True).eval().to("cuda")
-    model2 = models.resnet18(pretrained=False).eval().to("cuda")
+    model = models.resnet18(pretrained=False).eval().to("cuda")
+    model2 = models.resnet18(pretrained=True).eval().to("cuda")
     inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
     enabled_precisions = {torch.float}
     debug = False
@@ -367,6 +373,7 @@ def test_refit_one_engine_python_runtime_with_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -438,6 +445,7 @@ def forward(self, x):
         min_block_size=min_block_size,
         make_refitable=True,
         torch_executed_ops=torch_executed_ops,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -487,6 +495,7 @@ def test_refit_one_engine_without_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -538,6 +547,7 @@ def test_refit_one_engine_bert_without_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -591,6 +601,7 @@ def test_refit_one_engine_inline_runtime_without_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
     torchtrt.save(trt_gm, trt_ep_path, inputs=inputs)
     trt_gm = torch.export.load(trt_ep_path)
@@ -637,6 +648,7 @@ def test_refit_one_engine_python_runtime_without_weightmap():
         debug=debug,
         min_block_size=min_block_size,
         make_refitable=True,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
@@ -708,6 +720,7 @@ def forward(self, x):
         min_block_size=min_block_size,
         make_refitable=True,
         torch_executed_ops=torch_executed_ops,
+        reuse_cached_engines=False,
     )
 
     new_trt_gm = refit_module_weights(
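For context, a minimal sketch of the compile-then-refit flow these tests exercise, showing where the new `reuse_cached_engines=False` argument fits. This is illustrative only: it mirrors the test setup in the patch above, and the `arg_inputs` keyword of `refit_module_weights` is an assumption that may be spelled `inputs=` in older releases.

import torch
import torch_tensorrt as torchtrt
import torchvision.models as models
from torch_tensorrt.dynamo import refit_module_weights

# Compile the randomly initialized model, then refit it with the
# pretrained weights, mirroring the updated tests above.
model = models.resnet18(pretrained=False).eval().to("cuda")
model2 = models.resnet18(pretrained=True).eval().to("cuda")
inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

exp_program = torch.export.export(model, tuple(inputs))
exp_program2 = torch.export.export(model2, tuple(inputs))

trt_gm = torchtrt.dynamo.compile(
    exp_program,
    tuple(inputs),
    make_refitable=True,
    # Always build a fresh engine: reusing a previously cached engine
    # would make the refit behavior under test non-deterministic.
    reuse_cached_engines=False,
)

# Swap in model2's weights without rebuilding the engine.
new_trt_gm = refit_module_weights(
    compiled_module=trt_gm,
    new_weight_module=exp_program2,
    arg_inputs=inputs,  # assumption: may be `inputs=` in older versions
)

# The refitted module should now track model2's outputs.
with torch.no_grad():
    assert torch.allclose(model2(*inputs), new_trt_gm(*inputs), atol=1e-2, rtol=1e-2)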