From 4f3373795e954ca36058497cb663176f006b5be1 Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 11:33:21 -0700
Subject: [PATCH 1/6] sync to fb master

---
 py/torch_tensorrt/_compile.py                 |  50 +-
 .../fx/converters/acc_ops_converters.py       | 668 +++++++++++++-----
 py/torch_tensorrt/fx/input_tensor_spec.py     |  21 +-
 py/torch_tensorrt/fx/lower.py                 |  30 +-
 .../fx/passes/lower_pass_manager_builder.py   |  53 +-
 .../test/converters/acc_op/test_as_strided.py |   1 +
 .../fx/test/converters/acc_op/test_avgpool.py |  20 +-
 .../test/converters/acc_op/test_batchnorm.py  |   8 +-
 .../fx/test/converters/acc_op/test_clamp.py   |   4 +-
 .../converters/acc_op/test_convolution.py     |  31 +-
 .../fx/test/converters/acc_op/test_gelu.py    |  12 +-
 .../converters/acc_op/test_interpolate.py     |   4 +-
 .../fx/test/converters/acc_op/test_matmul.py  |  10 +-
 .../fx/test/converters/acc_op/test_max.py     |  12 +-
 .../fx/test/converters/acc_op/test_min.py     |  12 +-
 .../fx/test/converters/acc_op/test_narrow.py  |   4 +-
 .../fx/test/converters/acc_op/test_pad.py     |   1 +
 .../fx/test/converters/acc_op/test_prod.py    |   8 +-
 .../test/converters/acc_op/test_reduce_ops.py |  18 +-
 .../fx/test/converters/acc_op/test_tile.py    |  21 +-
 .../test/converters/acc_op/test_to_dtype.py   |  20 +-
 .../fx/test/converters/acc_op/test_topk.py    |  12 +-
 .../fx/test/converters/acc_op/test_type_as.py |   2 -
 .../test/converters/acc_op/test_unary_ops.py  |  16 +-
 .../fx/test/core/test_input_tensor_spec.py    |  12 +-
 .../fx/test/passes/test_graph_opts.py         |   3 +
 .../fx/test/quant/test_quant_trt.py           | 132 ++--
 .../fx/test/trt_lower/test_diagnostics.py     |   3 +
 py/torch_tensorrt/fx/tools/common_fx2trt.py   |  48 +-
 py/torch_tensorrt/fx/tools/trt_minimizer.py   |  16 +-
 py/torch_tensorrt/fx/tools/trt_splitter.py    |   6 +-
 .../fx/tracer/acc_tracer/acc_tracer.py        |  44 +-
 32 files changed, 956 insertions(+), 346 deletions(-)

diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py
index 0735e86bbf..c6550ae7c7 100644
--- a/py/torch_tensorrt/_compile.py
+++ b/py/torch_tensorrt/_compile.py
@@ -5,22 +5,20 @@
 import torch
 import torch.fx
 from enum import Enum
-
-# import torch_tensorrt.fx
-# from torch_tensorrt.fx.lower import lower_to_trt
-# from torch_tensorrt.fx.utils import LowerPrecision
-
+import torch_tensorrt.fx
+from torch_tensorrt.fx.lower import lower_to_trt
+from torch_tensorrt.fx.utils import LowerPrecision
 
 class _IRType(Enum):
-    """Enum to set the minimum required logging level to print a message to stdout"""
-
+    """Enum to set the minimum required logging level to print a message to stdout
+    """
     ts = 0
     fx = 1
 
 
 class _ModuleType(Enum):
-    """Enum to set the minimum required logging level to print a message to stdout"""
-
+    """Enum to set the minimum required logging level to print a message to stdout
+    """
     nn = 0
     ts = 1
     fx = 2
@@ -56,8 +54,8 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
                 return _IRType.ts
             elif module_is_fxable:
                 raise ValueError("Was given a torch.fx.GraphModule, fx is not currently supported by Torch-TensorRT")
-                # logging.log(logging.Level.Info, "ir was set to default, using TorchScript as fx")
-                # return _IRType.fx
+                #logging.log(logging.Level.Info, "ir was set to default, using TorchScript as fx")
+                #return _IRType.fx
             else:
                 raise ValueError("Module was provided with in an unsupported format")
         else:
@@ -107,7 +105,7 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
         if module_type == _ModuleType.nn:
             logging.log(
                 logging.Level.Info,
-                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
+                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript"
             )
             ts_mod = torch.jit.script(module)
         return torch_tensorrt.ts.compile(ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs)
@@ -119,21 +117,17 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
         else:
             raise ValueError(f"Precision {enabled_precisions} not supported on FX")
 
-        return lower_to_trt(
-            module,
-            inputs,
-            lower_precision=lower_precision,
-            max_batch_size=inputs[0].size(0),
-            explicit_batch_dimension=True,
-            dynamic_batch=False,
-        )
+        return lower_to_trt(module, inputs, lower_precision=lower_precision, max_batch_size=inputs[0].size(0), explicit_batch_dimension=True, dynamic_batch=False)
     else:
         raise RuntimeError("Module is an unknown format or the ir requested is unknown")
 
 
-def convert_method_to_trt_engine(
-    module: Any, method_name: str, ir="default", inputs=[], enabled_precisions=set([_enums.dtype.float]), **kwargs
-):
+def convert_method_to_trt_engine(module: Any,
+                                 method_name: str,
+                                 ir="default",
+                                 inputs=[],
+                                 enabled_precisions=set([_enums.dtype.float]),
+                                 **kwargs):
     """Convert a TorchScript module method to a serialized TensorRT engine
 
     Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings
@@ -171,12 +165,14 @@ def convert_method_to_trt_engine(
         if module_type == _ModuleType.nn:
             logging.log(
                 logging.Level.Info,
-                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
+                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript"
             )
             ts_mod = torch.jit.script(module)
-        return torch_tensorrt.ts.convert_method_to_trt_engine(
-            ts_mod, method_name, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs
-        )
+        return torch_tensorrt.ts.convert_method_to_trt_engine(ts_mod,
+                                                              method_name,
+                                                              inputs=inputs,
+                                                              enabled_precisions=enabled_precisions,
+                                                              **kwargs)
     elif target_ir == _IRType.fx:
         raise RuntimeError("fx is currently not supported")
     else:
diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py
index b97ccb59b7..68334ebe44 100644
--- a/py/torch_tensorrt/fx/converters/acc_ops_converters.py
+++ b/py/torch_tensorrt/fx/converters/acc_ops_converters.py
@@ -22,6 +22,7 @@
 
 from .converter_utils import *  # noqa: F403
 
+
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
@@ -35,7 +36,10 @@ def acc_ops_conv1d(
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_val = kwargs["input"]
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Conv received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Conv received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     # Process 1d input with unsqueeze -> conv2d -> squeeze to calculated conv1d
     unsqueeze_layer = network.add_shuffle(input=input_val)
@@ -50,7 +54,9 @@ def acc_ops_conv1d(
     # and bias being ITensor is not supported in TensorRT api
     # right now
     if kwargs["bias"] is not None and not isinstance(kwargs["bias"], torch.Tensor):
-        raise RuntimeError(f"linear {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tenosr]")
+        raise RuntimeError(
+            f"linear {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tenosr]"
+        )
     bias = to_numpy(kwargs["bias"])  # type: ignore[arg-type]
     if bias is not None:
         bias = bias[None]
@@ -78,7 +84,9 @@ def acc_ops_conv1d(
         layer.set_input(1, weight)
     else:
         if not isinstance(kwargs["weight"], torch.Tensor):
-            raise RuntimeError(f"linear {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tenosr]")
+            raise RuntimeError(
+                f"linear {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tenosr]"
+            )
         weight = to_numpy(weight)
         weight = np.expand_dims(weight, -1)
         layer = network.add_convolution_nd(
@@ -120,7 +128,10 @@ def acc_ops_convnd(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Conv received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Conv received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if has_dynamic_shape(input_val.shape):
         assert input_val.shape[1] != -1, "Channel dim can't be dynamic for convolution."
@@ -129,7 +140,9 @@ def acc_ops_convnd(
     # and bias being ITensor is not supported in TensorRT api
     # right now
     if kwargs["bias"] is not None and not isinstance(kwargs["bias"], torch.Tensor):
-        raise RuntimeError(f"linear {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tenosr]")
+        raise RuntimeError(
+            f"linear {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tenosr]"
+        )
     bias = to_numpy(kwargs["bias"])  # type: ignore[arg-type]
 
     if network.has_explicit_precision:
@@ -149,7 +162,9 @@ def acc_ops_convnd(
         layer.set_input(1, weight)
     else:
         if not isinstance(kwargs["weight"], torch.Tensor):
-            raise RuntimeError(f"linear {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tenosr]")
+            raise RuntimeError(
+                f"linear {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tenosr]"
+            )
         weight = to_numpy(kwargs["weight"])
         layer = network.add_convolution_nd(
             input=input_val,
@@ -181,16 +196,23 @@ def acc_ops_conv_transposend(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Transpose conv received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Transpose conv received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if has_dynamic_shape(input_val.shape):
-        assert input_val.shape[1] != -1, "Channel dim can't be dynamic for transpose convolution."
+        assert (
+            input_val.shape[1] != -1
+        ), "Channel dim can't be dynamic for transpose convolution."
 
     # for now we'll assume bias is constant Tensor or None,
     # and bias being ITensor is not supported in TensorRT api
     # right now
     if kwargs["bias"] is not None and not isinstance(kwargs["bias"], torch.Tensor):
-        raise RuntimeError(f"ConvTranspose {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tensor]")
+        raise RuntimeError(
+            f"ConvTranspose {name} has bias of type {type(kwargs['bias'])}, Expect Optional[Tensor]"
+        )
     bias = to_numpy(kwargs["bias"])  # type: ignore[arg-type]
 
     if network.has_explicit_precision:
@@ -212,7 +234,9 @@ def acc_ops_conv_transposend(
         layer.set_input(1, weight)
     else:
         if not isinstance(kwargs["weight"], torch.Tensor):
-            raise RuntimeError(f"conv {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tensor]")
+            raise RuntimeError(
+                f"conv {name} has weight of type {type(kwargs['weight'])}, Expect Optional[Tensor]"
+            )
         weight = to_numpy(kwargs["weight"])
         # nn.ConvTranspose2d/3d weight size is (in_channels, out_channels/groups, kernel_0, kernel_1, [kernel_2])
         layer = network.add_deconvolution_nd(
@@ -248,16 +272,25 @@ def acc_ops_pad_with_padding_layer(
     rank = len(input_val.shape)  # type: ignore[union-attr]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"pad received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"pad received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if mode != "constant":
-        raise RuntimeError(f"Currently we only support constant mode for pad, got {mode}.")
+        raise RuntimeError(
+            f"Currently we only support constant mode for pad, got {mode}."
+        )
 
     if len(pad) / 2 > rank:
-        raise RuntimeError(f"Trying to pad last {len(pad) / 2} dimension but the input only has {rank} dimension.")
+        raise RuntimeError(
+            f"Trying to pad last {len(pad) / 2} dimension but the input only has {rank} dimension."
+        )
 
     if value != 0:
-        raise RuntimeError(f"Currently we only support padding value of 0, got {value}.")
+        raise RuntimeError(
+            f"Currently we only support padding value of 0, got {value}."
+        )
 
     if len(pad) > 4:
         raise RuntimeError("Currently we only support padding last two dimensions.")
@@ -289,24 +322,34 @@ def acc_ops_pad_with_slice_layer(
     rank = len(input_val.shape)  # type: ignore[union-attr]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"pad received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"pad received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if mode != "constant":
-        raise RuntimeError(f"Currently we only support constant mode for pad, got {mode}.")
+        raise RuntimeError(
+            f"Currently we only support constant mode for pad, got {mode}."
+        )
 
     if len(pad) / 2 > rank:
-        raise RuntimeError(f"Trying to pad last {len(pad) / 2} dimension but the input only has {rank} dimension.")
+        raise RuntimeError(
+            f"Trying to pad last {len(pad) / 2} dimension but the input only has {rank} dimension."
+        )
 
     # cast value to TRTensor
     dt = torch_dtype_from_trt(input_val.dtype)
     value = 0 if value == None else value
-    value_const = get_trt_tensor(network, torch.tensor([value], dtype=dt), f"{name}_value")
+    value_const = get_trt_tensor(
+        network, torch.tensor([value], dtype=dt), f"{name}_value"
+    )
 
     input_shape = input_val.shape
     pre_start = tuple(i - 1 for i in input_shape)
     prefix_len = len(input_shape) - len(pad) // 2
     pre_shape = tuple(
-        input_shape[i] + (pad[-(i - prefix_len) * 2 - 2] if i >= prefix_len else 0) for i in range(0, len(input_shape))
+        input_shape[i] + (pad[-(i - prefix_len) * 2 - 2] if i >= prefix_len else 0)
+        for i in range(0, len(input_shape))
     )
     pre_stride = [-1] * len(input_shape)
 
@@ -333,7 +376,8 @@ def acc_ops_pad_with_slice_layer(
     shape = transpose_output.shape
     post_start = tuple([0] * len(shape))
     post_shape = tuple(
-        shape[i] + (pad[-(i - prefix_len) * 2 - 1] if i >= prefix_len else 0) for i in range(0, len(shape))
+        shape[i] + (pad[-(i - prefix_len) * 2 - 1] if i >= prefix_len else 0)
+        for i in range(0, len(shape))
     )
     post_stride = tuple([1] * len(shape))
 
@@ -355,11 +399,18 @@ def acc_ops_flatten(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"flatten received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"flatten received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     num_dims = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
-    start_dim = get_positive_dim(cast(int, kwargs["start_dim"] if "start_dim" in kwargs else 0), num_dims)
-    end_dim = get_positive_dim(cast(int, kwargs["end_dim"] if "end_dim" in kwargs else -1), num_dims)
+    start_dim = get_positive_dim(
+        cast(int, kwargs["start_dim"] if "start_dim" in kwargs else 0), num_dims
+    )
+    end_dim = get_positive_dim(
+        cast(int, kwargs["end_dim"] if "end_dim" in kwargs else -1), num_dims
+    )
 
     if network.has_implicit_batch_dimension:
         assert start_dim != 0, "Can't flatten batch dimension when it's implicit."
@@ -462,14 +513,20 @@ def acc_ops_size(
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_t = kwargs["input"]
     if type(input_t) == torch.nn.Parameter or type(input_t) == torch.Tensor:
-        if not has_dynamic_shape(input_t.shape) and network.has_implicit_batch_dimension:
+        if (
+            not has_dynamic_shape(input_t.shape)
+            and network.has_implicit_batch_dimension
+        ):
             return torch.Size((IMPLICIT_BATCH_DIM,) + tuple(input_t.shape))
         return input_t.shape
 
     # input_val = get_trt_tensor(network, input_t, f"{name}_input_t")
     input_val = input_t
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"size received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"size received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if not has_dynamic_shape(input_val.shape):
         if network.has_implicit_batch_dimension:
@@ -492,7 +549,10 @@ def acc_ops_numel(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"size received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"size received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if has_dynamic_shape(input_val.shape):
         raise RuntimeError(f"numel does not support dynamic shapes.")
@@ -514,16 +574,25 @@ def acc_ops_batch_norm(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"BatchNorm2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"BatchNorm2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if has_dynamic_shape(input_val.shape):
         assert input_val.shape[1] != -1, "Channel dim can't be dynamic for batch norm."
 
-    scale = cast(torch.Tensor, to_numpy(cast(torch.Tensor, kwargs["weight"]))) / np.sqrt(
-        cast(torch.Tensor, to_numpy(cast(torch.Tensor, kwargs["running_var"]))) + cast(float, kwargs["eps"])
+    scale = cast(
+        torch.Tensor, to_numpy(cast(torch.Tensor, kwargs["weight"]))
+    ) / np.sqrt(
+        cast(torch.Tensor, to_numpy(cast(torch.Tensor, kwargs["running_var"])))
+        + cast(float, kwargs["eps"])
     )
 
-    bias = to_numpy(cast(torch.Tensor, kwargs["bias"])) - to_numpy(cast(torch.Tensor, kwargs["running_mean"])) * scale
+    bias = (
+        to_numpy(cast(torch.Tensor, kwargs["bias"]))
+        - to_numpy(cast(torch.Tensor, kwargs["running_mean"])) * scale
+    )
     power = np.ones_like(scale)
 
     # For BatchNorm1d, reshape 1d to 2d
@@ -561,21 +630,30 @@ def acc_ops_layer_norm(network, target, args, kwargs, name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"LayerNorm received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"LayerNorm received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     gamma = kwargs["weight"].detach().cpu().float().numpy()
     gamma_field = trt.PluginField("gamma", gamma, trt.PluginFieldType.FLOAT32)
     beta = kwargs["bias"].detach().cpu().float().numpy()
     beta_field = trt.PluginField("beta", beta, trt.PluginFieldType.FLOAT32)
-    eps_field = trt.PluginField("eps", np.array([kwargs["eps"]], dtype=np.float32), trt.PluginFieldType.FLOAT32)
+    eps_field = trt.PluginField(
+        "eps", np.array([kwargs["eps"]], dtype=np.float32), trt.PluginFieldType.FLOAT32
+    )
     try:
         normalized_shape = np.array(kwargs["normalized_shape"], dtype=np.int32)
     except TypeError:
         _LOGGER.error("Unable to convert normalized_shape to a field, fall back to []")
         normalized_shape = np.array([], dtype=np.int32)
 
-    normalized_shape_filed = trt.PluginField("normalized_shape", normalized_shape, trt.PluginFieldType.INT32)
-    field_collection = trt.PluginFieldCollection([gamma_field, beta_field, eps_field, normalized_shape_filed])
+    normalized_shape_filed = trt.PluginField(
+        "normalized_shape", normalized_shape, trt.PluginFieldType.INT32
+    )
+    field_collection = trt.PluginFieldCollection(
+        [gamma_field, beta_field, eps_field, normalized_shape_filed]
+    )
 
     try:
         if network.has_implicit_batch_dimension:
@@ -583,7 +661,9 @@ def acc_ops_layer_norm(network, target, args, kwargs, name):
         else:
             plugin = get_trt_plugin("LayerNormDynamic", field_collection, "1", "fx2trt")
     except AssertionError:
-        _LOGGER.error("Unable to find layer norm plugin, fall back to TensorRT implementation.")
+        _LOGGER.error(
+            "Unable to find layer norm plugin, fall back to TensorRT implementation."
+        )
         return layer_norm(network, target, args, kwargs, name)
     layer = network.add_plugin_v2([input_val], plugin)
     layer.name = name
@@ -600,7 +680,10 @@ def layer_norm(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"LayerNorm received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"LayerNorm received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     shape = kwargs["weight"].shape  # type: ignore[union-attr]
     broadcasted_shape = (1,) * (len(input_val.shape) - len(shape)) + shape
@@ -613,7 +696,9 @@ def layer_norm(
         axes |= 1 << (len(input_val.shape) - d - 1)
 
     # E[x]
-    mean_expected_layer = network.add_reduce(input_val, trt.ReduceOperation.AVG, axes, keep_dims=True)
+    mean_expected_layer = network.add_reduce(
+        input_val, trt.ReduceOperation.AVG, axes, keep_dims=True
+    )
     set_layer_name(mean_expected_layer, target, f"{name}_mean_expected")
 
     # X-E[x]
@@ -639,7 +724,9 @@ def layer_norm(
         target,
         f"{name}_pow_var",
     )
-    mean_trt_layer = network.add_reduce(pow_var, trt.ReduceOperation.AVG, axes, keep_dims=True)
+    mean_trt_layer = network.add_reduce(
+        pow_var, trt.ReduceOperation.AVG, axes, keep_dims=True
+    )
     set_layer_name(mean_trt_layer, target, f"{name}_mean")
     # Variance + eps
     eps_tensor = network.add_constant(
@@ -656,7 +743,9 @@ def layer_norm(
         f"{name}_add",
     )
     # SQRT((Var + eps))
-    sqrt_trt = add_unary_layer(network, add_trt, trt.UnaryOperation.SQRT, target, f"{name}_sqrt")
+    sqrt_trt = add_unary_layer(
+        network, add_trt, trt.UnaryOperation.SQRT, target, f"{name}_sqrt"
+    )
     # (x - E[x]) / sqrt((var + eps))
     div_trt = add_binary_elementwise_layer(
         network,
@@ -668,14 +757,10 @@ def layer_norm(
     )
 
     assert gamma is not None
-    gamma_tensor = network.add_constant(
-        gamma.shape, trt.Weights(np.ascontiguousarray(gamma))
-    )  # type: ignore[attr-defined]
+    gamma_tensor = network.add_constant(gamma.shape, trt.Weights(np.ascontiguousarray(gamma)))  # type: ignore[attr-defined]
     gamma_tensor.name = f"{name}_gamma"
     assert beta is not None
-    beta_tensor = network.add_constant(
-        gamma.shape, trt.Weights(np.ascontiguousarray(beta))
-    )  # type: ignore[attr-defined]
+    beta_tensor = network.add_constant(gamma.shape, trt.Weights(np.ascontiguousarray(beta)))  # type: ignore[attr-defined]
     beta_tensor.name = f"{name}_beta"
     # y * gamma + beta
     scale_layer = add_binary_elementwise_layer(
@@ -708,7 +793,10 @@ def acc_ops_softmax(
     input_ranks = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)  # type: ignore[union-attr]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"softmax received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"softmax received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     # Used to get dim when dim is None. Copied from PyTorch softmax implementation.
     def get_softmax_dim(ndim: int) -> int:
@@ -746,7 +834,9 @@ def acc_ops_tile(
     input_val = get_trt_tensor(network, input_t, f"{name}_input")
 
     dims = tuple(cast(Sequence[int], kwargs["dims"]))
-    n_input_dims = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
+    n_input_dims = len(input_val.shape) + (
+        1 if network.has_implicit_batch_dimension else 0
+    )
 
     if len(dims) > n_input_dims:
         assert not network.has_implicit_batch_dimension
@@ -761,12 +851,16 @@ def acc_ops_tile(
                 (num_preceding_ones,),
                 np.ascontiguousarray([1] * num_preceding_ones, np.int32),
             ).get_output(0)
-            reshape_layer = network.add_concatenation([preceding_ones, input_shape_layer.get_output(0)])
+            reshape_layer = network.add_concatenation(
+                [preceding_ones, input_shape_layer.get_output(0)]
+            )
             reshape_layer.axis = 0
             reshape_layer.name = f"{name}_reshape_dims"
             layer.set_input(1, reshape_layer.get_output(0))
         else:
-            layer.reshape_dims = (1,) * (len(dims) - n_input_dims) + tuple(input_val.shape)
+            layer.reshape_dims = (1,) * (len(dims) - n_input_dims) + tuple(
+                input_val.shape
+            )
         input_val = layer.get_output(0)
     else:
         dims = (1,) * (n_input_dims - len(dims)) + dims
@@ -806,11 +900,13 @@ def acc_ops_tile(
     set_layer_name(layer, target, name)
 
     if has_dynamic_shape(input_val.shape):  # type: ignore[union-attr]
-        starts_tensor = network.add_constant((len(dims),), np.ascontiguousarray([0] * len(dims), np.int32)).get_output(
-            0
-        )
+        starts_tensor = network.add_constant(
+            (len(dims),), np.ascontiguousarray([0] * len(dims), np.int32)
+        ).get_output(0)
         if all(isinstance(d, int) for d in dims):
-            dims_tensor = network.add_constant((len(dims),), np.ascontiguousarray(dims, np.int32)).get_output(0)
+            dims_tensor = network.add_constant(
+                (len(dims),), np.ascontiguousarray(dims, np.int32)
+            ).get_output(0)
         else:
             assert all(isinstance(d, TRTTensor) for d in dims)
             concat_dims_layer = network.add_concatenation(inputs=dims)
@@ -875,7 +971,9 @@ def acc_ops_leaky_relu(
     input_val = kwargs["input"]
     negative_slope = kwargs["negative_slope"]
     operation_type = trt.ActivationType.LEAKY_RELU
-    return add_activation_layer(network, input_val, operation_type, target, name, negative_slope)
+    return add_activation_layer(
+        network, input_val, operation_type, target, name, negative_slope
+    )
 
 
 @tensorrt_converter(acc_ops.elu)
@@ -1147,7 +1245,9 @@ def acc_ops_sum(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> TRTTensor:
-    return add_reduce_layer(network, target, args, kwargs, trt.ReduceOperation.SUM, name)
+    return add_reduce_layer(
+        network, target, args, kwargs, trt.ReduceOperation.SUM, name
+    )
 
 
 @tensorrt_converter(acc_ops.prod)
@@ -1158,7 +1258,9 @@ def acc_ops_prod(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> TRTTensor:
-    return add_reduce_layer(network, target, args, kwargs, trt.ReduceOperation.PROD, name)
+    return add_reduce_layer(
+        network, target, args, kwargs, trt.ReduceOperation.PROD, name
+    )
 
 
 @tensorrt_converter(acc_ops.mean)
@@ -1169,14 +1271,21 @@ def acc_ops_mean(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> TRTTensor:
-    return add_reduce_layer(network, target, args, kwargs, trt.ReduceOperation.AVG, name)
+    return add_reduce_layer(
+        network, target, args, kwargs, trt.ReduceOperation.AVG, name
+    )
 
 
 def add_acc_ops_full_reduce(network, target, args, kwargs, name, reduce_op):
     input_val = kwargs["input"]
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"max received input {input_val} that is not part " "of the TensorRT region!")
-    assert not network.has_implicit_batch_dimension, "Do not support max over all the elements for implicit batch."
+        raise RuntimeError(
+            f"max received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
+    assert (
+        not network.has_implicit_batch_dimension
+    ), "Do not support max over all the elements for implicit batch."
 
     dim = range(len(input_val.shape))
 
@@ -1200,7 +1309,9 @@ def add_acc_ops_dim_reduce(network, target, args, kwargs, name, reduce_op):
         new_kwargs["largest"] = False
     new_kwargs["sorted"] = False
 
-    topk_out0, topk_out1 = acc_ops_topk(network, target, args, new_kwargs, name + "_topk")
+    topk_out0, topk_out1 = acc_ops_topk(
+        network, target, args, new_kwargs, name + "_topk"
+    )
 
     topk_out0.name = f"{name}_topk0"
     topk_out1.name = f"{name}_topk1"
@@ -1210,7 +1321,9 @@ def add_acc_ops_dim_reduce(network, target, args, kwargs, name, reduce_op):
 
     dim = new_kwargs["dim"]
     if network.has_implicit_batch_dimension:
-        assert dim != 0, "can't reduce on dim == 0 when network has implicit batch dimension"
+        assert (
+            dim != 0
+        ), "can't reduce on dim == 0 when network has implicit batch dimension"
         # we remove the first dim in the shape tuple when it is implicit
         dim -= 1
     input_val = topk_out0
@@ -1244,7 +1357,9 @@ def acc_ops_max_full_reduce(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    return add_acc_ops_full_reduce(network, target, args, kwargs, name, trt.ReduceOperation.MAX)
+    return add_acc_ops_full_reduce(
+        network, target, args, kwargs, name, trt.ReduceOperation.MAX
+    )
 
 
 @tensorrt_converter(acc_ops.min_full_reduce, no_implicit_batch_dim=True)
@@ -1255,7 +1370,9 @@ def acc_ops_min_full_reduce(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    return add_acc_ops_full_reduce(network, target, args, kwargs, name, trt.ReduceOperation.MIN)
+    return add_acc_ops_full_reduce(
+        network, target, args, kwargs, name, trt.ReduceOperation.MIN
+    )
 
 
 @tensorrt_converter(acc_ops.max_dim_reduce)
@@ -1266,7 +1383,9 @@ def acc_ops_max_dim_reduce(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    return add_acc_ops_dim_reduce(network, target, args, kwargs, name, trt.ReduceOperation.MAX)
+    return add_acc_ops_dim_reduce(
+        network, target, args, kwargs, name, trt.ReduceOperation.MAX
+    )
 
 
 @tensorrt_converter(acc_ops.min_dim_reduce)
@@ -1277,7 +1396,9 @@ def acc_ops_min_dim_reduce(
     kwargs: Dict[str, Argument],
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    return add_acc_ops_dim_reduce(network, target, args, kwargs, name, trt.ReduceOperation.MIN)
+    return add_acc_ops_dim_reduce(
+        network, target, args, kwargs, name, trt.ReduceOperation.MIN
+    )
 
 
 @tensorrt_converter(acc_ops.maximum)
@@ -1384,7 +1505,9 @@ def acc_ops_logical_and(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `logical_and` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `logical_and` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1393,11 +1516,17 @@ def acc_ops_logical_and(
 
         def check_is_bool(input_t):
             if isinstance(input_t, TRTTensor):
-                assert input_t.dtype == trt.bool, "We currently do not support input is non-bool"
+                assert (
+                    input_t.dtype == trt.bool
+                ), "We currently do not support input is non-bool"
             elif isinstance(input_t, torch.Tensor):
-                assert input_t.dtype == torch.bool, "We currently do not support input is non-bool"
+                assert (
+                    input_t.dtype == torch.bool
+                ), "We currently do not support input is non-bool"
             else:
-                assert isinstance(input_t.bool), "We currently do not support input is non-bool"
+                assert isinstance(
+                    input_t.bool
+                ), "We currently do not support input is non-bool"
 
         check_is_bool(input_t)
         check_is_bool(other_t)
@@ -1409,7 +1538,9 @@ def check_is_bool(input_t):
         input_t = type_cast(network, target, f"{name}_input", input_t, trt.bool)
     if other_t.dtype != trt.bool:
         other_t = type_cast(network, target, f"{name}_other", other_t, trt.bool)
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.AND, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.AND, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.ne, no_implicit_batch_dim=True)
@@ -1421,7 +1552,9 @@ def acc_ops_ne(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `ne` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `ne` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1430,7 +1563,9 @@ def acc_ops_ne(
     other_t = get_trt_tensor(network, other_t, f"{name}_other_t")
 
     input_t, other_t = dtype_uniform(network, target, name, input_t, other_t)
-    eq_t = add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.EQUAL, target, name)
+    eq_t = add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.EQUAL, target, name
+    )
 
     return add_unary_layer(network, eq_t, trt.UnaryOperation.NOT, target, name)
 
@@ -1444,7 +1579,9 @@ def acc_ops_eq(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `eq` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `eq` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1453,7 +1590,9 @@ def acc_ops_eq(
     other_t = get_trt_tensor(network, other_t, f"{name}_other_t")
 
     input_t, other_t = dtype_uniform(network, target, name, input_t, other_t)
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.EQUAL, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.EQUAL, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.gt, no_implicit_batch_dim=True)
@@ -1465,7 +1604,9 @@ def acc_ops_gt(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `gt` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `gt` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1474,7 +1615,9 @@ def acc_ops_gt(
     other_t = get_trt_tensor(network, other_t, f"{name}_other_t")
 
     input_t, other_t = dtype_uniform(network, target, name, input_t, other_t)
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.GREATER, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.GREATER, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.lt, no_implicit_batch_dim=True)
@@ -1486,7 +1629,9 @@ def acc_ops_lt(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `le` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `le` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1495,7 +1640,9 @@ def acc_ops_lt(
     other_t = get_trt_tensor(network, other_t, f"{name}_other_t")
 
     input_t, other_t = dtype_uniform(network, target, name, input_t, other_t)
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.LESS, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.LESS, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.logical_or, no_implicit_batch_dim=True)
@@ -1507,7 +1654,9 @@ def acc_ops_logical_or(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `logical_or` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `logical_or` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1528,7 +1677,9 @@ def acc_ops_logical_or(
         set_layer_name(layer_o, target, f"{name}_other_dtype_change")
         other_t = layer_o.get_output(0)
 
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.OR, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.OR, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.logical_xor, no_implicit_batch_dim=True)
@@ -1540,7 +1691,9 @@ def acc_ops_logical_xor(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("The `logical_xor` function should be called with explicit batch dimension.")
+        raise RuntimeError(
+            "The `logical_xor` function should be called with explicit batch dimension."
+        )
 
     input_t = kwargs["input"]
     other_t = kwargs["other"]
@@ -1561,7 +1714,9 @@ def acc_ops_logical_xor(
         set_layer_name(layer_o, target, f"{name}_other_dtype_change")
         other_t = layer_o.get_output(0)
 
-    return add_binary_elementwise_layer(network, input_t, other_t, trt.ElementWiseOperation.XOR, target, name)
+    return add_binary_elementwise_layer(
+        network, input_t, other_t, trt.ElementWiseOperation.XOR, target, name
+    )
 
 
 # T113156424 Have some accuracy problems in hf_T5.
@@ -1611,15 +1766,22 @@ def acc_ops_any(
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_t = kwargs["input"]
     if not isinstance(input_t, TRTTensor):
-        raise RuntimeError(f"isinf received input {input_t} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"isinf received input {input_t} that is not part "
+            "of the TensorRT region!"
+        )
 
     if input_t.dtype in (trt.float32, trt.float16, trt.int32):
-        comp_t = torch.zeros(tuple([*input_t.shape])).to(torch_dtype_from_trt(input_t.dtype))
+        comp_t = torch.zeros(tuple([*input_t.shape])).to(
+            torch_dtype_from_trt(input_t.dtype)
+        )
         comp_t = get_trt_tensor(network, comp_t, f"{name}_comp_t")
         kwargs_new = {"input": input_t, "other": comp_t}
         eq_output = acc_ops_eq(network, target, None, kwargs_new, name + "_eq")
         kwargs_new = {"input": eq_output}
-        not_output = acc_ops_logical_not(network, target, None, kwargs_new, name + "_not")
+        not_output = acc_ops_logical_not(
+            network, target, None, kwargs_new, name + "_not"
+        )
     else:
         not_output = input_t
     # cast bool result to int
@@ -1649,7 +1811,9 @@ def acc_ops_fmod(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     # NOTE: TRT doesnt currently implement fmod so we need multiple operations to perform it
-    trunc_div_value = trunc_div(kwargs["input"], kwargs["other"], network, target, name + "_trunc_div")
+    trunc_div_value = trunc_div(
+        kwargs["input"], kwargs["other"], network, target, name + "_trunc_div"
+    )
     prod_value = add_binary_elementwise_layer(
         network,
         trunc_div_value,
@@ -1745,7 +1909,10 @@ def acc_ops_max_pool1d(
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_trt = kwargs["input"]
     if not isinstance(input_trt, TRTTensor):
-        raise RuntimeError(f"Max_pool1d received input {input_trt} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Max_pool1d received input {input_trt} that is not part "
+            "of the TensorRT region!"
+        )
 
     # adds unsqueeze layer -> max pool 2d -> squeeze layer to emulate max pool 1d.
     unsqueeze_layer = network.add_shuffle(input=input_trt)
@@ -1764,12 +1931,21 @@ def acc_ops_max_pool1d(
     if len(stride) == 0 or stride[0] == None:
         stride = kernel_size
 
-    if any([not isinstance(param, int) for param in [kernel_size[0], stride[0], padding[0], dilation[0]]]):
-        raise RuntimeError(f"Parameters kernel_size, stride, padding, and dilation should be of type int.")
+    if any(
+        [
+            not isinstance(param, int)
+            for param in [kernel_size[0], stride[0], padding[0], dilation[0]]
+        ]
+    ):
+        raise RuntimeError(
+            f"Parameters kernel_size, stride, padding, and dilation should be of type int."
+        )
     if dilation[0] != 1:
         raise RuntimeError(f"Only support dilation=1 for maxpool, but got {dilation}")
 
-    max_pooling_layer = network.add_pooling(input=input_trt, type=trt.PoolingType.MAX, window_size=(kernel_size[0], 1))
+    max_pooling_layer = network.add_pooling(
+        input=input_trt, type=trt.PoolingType.MAX, window_size=(kernel_size[0], 1)
+    )
     max_pooling_layer.stride_nd = stride + (1,)
     max_pooling_layer.padding_nd = padding + (0,)
     set_layer_name(max_pooling_layer, target, name)
@@ -1795,7 +1971,10 @@ def acc_ops_max_poolnd(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"MaxPool2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"MaxPool2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
     extend_len = 2 if target == acc_ops.max_pool2d else 3
     kernel_size = extend_attr_to_tuple(kwargs["kernel_size"], extend_len)
     stride = extend_attr_to_tuple(kwargs["stride"], extend_len)
@@ -1808,9 +1987,13 @@ def acc_ops_max_poolnd(
 
     ones = (1,) * extend_len
     if dilation != ones:
-        raise RuntimeError(f"Only support dilation=(1, 1) for maxpool, but got {dilation}")
+        raise RuntimeError(
+            f"Only support dilation=(1, 1) for maxpool, but got {dilation}"
+        )
 
-    layer = network.add_pooling_nd(input=input_val, type=trt.PoolingType.MAX, window_size=kernel_size)
+    layer = network.add_pooling_nd(
+        input=input_val, type=trt.PoolingType.MAX, window_size=kernel_size
+    )
     layer.stride_nd = stride
     layer.padding_nd = padding
     set_layer_name(layer, target, name)
@@ -1832,14 +2015,19 @@ def acc_ops_squeeze(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"squeeze received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"squeeze received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dim = cast(Optional[int], kwargs["dim"] if "dim" in kwargs else None)
     # Squeeze with dim=None would only work in explicit batch dim mode without any dynamic
     # dim, which is a very rare case. For now we just claim not supporting dim=None.
     assert dim is not None, "We don't support dim=None right now for squeeze."
 
-    dim = get_positive_dim(dim, len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0))
+    dim = get_positive_dim(
+        dim, len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
+    )
     if network.has_implicit_batch_dimension:
         assert dim != 0, "We don't support squeeze batch dim when it's implicit."
         dim -= 1
@@ -1990,11 +2178,18 @@ def acc_ops_unsqueeze(
     input_t = kwargs["input"]
     input_val = get_trt_tensor(network, input_t, f"{name}_input_t")
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"unsqueeze received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"unsqueeze received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dim = cast(int, kwargs["dim"])
     input_shape = input_val.shape
-    input_shape_size = len(input_val.shape) + 1 if network.has_implicit_batch_dimension else len(input_val.shape)
+    input_shape_size = (
+        len(input_val.shape) + 1
+        if network.has_implicit_batch_dimension
+        else len(input_val.shape)
+    )
     dim = get_positive_dim(dim, input_shape_size + 1)
 
     if network.has_implicit_batch_dimension:
@@ -2005,7 +2200,9 @@ def acc_ops_unsqueeze(
         len(get_dynamic_dims(input_val.shape)) <= 1
     ), "Currently we don't support unsqueeze with more than one dynamic dims."
     layer = network.add_shuffle(input_val)
-    layer.reshape_dims = tuple(input_val.shape)[:dim] + (1,) + tuple(input_val.shape)[dim:]
+    layer.reshape_dims = (
+        tuple(input_val.shape)[:dim] + (1,) + tuple(input_val.shape)[dim:]
+    )
     set_layer_name(layer, target, name)
     return layer.get_output(0)
 
@@ -2021,7 +2218,10 @@ def acc_ops_topk(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"topk received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"topk received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if kwargs["sorted"] and kwargs["k"] != 1:
         raise RuntimeError("Currently we don't support sorted=True in topk.")
@@ -2055,14 +2255,19 @@ def acc_ops_adaptive_avg_poolnd(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"AdaptiveAvgPool2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"AdaptiveAvgPool2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     extend_len = 2 if target == acc_ops.adaptive_avg_pool2d else 3
     assert all(
         input_val.shape[-(i + 1)] != -1 for i in range(extend_len)
     ), "AdaptiveAvgPool2d and AdaptiveAvgPool3d currently doesn't support dynamic shapes for last two dims."
 
-    output_size = cast(Sequence[int], extend_attr_to_tuple(kwargs["output_size"], extend_len))
+    output_size = cast(
+        Sequence[int], extend_attr_to_tuple(kwargs["output_size"], extend_len)
+    )
     for input_dim, output_dim in zip(input_val.shape[-extend_len:], output_size):
         if input_dim % output_dim != 0:
             raise RuntimeError(
@@ -2070,9 +2275,16 @@ def acc_ops_adaptive_avg_poolnd(
                 f"Got input dim {input_dim}, output dim {output_dim}"
             )
 
-    stride = tuple(input_val.shape[-extend_len + i] // output_size[i] for i in range(extend_len))
-    kernel_size = tuple(input_val.shape[-extend_len + i] - (output_size[i] - 1) * stride[i] for i in range(extend_len))
-    layer = network.add_pooling_nd(input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
+    stride = tuple(
+        input_val.shape[-extend_len + i] // output_size[i] for i in range(extend_len)
+    )
+    kernel_size = tuple(
+        input_val.shape[-extend_len + i] - (output_size[i] - 1) * stride[i]
+        for i in range(extend_len)
+    )
+    layer = network.add_pooling_nd(
+        input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size
+    )
     layer.stride_nd = stride
     set_layer_name(layer, target, name)
 
@@ -2090,7 +2302,10 @@ def acc_ops_avg_pool1d(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"AvgPool1d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"AvgPool1d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     kernel_size = extend_attr_to_tuple(kwargs["kernel_size"], 1)
     stride = extend_attr_to_tuple(kwargs["stride"], 1)
@@ -2106,7 +2321,9 @@ def acc_ops_avg_pool1d(
     set_layer_name(shuffle_layer, target, name + "_shuffle1")
     shuffle_out = shuffle_layer.get_output(0)
 
-    layer = network.add_pooling_nd(input=shuffle_out, type=trt.PoolingType.AVERAGE, window_size=(kernel_size[0], 1))
+    layer = network.add_pooling_nd(
+        input=shuffle_out, type=trt.PoolingType.AVERAGE, window_size=(kernel_size[0], 1)
+    )
 
     layer.stride_nd = stride + (1,)
     layer.padding_nd = padding + (0,)
@@ -2134,7 +2351,10 @@ def acc_ops_avg_pool2d(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"AvgPool2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"AvgPool2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     kernel_size = extend_attr_to_tuple(kwargs["kernel_size"], 2)
     stride = extend_attr_to_tuple(kwargs["stride"], 2)
@@ -2149,7 +2369,9 @@ def acc_ops_avg_pool2d(
     if divisor_override:
         raise RuntimeError("TensorRT does not support divisor_override.")
 
-    layer = network.add_pooling(input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
+    layer = network.add_pooling(
+        input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size
+    )
     layer.stride = stride
     layer.padding = padding
     layer.average_count_excludes_padding = False if count_include_pad else True
@@ -2213,14 +2435,19 @@ def acc_ops_slice_tensor(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"slice_tensor received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"slice_tensor received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     ranks = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
     dim = get_positive_dim(cast(int, kwargs["dim"]), ranks)
     dynamic_shape = has_dynamic_shape(input_val.shape)
     if network.has_implicit_batch_dimension:
         if dim == 0:
-            raise RuntimeError(f"We do not support slice_tensor at batch dim when it's implicit, got {dim}!")
+            raise RuntimeError(
+                f"We do not support slice_tensor at batch dim when it's implicit, got {dim}!"
+            )
         dim = dim - 1
     else:
         if dynamic_shape:
@@ -2238,7 +2465,9 @@ def acc_ops_slice_tensor(
     output_shape[dim] = (stop_int - start_int) // step_int
 
     if dynamic_shape > 0:
-        output_shape = get_shape_with_dynamic_shape(network, output_shape, input_val, target, name)
+        output_shape = get_shape_with_dynamic_shape(
+            network, output_shape, input_val, target, name
+        )
     layer = network.add_slice(
         input_val,
         start=start,
@@ -2275,7 +2504,9 @@ def acc_ops_expand_tensor(
     inshape = tuple(input_val.shape)
     shape = tuple(shape)
     start = tuple([0] * ranks)
-    stride = tuple([int(i == o) for i, o in zip(inshape, shape)])  # stride == 1 if dimensions match, 0 otherwise
+    stride = tuple(
+        [int(i == o) for i, o in zip(inshape, shape)]
+    )  # stride == 1 if dimensions match, 0 otherwise
     layer = network.add_slice(input_val, start=start, shape=shape, stride=stride)
     set_layer_name(layer, target, name)
     return layer.get_output(0)
@@ -2386,7 +2617,9 @@ def acc_ops_masked_fill_tensor(
     mask_t = kwargs["mask"]
     value_t = kwargs["value"]
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("We don't support masked_fill with implicit batch dimension due to select layer!")
+        raise RuntimeError(
+            "We don't support masked_fill with implicit batch dimension due to select layer!"
+        )
 
     shape = list(input_t.shape)
     mask_shape = list(mask_t.shape)
@@ -2443,7 +2676,10 @@ def acc_ops_split(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"split received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"split received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dim = cast(int, kwargs["dim"])
     dynamic_shape = has_dynamic_shape(input_val.shape)
@@ -2461,7 +2697,9 @@ def acc_ops_split(
     offset = 0
     num_splits = (input_val.shape[dim] + split_size - 1) // split_size
     if num_splits < 1:
-        raise RuntimeError(f"Invalid split: {input_val.shape[dim]} with split_size={split_size}")
+        raise RuntimeError(
+            f"Invalid split: {input_val.shape[dim]} with split_size={split_size}"
+        )
 
     max_offset = input_val.shape[dim]
     # add slice layers
@@ -2471,8 +2709,12 @@ def acc_ops_split(
         shape[dim] = min(split_size, cast(int, max_offset - offset))
         start[dim] = offset
         if dynamic_shape:
-            shape = get_shape_with_dynamic_shape(network, shape, input_val, target, f"{name}_shape_{i}")
-        layer = network.add_slice(input_val, start=start, shape=[] if dynamic_shape else shape, stride=stride)
+            shape = get_shape_with_dynamic_shape(
+                network, shape, input_val, target, f"{name}_shape_{i}"
+            )
+        layer = network.add_slice(
+            input_val, start=start, shape=[] if dynamic_shape else shape, stride=stride
+        )
         if dynamic_shape:
             layer.set_input(2, shape)
         offset += split_size
@@ -2492,11 +2734,15 @@ def acc_ops_linear(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Linear received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Linear received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dynamic_dims = get_dynamic_dims(input_val.shape)
     assert len(dynamic_dims) < 2 and input_val.shape[-1] != -1, (
-        "Currently we only support one dynmaic " "dim for linear and it can't be the last dim."
+        "Currently we only support one dynmaic "
+        "dim for linear and it can't be the last dim."
     )
 
     if isinstance(kwargs["weight"], torch.Tensor):
@@ -2516,7 +2762,9 @@ def acc_ops_linear(
     else:
         input_op = trt.MatrixOperation.NONE
 
-    input_val, weight = broadcast(network, input_val, weight, f"{name}_input", f"{name}_weight", preset_diff)
+    input_val, weight = broadcast(
+        network, input_val, weight, f"{name}_input", f"{name}_weight", preset_diff
+    )
     matmul_layer = network.add_matrix_multiply(input_val, input_op, weight, weight_op)
     set_layer_name(matmul_layer, target, f"{name}_matmul")
     res = matmul_layer.get_output(0)
@@ -2536,7 +2784,12 @@ def acc_ops_linear(
 
 def add_clamp(network, input, val, op):
     acc_ops_clamp_shape = (1,) * len(input.shape)  # broadcast all dimensions
-    acc_ops_clamp_tensor = val * torch.ones(acc_ops_clamp_shape, dtype=torch_dtype_from_trt(input.dtype)).cpu().numpy()
+    acc_ops_clamp_tensor = (
+        val
+        * torch.ones(acc_ops_clamp_shape, dtype=torch_dtype_from_trt(input.dtype))
+        .cpu()
+        .numpy()
+    )
     acc_ops_clamp_trt = network.add_constant(acc_ops_clamp_shape, acc_ops_clamp_tensor)
     layer = network.add_elementwise(input, acc_ops_clamp_trt.get_output(0), op)
 
@@ -2556,14 +2809,21 @@ def acc_ops_clamp(
     max_val = kwargs["max"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Clamp received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Clamp received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if min_val is not None:
-        clamp_min_layer = add_clamp(network, input_val, min_val, trt.ElementWiseOperation.MAX)
+        clamp_min_layer = add_clamp(
+            network, input_val, min_val, trt.ElementWiseOperation.MAX
+        )
         set_layer_name(clamp_min_layer, target, f"{name}_clamp_min")
         input_val = clamp_min_layer.get_output(0)
     if max_val is not None:
-        clamp_max_layer = add_clamp(network, input_val, max_val, trt.ElementWiseOperation.MIN)
+        clamp_max_layer = add_clamp(
+            network, input_val, max_val, trt.ElementWiseOperation.MIN
+        )
         set_layer_name(clamp_max_layer, target, f"{name}_clamp_max")
         input_val = clamp_max_layer.get_output(0)
 
@@ -2625,9 +2885,15 @@ def slice_to_trt_params(py_slice, dim_size):
         """
         Convert python slice to TensorRT slice layer parameters.
         """
-        start = get_positive_dim(py_slice.start, dim_size) if py_slice.start != None else 0
+        start = (
+            get_positive_dim(py_slice.start, dim_size) if py_slice.start != None else 0
+        )
         stride = py_slice.step if py_slice.step != None else 1
-        stop = get_positive_dim(py_slice.stop, dim_size) if py_slice.stop != None else dim_size
+        stop = (
+            get_positive_dim(py_slice.stop, dim_size)
+            if py_slice.stop != None
+            else dim_size
+        )
         size = math.ceil((stop - start) * 1.0 / stride)
         return start, size, stride
 
@@ -2636,7 +2902,9 @@ def slice_to_trt_params(py_slice, dim_size):
         # slice(None, None, None).
         batch_subscript = slices[0]
         if batch_subscript not in [slice(None, None, None), slice(0, None, None)]:
-            raise RuntimeError(f"{name}: Can't subscript batch dimension when it's implicit. Got {slices}")
+            raise RuntimeError(
+                f"{name}: Can't subscript batch dimension when it's implicit. Got {slices}"
+            )
 
         # Remove batch_dim subscript
         slices = slices[1:]
@@ -2729,7 +2997,9 @@ def acc_ops_cat(
     dim = kwargs["dim"]
 
     if any(not isinstance(t, TRTTensor) for t in tensors):  # type: ignore[union-attr]
-        raise RuntimeError(f"cat received inputs {tensors} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"cat received inputs {tensors} that is not part " "of the TensorRT region!"
+        )
     layer = network.add_concatenation(inputs=tensors)
     if dim < 0:
         if network.has_implicit_batch_dimension:
@@ -2755,7 +3025,9 @@ def acc_ops_matmul(
 
     for i in [input_val, other_val]:
         if not isinstance(i, TRTTensor):
-            raise RuntimeError(f"matmul received input {i} that is not part of the TensorRT region!")
+            raise RuntimeError(
+                f"matmul received input {i} that is not part of the TensorRT region!"
+            )
 
     input_matrix_op = other_matrix_op = trt.MatrixOperation.NONE
     preset_diff = 0
@@ -2768,8 +3040,12 @@ def acc_ops_matmul(
         preset_diff += 1
         other_matrix_op = trt.MatrixOperation.VECTOR
 
-    input_val, other_val = broadcast(network, input_val, other_val, f"{name}_input", f"{name}_other", preset_diff)
-    layer = network.add_matrix_multiply(input_val, input_matrix_op, other_val, other_matrix_op)
+    input_val, other_val = broadcast(
+        network, input_val, other_val, f"{name}_input", f"{name}_other", preset_diff
+    )
+    layer = network.add_matrix_multiply(
+        input_val, input_matrix_op, other_val, other_matrix_op
+    )
     set_layer_name(layer, target, name)
     return layer.get_output(0)
 
@@ -2785,7 +3061,10 @@ def acc_ops_hard_sigmoid(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Hard sigmoid received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Hard sigmoid received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return add_activation_layer(
         network,
@@ -2809,9 +3088,14 @@ def acc_ops_sigmoid(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Sigmoid received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Sigmoid received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
-    return add_activation_layer(network, input_val, trt.ActivationType.SIGMOID, target, name)
+    return add_activation_layer(
+        network, input_val, trt.ActivationType.SIGMOID, target, name
+    )
 
 
 @tensorrt_converter(acc_ops.permute)
@@ -2831,7 +3115,10 @@ def acc_ops_permute(
     permutation = [get_positive_dim(i, ranks) for i in cast(Sequence[int], index)]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"permute received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"permute received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if network.has_implicit_batch_dimension:
         assert permutation[0] == 0, "Can't permute batch dimension when it's implicit."
@@ -2854,7 +3141,10 @@ def acc_ops_quantize_per_tensor(
     input_val = get_trt_tensor(network, kwargs["input"], f"{name}_input")
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{name} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{name} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     qparams = kwargs["acc_out_ty"].qparams  # type: ignore[misc]
     q_scale = qparams["scale"]
@@ -2869,7 +3159,9 @@ def acc_ops_quantize_per_tensor(
     if q_zero_point != 0:
         raise RuntimeError(f"Only support zero_point == 0, get {q_zero_point}")
 
-    scale_layer = network.add_constant((1,), trt.Weights(np.ascontiguousarray([float(q_scale)], dtype=np.float32)))
+    scale_layer = network.add_constant(
+        (1,), trt.Weights(np.ascontiguousarray([float(q_scale)], dtype=np.float32))
+    )
     scale_layer.name = input_val.name + ".per_tensor_quant.scale"
     scale = scale_layer.get_output(0)
     # assert trt.__version__ > "8.0", "Explicit quantize op is only supported in "
@@ -2891,7 +3183,10 @@ def acc_ops_quantize_per_channel(
     input_val = get_trt_tensor(network, kwargs["input"], f"{name}_input")
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{name} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{name} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     qparams = kwargs["acc_out_ty"].qparams  # type: ignore[misc]
     q_per_channel_scales = qparams["scale"]
@@ -2908,9 +3203,13 @@ def acc_ops_quantize_per_channel(
     # is supported in TensorRT
     if not torch.equal(
         q_per_channel_zero_points,
-        torch.zeros(q_per_channel_zero_points.shape, dtype=q_per_channel_zero_points.dtype),
+        torch.zeros(
+            q_per_channel_zero_points.shape, dtype=q_per_channel_zero_points.dtype
+        ),
     ):
-        raise RuntimeError(f"Only support zero_point == 0, get {q_per_channel_zero_points}")
+        raise RuntimeError(
+            f"Only support zero_point == 0, get {q_per_channel_zero_points}"
+        )
 
     if not torch.all(torch.ge(q_per_channel_scales, 0)):
         raise RuntimeError(f"All scale values must be >= 0, get {q_per_channel_scales}")
@@ -2941,7 +3240,10 @@ def acc_ops_dequantize(
     input_val_tensor_meta = kwargs["_itensor_to_tensor_meta"][input_val]  # type: ignore[index]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{name} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{name} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     qparams = input_val_tensor_meta.qparams  # type: ignore[misc]
     qscheme = qparams["qscheme"]
@@ -2956,7 +3258,9 @@ def acc_ops_dequantize(
         q_scale = qparams["scale"]
         q_zero_point = qparams["zero_point"]
         q_axis = qparams["axis"]
-        assert isinstance(q_scale, immutable_list), "expected q_scale to be immutable_list got {}".format(type(q_scale))
+        assert isinstance(
+            q_scale, immutable_list
+        ), "expected q_scale to be immutable_list got {}".format(type(q_scale))
         scale_shape = (len(q_scale),)
         if any(x != 0 for x in q_zero_point):
             raise RuntimeError(f"Only support zero_point == 0, get {q_zero_point}")
@@ -2967,10 +3271,13 @@ def acc_ops_dequantize(
 
     if dtype not in (torch.quint8, torch.qint8, torch.qint32):
         raise RuntimeError(
-            "Only support (torch.quint8, torch.qint8, torch.qint32) " f"quantized type in dequantize, get {dtype}."
+            "Only support (torch.quint8, torch.qint8, torch.qint32) "
+            f"quantized type in dequantize, get {dtype}."
         )
 
-    scale_layer = network.add_constant(scale_shape, trt.Weights(np.ascontiguousarray(q_scale, dtype=np.float32)))
+    scale_layer = network.add_constant(
+        scale_shape, trt.Weights(np.ascontiguousarray(q_scale, dtype=np.float32))
+    )
     scale_layer.name = input_val.name + ".dequant.scale"
     scale = scale_layer.get_output(0)
     # assert trt.__version__ > "8.0", "Explicit dequantize op is only supported in "
@@ -2991,13 +3298,20 @@ def acc_ops_gelu(
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_val = kwargs["input"]
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"GELU received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"GELU received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("GeLU converter currently doesn't support implicit batch dimension")
+        raise RuntimeError(
+            "GeLU converter currently doesn't support implicit batch dimension"
+        )
 
     plugin_name = "CustomGeluPluginDynamic"
     # type_id 0 for float32, 1 for  float16
-    type_id = trt.PluginField("type_id", np.array(0, dtype=np.int32), trt.PluginFieldType.INT32)
+    type_id = trt.PluginField(
+        "type_id", np.array(0, dtype=np.int32), trt.PluginFieldType.INT32
+    )
     field_collection = TRTPluginFieldCollection([type_id])
     plugin_version = "1"
 
@@ -3022,7 +3336,10 @@ def acc_ops_chunk(
     input_dim_size = len(input_val.shape)  # type: ignore[union-attr]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"chunk received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"chunk received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dynamic_shape = has_dynamic_shape(input_val.shape)
     if network.has_implicit_batch_dimension:
@@ -3056,9 +3373,13 @@ def acc_ops_chunk(
         shape = list(input_val.shape)
         shape[dim] = min(split_size, max_offset - offset)
         if dynamic_shape:
-            shape = get_shape_with_dynamic_shape(network, shape, input_val, target, f"{name}_{i}")
+            shape = get_shape_with_dynamic_shape(
+                network, shape, input_val, target, f"{name}_{i}"
+            )
         start[dim] = offset
-        layer = network.add_slice(input_val, start=start, shape=[] if dynamic_shape else shape, stride=stride)
+        layer = network.add_slice(
+            input_val, start=start, shape=[] if dynamic_shape else shape, stride=stride
+        )
         if dynamic_shape:
             layer.set_input(2, shape)
         offset += split_size
@@ -3081,9 +3402,14 @@ def acc_ops_cumsum(
     input_dim_size = len(input_val.shape)  # type: ignore[union-attr]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"cumsum received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"cumsum received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
     if network.has_implicit_batch_dimension:
-        raise RuntimeError("cumsum converter currently doesn't support implicit batch dimension")
+        raise RuntimeError(
+            "cumsum converter currently doesn't support implicit batch dimension"
+        )
     dim = get_positive_dim(dim, input_dim_size)
     loop = network.add_loop()
     trip_limit = None
@@ -3103,7 +3429,9 @@ def acc_ops_cumsum(
     data = iterator.get_output(0)
     new_dims = tuple(data.shape)
     zero_tensor = torch.zeros(new_dims, dtype=trt_dtype_to_torch_dtype(input_val.dtype))
-    zero_tensor = network.add_constant(zero_tensor.shape, to_numpy(zero_tensor)).get_output(0)
+    zero_tensor = network.add_constant(
+        zero_tensor.shape, to_numpy(zero_tensor)
+    ).get_output(0)
 
     running_sum = loop.add_recurrence(zero_tensor)
     set_layer_name(running_sum, target, f"{name}_running_sum_1")
@@ -3150,7 +3478,10 @@ def acc_ops_hardtanh(
     input_val = kwargs["input"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"hardtanh received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"hardtanh received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return add_activation_layer(
         network,
@@ -3178,15 +3509,22 @@ def acc_ops_interpolate(
     align_corners = kwargs["align_corners"]
 
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"interpolate received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"interpolate received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     dim = input_val.shape
     ranks = len(input_val.shape)
     if network.has_implicit_batch_dimension:
-        assert ranks >= 2 and ranks <= 4, "Interpolate expects inputs are 3D,4D,5D in shape"
+        assert (
+            ranks >= 2 and ranks <= 4
+        ), "Interpolate expects inputs are 3D,4D,5D in shape"
         ranks = ranks - 1
     else:
-        assert ranks >= 3 and ranks <= 5, "Interpolate expects inputs are 3D,4D,5D in shape"
+        assert (
+            ranks >= 3 and ranks <= 5
+        ), "Interpolate expects inputs are 3D,4D,5D in shape"
         ranks = ranks - 2
 
     layer = network.add_resize(input_val)
@@ -3219,7 +3557,9 @@ def acc_ops_interpolate(
         layer.resize_mode = trt.ResizeMode.NEAREST
 
     if align_corners != None:
-        layer.coordinate_transformation = trt.ResizeCoordinateTransformation.ALIGN_CORNERS
+        layer.coordinate_transformation = (
+            trt.ResizeCoordinateTransformation.ALIGN_CORNERS
+        )
 
     set_layer_name(layer, target, name)
     return layer.get_output(0)
@@ -3241,7 +3581,9 @@ def acc_ops_new_ones(
         dtype_val = torch_dtype_from_trt(dtype_val)
 
     device_val = kwargs.get("device")
-    assert device_val == "cuda" or device_val == None, f"device is not `cuda` but {device_val}"
+    assert (
+        device_val == "cuda" or device_val == None
+    ), f"device is not `cuda` but {device_val}"
 
     weight = torch.ones(size_val, dtype=dtype_val)
     return get_trt_tensor(network, weight, f"{name}_weight")
@@ -3263,7 +3605,9 @@ def acc_ops_new_empty(
         dtype_val = torch_dtype_from_trt(dtype_val)
 
     device_val = kwargs.get("device")
-    assert device_val == "cuda" or device_val == None, f"device is not `cuda` but {device_val}"
+    assert (
+        device_val == "cuda" or device_val == None
+    ), f"device is not `cuda` but {device_val}"
 
     weight = torch.zeros(size_val, dtype=dtype_val)
     return get_trt_tensor(network, weight, f"{name}_weight")
@@ -3292,7 +3636,9 @@ def acc_ops_einsum(
     if const_flag:
         for i, input_source in enumerate(input_val):
             if input_source.dtype != trt.float32:
-                input_val[i] = type_cast(network, target, f"{name}_input_cast{i}", input_source, trt.float32)
+                input_val[i] = type_cast(
+                    network, target, f"{name}_input_cast{i}", input_source, trt.float32
+                )
     einsum_layer = network.add_einsum(inputs=input_val, equation=equation)
     return einsum_layer.get_output(0)
 
diff --git a/py/torch_tensorrt/fx/input_tensor_spec.py b/py/torch_tensorrt/fx/input_tensor_spec.py
index 2cab729b88..2fd49b9e5d 100644
--- a/py/torch_tensorrt/fx/input_tensor_spec.py
+++ b/py/torch_tensorrt/fx/input_tensor_spec.py
@@ -8,7 +8,10 @@
 
 def generate_input_specs(inputs, lower_setting, additional_inputs=None):
     # dynamic_batch is TRT only flag.
-    if not lower_setting.explicit_batch_dimension or lower_setting.dynamic_batch is False:
+    if (
+        not lower_setting.explicit_batch_dimension
+        or lower_setting.dynamic_batch is False
+    ):
         return InputTensorSpec.from_tensors(inputs)
 
     # If we don't have additional inputs, we assume the first dimension
@@ -32,12 +35,16 @@ def generate_input_specs(inputs, lower_setting, additional_inputs=None):
 
             for idx, values in enumerate(zip(i.shape, j.shape)):
                 if values[0] != values[1]:
-                    assert found_batch_dim is False, f"We've already found a batch dim, {i.shape}, {j.shape}."
+                    assert (
+                        found_batch_dim is False
+                    ), f"We've already found a batch dim, {i.shape}, {j.shape}."
                     batch_dims.append(idx)
                     found_batch_dim = True
 
             if not found_batch_dim:
-                raise RuntimeError(f"Failed to find batch dimension because shapes are the same, {i.shape}")
+                raise RuntimeError(
+                    f"Failed to find batch dimension because shapes are the same, {i.shape}"
+                )
 
         return InputTensorSpec.from_tensors_with_dynamic_batch_size(
             inputs,
@@ -152,10 +159,10 @@ def from_tensors_with_dynamic_batch_size(
             ), f"The {i}th tensor (shape: {tensor.shape}) doesn't have the correct batch size: {batch_size}."
             shape = list(tensor.shape)
             shape[batch_dim] = -1
-            shape_ranges: List[ShapeRange] = [
-                tuple(tuple(shape[0:batch_dim] + [bs] + shape[batch_dim + 1 :]) for bs in batch_size_range)
-            ] * opt_profile_replica  # type: ignore[list-item]
-            input_specs.append(cls(tuple(shape), tensor.dtype, tensor.device, shape_ranges))
+            shape_ranges: List[ShapeRange] = [tuple(tuple(shape[0:batch_dim] + [bs] + shape[batch_dim + 1 :]) for bs in batch_size_range)] * opt_profile_replica  # type: ignore[list-item]
+            input_specs.append(
+                cls(tuple(shape), tensor.dtype, tensor.device, shape_ranges)
+            )
 
         return input_specs
 
diff --git a/py/torch_tensorrt/fx/lower.py b/py/torch_tensorrt/fx/lower.py
index af91ad0037..387b4db841 100644
--- a/py/torch_tensorrt/fx/lower.py
+++ b/py/torch_tensorrt/fx/lower.py
@@ -79,7 +79,9 @@ class LowerTrtInterpreter:
 
     @classmethod
     def create(cls, lower_setting):
-        timing_cache_manager = TimingCacheManager(lower_setting.timing_cache_prefix, lower_setting.save_timing_cache)
+        timing_cache_manager = TimingCacheManager(
+            lower_setting.timing_cache_prefix, lower_setting.save_timing_cache
+        )
         return LowerTrtInterpreter(lower_setting, timing_cache_manager)
 
     def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
@@ -103,7 +105,9 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
             input_specs=self.lower_setting.input_specs,
             explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
             explicit_precision=self.lower_setting.explicit_precision,
-            logger_level=trt.Logger.VERBOSE if self.lower_setting.verbose_log else trt.Logger.WARNING,
+            logger_level=trt.Logger.VERBOSE
+            if self.lower_setting.verbose_log
+            else trt.Logger.WARNING,
         )
 
         interp_result: TRTInterpreterResult = interpreter.run(
@@ -127,7 +131,9 @@ def __call__(self, mod, input, split_name) -> TRTInterpreterResult:
         return interp_result
 
 
-def default_split_function(model: fx.GraphModule, inputs: Input, lower_setting: LowerSetting) -> SplitResult:
+def default_split_function(
+    model: fx.GraphModule, inputs: Input, lower_setting: LowerSetting
+) -> SplitResult:
     splitter_setting = TRTSplitterSetting()
     splitter_setting.use_implicit_batch_dim = not lower_setting.explicit_batch_dimension
     splitter_setting.min_acc_module_size = lower_setting.min_acc_module_size
@@ -143,7 +149,9 @@ def create_lower_trt_interpreter(lower_setting: LowerSetting) -> LowerTrtInterpr
 def default_lower_pass(
     create_trt_interpreter: Callable[[LowerSetting], LowerTrtInterpreter],
 ) -> PassFunc:
-    def lower_pass(mod: nn.Module, input: Input, lower_setting: LowerSetting, module_name: str) -> nn.Module:
+    def lower_pass(
+        mod: nn.Module, input: Input, lower_setting: LowerSetting, module_name: str
+    ) -> nn.Module:
         """
         Create a module transformation pass which lowers an `fx.GraphModule` into a
         `TRTModule`
@@ -217,10 +225,18 @@ def __call__(
     ) -> nn.Module:
         module.eval()
 
-        if self.lower_pass_manager_builder.lower_setting.lower_precision == LowerPrecision.FP16:
+        if (
+            self.lower_pass_manager_builder.lower_setting.lower_precision
+            == LowerPrecision.FP16
+        ):
             module.half()
-            inputs = tuple(x.half() if x is not None and x.dtype == torch.float32 else x for x in inputs)
-        pm = self.lower_pass_manager_builder.build_trt_lower_pipeline(inputs, additional_inputs)
+            inputs = tuple(
+                x.half() if x is not None and x.dtype == torch.float32 else x
+                for x in inputs
+            )
+        pm = self.lower_pass_manager_builder.build_trt_lower_pipeline(
+            inputs, additional_inputs
+        )
 
         lower_result = pm(module)
 
diff --git a/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py b/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py
index bb9b6c03fe..047ceb3ad2 100644
--- a/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py
+++ b/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py
@@ -18,10 +18,13 @@
 
 from .lower_basic_pass import run_const_fold
 
+
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
+
 Input = Sequence[Any]
 
+
 # ----------------------------------------------------------------------
 # OBSERVERS
 # ----------------------------------------------------------------------
@@ -34,13 +37,19 @@
 # >>>     lower(module, sample_input)
 
 # Observer for the model after the fuse passes.
-FUSE_PASSES_POST_OBSERVER: Observer[Callable[[nn.Module, Input], None]] = Observer("FUSE_PASSES_POST_OBSERVER")
+FUSE_PASSES_POST_OBSERVER: Observer[Callable[[nn.Module, Input], None]] = Observer(
+    "FUSE_PASSES_POST_OBSERVER"
+)
 
 # Observer for the TRT split submodules before lowering
-LOWER_SPLIT_PRE_OBSERVER: Observer[Callable[[str, nn.Module, Input], None]] = Observer("LOWER_SPLIT_PRE_OBSERVER")
+LOWER_SPLIT_PRE_OBSERVER: Observer[Callable[[str, nn.Module, Input], None]] = Observer(
+    "LOWER_SPLIT_PRE_OBSERVER"
+)
 
 # Observer for the TRT split submodules after lowering
-LOWER_SPLIT_POST_OBSERVER: Observer[Callable[[str, nn.Module, Input], None]] = Observer("LOWER_SPLIT_POST_OBSERVER")
+LOWER_SPLIT_POST_OBSERVER: Observer[Callable[[str, nn.Module, Input], None]] = Observer(
+    "LOWER_SPLIT_POST_OBSERVER"
+)
 # ----------------------------------------------------------------------
 
 
@@ -96,12 +105,18 @@ def graph_optimization_pass(self) -> PassManager:
             passes.append(wrapper(p, self._input))
 
         passes.append(inplace_wrapper(common_subexpression_elimination))
-        passes.append(inplace_wrapper(lambda m: FUSE_PASSES_POST_OBSERVER.observe(m, self._input)))
+        passes.append(
+            inplace_wrapper(lambda m: FUSE_PASSES_POST_OBSERVER.observe(m, self._input))
+        )
 
         return PassManager.build_from_passlist(passes)
 
     def _split_pass(self) -> PassManager:
-        passes = [partial(self._split_func, inputs=self._input, lower_setting=self.lower_setting)]
+        passes = [
+            partial(
+                self._split_func, inputs=self._input, lower_setting=self.lower_setting
+            )
+        ]
         passes.append(
             inplace_wrapper(
                 lambda split_result: remove_duplicate_output_args(
@@ -139,11 +154,17 @@ def lower_func(split_result: SplitResult) -> nn.Module:
                     self.lower_setting.input_specs = generate_input_specs(
                         submod_inputs,
                         self.lower_setting,
-                        additional_submodule_inputs[submod_name] if additional_submodule_inputs else None,
+                        additional_submodule_inputs[submod_name]
+                        if additional_submodule_inputs
+                        else None,
+                    )
+                    lowered_module = self._lower_func(
+                        submod, submod_inputs, self.lower_setting, submod_name
                     )
-                    lowered_module = self._lower_func(submod, submod_inputs, self.lower_setting, submod_name)
                     setattr(split_result.split_module, submod_name, lowered_module)
-                    LOWER_SPLIT_POST_OBSERVER.observe(submod_name, lowered_module, submod_inputs)
+                    LOWER_SPLIT_POST_OBSERVER.observe(
+                        submod_name, lowered_module, submod_inputs
+                    )
                     _LOGGER.info(
                         f"Lowering submodule {submod_name} elapsed time {datetime.datetime.now() - lowering_start_time}"
                     )
@@ -165,9 +186,13 @@ def lower_func(split_result: SplitResult) -> nn.Module:
                     _LOGGER.info(f"Now lowering submodule {submod_name}")
                     lowering_start_time = datetime.datetime.now()
 
-                    lowered_module = self._lower_func(submod, submod_inputs, self.lower_setting, submod_name)
+                    lowered_module = self._lower_func(
+                        submod, submod_inputs, self.lower_setting, submod_name
+                    )
                     setattr(split_result.split_module, submod_name, lowered_module)
-                    LOWER_SPLIT_POST_OBSERVER.observe(submod_name, lowered_module, submod_inputs)
+                    LOWER_SPLIT_POST_OBSERVER.observe(
+                        submod_name, lowered_module, submod_inputs
+                    )
                     _LOGGER.info(
                         f"Lowering submodule {submod_name} elapsed time {datetime.datetime.now() - lowering_start_time}"
                     )
@@ -176,7 +201,9 @@ def lower_func(split_result: SplitResult) -> nn.Module:
 
         return PassManager.build_from_passlist([lower_func])
 
-    def build_trt_lower_pipeline(self, input: Input, additional_input: Optional[Input] = None) -> PassManager:
+    def build_trt_lower_pipeline(
+        self, input: Input, additional_input: Optional[Input] = None
+    ) -> PassManager:
         self._input = input
         self._additional_input = additional_input
         passes = []
@@ -189,7 +216,9 @@ def build_trt_lower_pipeline(self, input: Input, additional_input: Optional[Inpu
         pm = PassManager.build_from_passlist(passes)
         return pm
 
-    def build_default_lower_pipeline(self, input: Input, additional_input: Optional[Input] = None) -> PassManager:
+    def build_default_lower_pipeline(
+        self, input: Input, additional_input: Optional[Input] = None
+    ) -> PassManager:
         self._input = input
         self._additional_input = additional_input
         passes = []
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_as_strided.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_as_strided.py
index be8ecaf92e..003c8bd3e0 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_as_strided.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_as_strided.py
@@ -34,6 +34,7 @@ def forward(self, x):
 
     # Testing with shape (-1, 3) results into error:
     # RuntimeError: setStorage: sizes [2, 3], strides [1, 2], storage offset 0, and itemsize 8 requiring a storage size of 48 are out of bounds for storage of size 16
+
     """
     def test_as_strided_with_dynamic_shape_four_dimensions(self):
         class Stride(nn.Module):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_avgpool.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_avgpool.py
index d96ed9e0b6..d5bf56e678 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_avgpool.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_avgpool.py
@@ -29,7 +29,9 @@ def test_avg_pool1d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.avg_pool = torch.nn.AvgPool1d(kernel_size, stride, padding, ceil_mode, count_include_pad)
+                self.avg_pool = torch.nn.AvgPool1d(
+                    kernel_size, stride, padding, ceil_mode, count_include_pad
+                )
 
             def forward(self, x):
                 return self.avg_pool(x)
@@ -60,7 +62,9 @@ def test_avg_pool1d_with_dynamic_shape(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.avg_pool = torch.nn.AvgPool1d(kernel_size, stride, padding, ceil_mode, count_include_pad)
+                self.avg_pool = torch.nn.AvgPool1d(
+                    kernel_size, stride, padding, ceil_mode, count_include_pad
+                )
 
             def forward(self, x):
                 return self.avg_pool(x)
@@ -73,7 +77,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.avg_pool1d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.avg_pool1d}
+        )
 
     def test_avg_pool2d_with_dynamic_shape_four_dimensions(
         self,
@@ -108,7 +114,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.avg_pool2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.avg_pool2d}
+        )
 
     @parameterized.expand(
         [
@@ -248,7 +256,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.avg_pool2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.avg_pool2d}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_batchnorm.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_batchnorm.py
index 551520dcf8..24f26d5480 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_batchnorm.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_batchnorm.py
@@ -34,7 +34,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.batch_norm})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.batch_norm}
+        )
 
     def test_batchnorm_with_dynamic_shape(self):
         class TestModule(torch.nn.Module):
@@ -53,7 +55,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.batch_norm})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.batch_norm}
+        )
 
     # Testing with shape=(-1, -1, -1, -1) results in AssertionError: Channel dim can't be dynamic for batch norm.
 
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_clamp.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_clamp.py
index 70dc86c098..5291331c67 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_clamp.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_clamp.py
@@ -53,7 +53,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.clamp})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.clamp}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
index 4af16b1815..e08484cd56 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
@@ -29,7 +29,9 @@ def test_conv1d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv1d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv1d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -60,7 +62,9 @@ def test_conv1d_with_dynamic_shape(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv1d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv1d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -73,7 +77,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.conv1d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.conv1d}
+        )
 
     @parameterized.expand(
         [
@@ -98,7 +104,9 @@ def test_conv2d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv2d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv2d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -125,7 +133,9 @@ def forward(self, x):
                 shape_ranges=[((1, 3, 1, 1), (1, 3, 4, 4), (32, 3, 128, 128))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.conv2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.conv2d}
+        )
 
     @parameterized.expand(
         [
@@ -134,8 +144,7 @@ def forward(self, x):
             ("tuple_parameters", 1, (1, 1, 1), (1, 1, 1)),
             param("non_zero_padding", 1, padding=1),
             param("dilation", 1, dilation=2),
-            # TODO TRT 8.4.1 will trigger issue with this test. T127981773
-            # param("groups", 1, groups=3),
+            param("groups", 1, groups=3),
         ]
     )
     def test_conv3d(
@@ -151,7 +160,9 @@ def test_conv3d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv3d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv3d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -178,7 +189,9 @@ def forward(self, x):
                 shape_ranges=[((1, 3, 1, 1, 1), (1, 3, 4, 4, 4), (8, 3, 32, 32, 32))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.conv3d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.conv3d}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_gelu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_gelu.py
index f380e03032..c33088a498 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_gelu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_gelu.py
@@ -7,7 +7,9 @@
 from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec
 
 
-@unittest.skip(reason="Could not find CustomGeluPluginDynamic. Enable it once we upgrade TRT to 8.4")
+@unittest.skip(
+    reason="Could not find CustomGeluPluginDynamic. Enable it once we upgrade TRT to 8.4"
+)
 class TestGELU(AccTestCase):
     def test_gelu(self):
         class TestModule(nn.Module):
@@ -34,7 +36,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.gelu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.gelu}
+        )
 
     def test_gelu_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -49,7 +53,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.gelu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.gelu}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_interpolate.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_interpolate.py
index 4d2a27d372..f0054e5cb7 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_interpolate.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_interpolate.py
@@ -133,7 +133,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Interpolate(), input_specs, expected_ops={acc_ops.interpolate})
+        self.run_test_with_dynamic_shape(
+            Interpolate(), input_specs, expected_ops={acc_ops.interpolate}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_matmul.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_matmul.py
index 051b4f00a2..50e1f5bfcd 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_matmul.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_matmul.py
@@ -73,7 +73,11 @@ def forward(self, input, other):
                 return torch.matmul(input, other)
 
         inputs = [torch.randn(*input_shape), torch.randn(*other_shape)]
-        test_implicit_batch_dim = input_shape[0] == other_shape[0] and len(input_shape) > 2 and len(other_shape) > 2
+        test_implicit_batch_dim = (
+            input_shape[0] == other_shape[0]
+            and len(input_shape) > 2
+            and len(other_shape) > 2
+        )
         self.run_test(
             MatMul(),
             inputs,
@@ -104,7 +108,9 @@ def forward(self, input, other):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Matmul(), input_specs, expected_ops={acc_ops.matmul})
+        self.run_test_with_dynamic_shape(
+            Matmul(), input_specs, expected_ops={acc_ops.matmul}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_max.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_max.py
index fd92dfe956..1da3dd07fa 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_max.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_max.py
@@ -104,7 +104,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MaxDimReduce(), input_specs, expected_ops={acc_ops.max_dim_reduce})
+        self.run_test_with_dynamic_shape(
+            MaxDimReduce(), input_specs, expected_ops={acc_ops.max_dim_reduce}
+        )
 
     def test_max_full_reduce(
         self,
@@ -124,7 +126,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MaxFullReduce(), input_specs, expected_ops={acc_ops.max_full_reduce})
+        self.run_test_with_dynamic_shape(
+            MaxFullReduce(), input_specs, expected_ops={acc_ops.max_full_reduce}
+        )
 
     def test_max_method(self):
         class MaxMethod(torch.nn.Module):
@@ -147,7 +151,9 @@ def forward(self, input, other):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MaxMethod(), input_specs, expected_ops={acc_ops.maximum})
+        self.run_test_with_dynamic_shape(
+            MaxMethod(), input_specs, expected_ops={acc_ops.maximum}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_min.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_min.py
index e720f70386..33b2aa5671 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_min.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_min.py
@@ -103,7 +103,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MinDimReduce(), input_specs, expected_ops={acc_ops.min_dim_reduce})
+        self.run_test_with_dynamic_shape(
+            MinDimReduce(), input_specs, expected_ops={acc_ops.min_dim_reduce}
+        )
 
     def test_min_full_reduce(
         self,
@@ -123,7 +125,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MinFullReduce(), input_specs, expected_ops={acc_ops.min_full_reduce})
+        self.run_test_with_dynamic_shape(
+            MinFullReduce(), input_specs, expected_ops={acc_ops.min_full_reduce}
+        )
 
     def test_min_method(self):
         class MinMethod(torch.nn.Module):
@@ -146,7 +150,9 @@ def forward(self, input, other):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(MinMethod(), input_specs, expected_ops={acc_ops.minimum})
+        self.run_test_with_dynamic_shape(
+            MinMethod(), input_specs, expected_ops={acc_ops.minimum}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_narrow.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_narrow.py
index f59d6e9256..9c2a4f34ab 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_narrow.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_narrow.py
@@ -25,7 +25,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Narrow(), input_specs, expected_ops={acc_ops.slice_tensor})
+        self.run_test_with_dynamic_shape(
+            Narrow(), input_specs, expected_ops={acc_ops.slice_tensor}
+        )
 
 
 class TestNarrowConverter(AccTestCase):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py
index 78d3eefa9a..c82eee79ee 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py
@@ -56,6 +56,7 @@ def forward(self, x):
     # Testing with (-1, 3, 3, 3) results into following error:
     # test_pad_with_dynamic_shape_four_dimensions_0_2d (deeplearning.trt.torch_tensorrt.py.torch_tensorrt.fx.test.converters.acc_op.test_pad.TestPadConverter) ... [07/15/2022-09:23:18] [TRT] [E] 2: [intInterval.cpp::max::26] Error Code 2: Internal Error (Assertion !empty() failed. )
     # Segmentation fault (core dumped)
+
     """
     def test_pad_with_dynamic_shape_four_dimensions(self):
         class Pad(nn.Module):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_prod.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_prod.py
index 835f6a5f0e..26e4332fdc 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_prod.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_prod.py
@@ -72,7 +72,9 @@ def forward(self, x):
             test_implicit_batch_dim=(dim != 0),
         )
 
-    @parameterized.expand([(f"{acc_ops.prod.__name__}_no_dim_no_keepdim", torch.prod, acc_ops.prod)])
+    @parameterized.expand(
+        [(f"{acc_ops.prod.__name__}_no_dim_no_keepdim", torch.prod, acc_ops.prod)]
+    )
     def test_prod_all_dims(
         self,
         test_name,
@@ -107,7 +109,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Prod(), input_specs, expected_ops={acc_ops.prod})
+        self.run_test_with_dynamic_shape(
+            Prod(), input_specs, expected_ops={acc_ops.prod}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_reduce_ops.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_reduce_ops.py
index f47dc0ea7f..879a0e0eb5 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_reduce_ops.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_reduce_ops.py
@@ -52,7 +52,12 @@ def forward(self, x):
             test_implicit_batch_dim=(dim != 0),
         )
 
-    @parameterized.expand([(f"{acc_op.__name__}_no_dim_no_keepdim", op, acc_op) for op, acc_op in reduce_ops])
+    @parameterized.expand(
+        [
+            (f"{acc_op.__name__}_no_dim_no_keepdim", op, acc_op)
+            for op, acc_op in reduce_ops
+        ]
+    )
     def test_reduce_all_dims(
         self,
         test_name,
@@ -71,7 +76,12 @@ def forward(self, x):
             test_implicit_batch_dim=False,
         )
 
-    @parameterized.expand([(f"{acc_op.__name__}_no_dim_no_keepdim", op, acc_op) for op, acc_op in reduce_ops])
+    @parameterized.expand(
+        [
+            (f"{acc_op.__name__}_no_dim_no_keepdim", op, acc_op)
+            for op, acc_op in reduce_ops
+        ]
+    )
     def test_reduce_all_dims_with_dynamic_shape_four_dimensions(
         self,
         test_name,
@@ -89,7 +99,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1, 1), (3, 3, 3, 3), (3, 3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Reduce(), input_specs, expected_ops={expected_acc_op})
+        self.run_test_with_dynamic_shape(
+            Reduce(), input_specs, expected_ops={expected_acc_op}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_tile.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_tile.py
index 66122e5386..cd8e6f97b5 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_tile.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_tile.py
@@ -28,7 +28,10 @@ def forward(self, x):
             Tile(dims),
             inputs,
             expected_ops={acc_ops.tile},
-            test_implicit_batch_dim=(len(input_shape) > len(dims) or (len(input_shape) == len(dims) and dims[0] == 1)),
+            test_implicit_batch_dim=(
+                len(input_shape) > len(dims)
+                or (len(input_shape) == len(dims) and dims[0] == 1)
+            ),
         )
 
     @parameterized.expand(
@@ -61,7 +64,9 @@ def forward(self, x):
                 ],
             ),
         ]
-        self.run_test_with_dynamic_shape(Tile(dims), input_specs, expected_ops={acc_ops.tile})
+        self.run_test_with_dynamic_shape(
+            Tile(dims), input_specs, expected_ops={acc_ops.tile}
+        )
 
     @parameterized.expand(
         [
@@ -85,7 +90,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Tile(dims), input_specs, expected_ops={acc_ops.tile})
+        self.run_test_with_dynamic_shape(
+            Tile(dims), input_specs, expected_ops={acc_ops.tile}
+        )
 
     def test_tile_non_int_dims(self):
         class Tile(nn.Module):
@@ -98,7 +105,9 @@ def forward(self, x, y):
 
         inputs = [torch.randn(2, 2, 3), torch.randn(2, 2, 3)]
         batch_size_range = (1, 2, 3)
-        input_specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(inputs, batch_size_range)
+        input_specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(
+            inputs, batch_size_range
+        )
         self.run_test_with_dynamic_shape(
             Tile(),
             input_specs,
@@ -127,7 +136,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Tile(), input_specs, expected_ops={acc_ops.tile})
+        self.run_test_with_dynamic_shape(
+            Tile(), input_specs, expected_ops={acc_ops.tile}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_to_dtype.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_to_dtype.py
index a14c207856..67a07d83cf 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_to_dtype.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_to_dtype.py
@@ -53,7 +53,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(To(), inputs, expected_ops={acc_ops.to_dtype}, test_implicit_batch_dim=False)
+        self.run_test(
+            To(), inputs, expected_ops={acc_ops.to_dtype}, test_implicit_batch_dim=False
+        )
 
     def test_cuda_fp16(self):
         class To(torch.nn.Module):
@@ -106,7 +108,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(To(), input_specs, expected_ops={acc_ops.to_dtype, acc_ops.add})
+        self.run_test_with_dynamic_shape(
+            To(), input_specs, expected_ops={acc_ops.to_dtype, acc_ops.add}
+        )
 
     def test_device(self):
         class To(torch.nn.Module):
@@ -150,7 +154,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(To(), input_specs, expected_ops={acc_ops.to_dtype, acc_ops.add})
+        self.run_test_with_dynamic_shape(
+            To(), input_specs, expected_ops={acc_ops.to_dtype, acc_ops.add}
+        )
 
     def test_device_fp16(self):
         class To(torch.nn.Module):
@@ -240,7 +246,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(To(), input_specs, expected_ops={acc_ops.to_dtype})
+        self.run_test_with_dynamic_shape(
+            To(), input_specs, expected_ops={acc_ops.to_dtype}
+        )
 
     # Half is not suitable for dynamic shape
     # Error: assert engine
@@ -301,7 +309,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(To(), input_specs, expected_ops={acc_ops.to_dtype})
+        self.run_test_with_dynamic_shape(
+            To(), input_specs, expected_ops={acc_ops.to_dtype}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_topk.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_topk.py
index a7cfde2c6e..7ae93bf9bd 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_topk.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_topk.py
@@ -26,7 +26,9 @@ def __init__(self, k, dim):
 
             def forward(self, x):
                 if self.dim is not None:
-                    out = torch.topk(x, k=self.k, dim=self.dim, largest=self.largest, sorted=False)
+                    out = torch.topk(
+                        x, k=self.k, dim=self.dim, largest=self.largest, sorted=False
+                    )
                 else:
                     out = torch.topk(x, k=self.k, largest=self.largest, sorted=False)
                 return out[0], out[1]
@@ -58,7 +60,9 @@ def __init__(self, k, dim):
 
             def forward(self, x):
                 if self.dim is not None:
-                    out = torch.topk(x, k=self.k, dim=self.dim, largest=self.largest, sorted=False)
+                    out = torch.topk(
+                        x, k=self.k, dim=self.dim, largest=self.largest, sorted=False
+                    )
                 else:
                     out = torch.topk(x, k=self.k, largest=self.largest, sorted=False)
                 return out[0], out[1]
@@ -71,7 +75,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TopK(k, dim), input_specs, expected_ops={acc_ops.topk})
+        self.run_test_with_dynamic_shape(
+            TopK(k, dim), input_specs, expected_ops={acc_ops.topk}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
index 0bfffd210f..839ff44566 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
@@ -1,5 +1,4 @@
 import torch
-import unittest
 import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
 from torch.testing._internal.common_utils import run_tests
 from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec
@@ -104,7 +103,6 @@ def forward(self, input):
             precision=LowerPrecision.FP16,
         )
 
-    @unittest.skip("Does not pass in TRT 8.4.1 T127981773")
     def test_type_tensor_with_dynamic_shape_four_dimensions(self):
         class Type_as(torch.nn.Module):
             def forward(self, input):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_unary_ops.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_unary_ops.py
index 59c8fdea4f..7fad26dc84 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_unary_ops.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_unary_ops.py
@@ -64,7 +64,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(orig_op), input_specs, expected_ops={expected_op})
+        self.run_test_with_dynamic_shape(
+            TestModule(orig_op), input_specs, expected_ops={expected_op}
+        )
 
 
 class TestUnaryOpNotConverters(AccTestCase):
@@ -87,7 +89,9 @@ def forward(self, x):
 
         m = TestModule(orig_op)
         inputs = [torch.randn(2, 2, 3).to(input_dtype)]
-        self.run_test(m, inputs, expected_ops={expected_op}, test_implicit_batch_dim=False)
+        self.run_test(
+            m, inputs, expected_ops={expected_op}, test_implicit_batch_dim=False
+        )
 
 
 class TestUnaryOpNotConvertersWithDynamicShapeFourDimensions(AccTestCase):
@@ -116,7 +120,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(orig_op), input_specs, expected_ops={expected_op})
+        self.run_test_with_dynamic_shape(
+            TestModule(orig_op), input_specs, expected_ops={expected_op}
+        )
 
 
 class TestUnaryRSQRTConverters(AccTestCase):
@@ -144,7 +150,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.sqrt, acc_ops.reciprocal})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.sqrt, acc_ops.reciprocal}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/core/test_input_tensor_spec.py b/py/torch_tensorrt/fx/test/core/test_input_tensor_spec.py
index aab7832101..db848eaf1c 100644
--- a/py/torch_tensorrt/fx/test/core/test_input_tensor_spec.py
+++ b/py/torch_tensorrt/fx/test/core/test_input_tensor_spec.py
@@ -37,7 +37,9 @@ def test_from_tensors(self):
     def test_from_tensors_with_dynamic_batch_size(self):
         tensors = [torch.randn(1, 2, 3), torch.randn(1, 4)]
         batch_size_range = [2, 3, 4]
-        specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(tensors, batch_size_range)
+        specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(
+            tensors, batch_size_range
+        )
         for spec, tensor in zip(specs, tensors):
             self._validate_spec(spec, tensor, dynamic_dims=[0])
 
@@ -48,7 +50,9 @@ def test_from_tensors_with_dynamic_batch_size(self):
     def test_from_tensors_with_dynamic_batch_size_different_batch_dims(self):
         tensors = [torch.randn(1, 2, 3), torch.randn(2, 1, 4)]
         batch_size_range = [2, 3, 4]
-        specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(tensors, batch_size_range, batch_dims=[0, 1])
+        specs = InputTensorSpec.from_tensors_with_dynamic_batch_size(
+            tensors, batch_size_range, batch_dims=[0, 1]
+        )
         for i, spec_and_tensor in enumerate(zip(specs, tensors)):
             spec, tensor = spec_and_tensor
             self._validate_spec(spec, tensor, dynamic_dims=[i])
@@ -60,7 +64,9 @@ def test_from_tensors_with_dynamic_batch_size_different_batch_dims(self):
                 self.assertSequenceEqual(tensor_shape, shape)
 
     def test_generate_input_specs(self):
-        lower_setting = LowerSetting(explicit_batch_dimension=False, max_batch_size=256, opt_profile_replica=2)
+        lower_setting = LowerSetting(
+            explicit_batch_dimension=False, max_batch_size=256, opt_profile_replica=2
+        )
 
         # Implicit batch dim.
         inputs = [torch.randn(1, 2, 3)]
diff --git a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py
index de9f962851..9db4183e64 100644
--- a/py/torch_tensorrt/fx/test/passes/test_graph_opts.py
+++ b/py/torch_tensorrt/fx/test/passes/test_graph_opts.py
@@ -11,6 +11,9 @@
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
 def debug_print_graph_module(mod_graph: torch.fx.GraphModule) -> None:
     """
     Helper func to print model's graph in plain and tabular format, also print code.
diff --git a/py/torch_tensorrt/fx/test/quant/test_quant_trt.py b/py/torch_tensorrt/fx/test/quant/test_quant_trt.py
index d4fdfce087..eee8b6da37 100644
--- a/py/torch_tensorrt/fx/test/quant/test_quant_trt.py
+++ b/py/torch_tensorrt/fx/test/quant/test_quant_trt.py
@@ -12,11 +12,13 @@
 
 import torch_tensorrt.fx.tracer.acc_tracer.acc_tracer as acc_tracer
 from torch.ao.quantization import default_qconfig
-from torch.ao.quantization.backend_config.observation_type import ObservationType
+from torch.ao.quantization.backend_config import (
+    get_tensorrt_backend_config_dict,
+    ObservationType,
+)
 from torch.ao.quantization.fx.match_utils import MatchAllNode
 from torch.ao.quantization.quantize_fx import (
     convert_to_reference_fx,
-    get_tensorrt_backend_config_dict,
     prepare_fx,
     prepare_qat_fx,
 )
@@ -46,7 +48,9 @@ def lower_to_trt(model, inputs, shape_ranges):
         )
     ]
 
-    interp = TRTInterpreter(model, input_specs, explicit_batch_dimension=True, explicit_precision=True)
+    interp = TRTInterpreter(
+        model, input_specs, explicit_batch_dimension=True, explicit_precision=True
+    )
     result = interp.run(lower_precision=LowerPrecision.INT8)
     trt_mod = TRTModule(result.engine, result.input_names, result.output_names)
     return trt_mod
@@ -63,7 +67,9 @@ def setUp(self):
         )
         self.trt_backend_config_dict = get_tensorrt_backend_config_dict()
 
-    def _test_quantized_inputs_outputs(self, prepare_custom_config_dict, prepare_count_check, convert_count_check):
+    def _test_quantized_inputs_outputs(
+        self, prepare_custom_config_dict, prepare_count_check, convert_count_check
+    ):
         """
         Test the option to have inputs and outputs of the graph quantized
         """
@@ -92,7 +98,7 @@ def forward(self, x):
         )
         self.checkGraphModuleNodes(mp, expected_node_occurrence=prepare_count_check)
         mp(torch.randn(1, 1, 4, 4))
-        mq = convert_to_reference_fx(mp, backend_config_dict=self.trt_backend_config_dict)
+        mq = convert_to_reference_fx(mp, backend_config=self.trt_backend_config_dict)
         self.checkGraphModuleNodes(mq, expected_node_occurrence=convert_count_check)
 
     def test_quantized_input_quantized_output(self):
@@ -109,7 +115,9 @@ def test_quantized_input_quantized_output(self):
             # input of ref conv1 and input of ref conv2
             ns.call_method("dequantize"): 2,
         }
-        self._test_quantized_inputs_outputs(prepare_custom_config_dict, prepare_count_check, convert_count_check)
+        self._test_quantized_inputs_outputs(
+            prepare_custom_config_dict, prepare_count_check, convert_count_check
+        )
 
     def test_fp32_input_quantized_output(self):
         prepare_custom_config_dict = {"output_quantized_idxs": [0]}
@@ -122,7 +130,9 @@ def test_fp32_input_quantized_output(self):
             # input of conv1, conv2
             ns.call_method("dequantize"): 2,
         }
-        self._test_quantized_inputs_outputs(prepare_custom_config_dict, prepare_count_check, convert_count_check)
+        self._test_quantized_inputs_outputs(
+            prepare_custom_config_dict, prepare_count_check, convert_count_check
+        )
 
     def test_quantized_input_fp32_output(self):
         prepare_custom_config_dict = {"input_quantized_idxs": [0]}
@@ -135,7 +145,9 @@ def test_quantized_input_fp32_output(self):
             # input of ref conv1, input of ref conv2, final output
             ns.call_method("dequantize"): 3,
         }
-        self._test_quantized_inputs_outputs(prepare_custom_config_dict, prepare_count_check, convert_count_check)
+        self._test_quantized_inputs_outputs(
+            prepare_custom_config_dict, prepare_count_check, convert_count_check
+        )
 
     def test_fp32_input_fp32_output(self):
         prepare_custom_config_dict = {}
@@ -146,7 +158,9 @@ def test_fp32_input_fp32_output(self):
             ns.call_function(torch.quantize_per_tensor): 3,
             ns.call_method("dequantize"): 3,
         }
-        self._test_quantized_inputs_outputs(prepare_custom_config_dict, prepare_count_check, convert_count_check)
+        self._test_quantized_inputs_outputs(
+            prepare_custom_config_dict, prepare_count_check, convert_count_check
+        )
 
     def _test_standalone_module(
         self,
@@ -201,10 +215,16 @@ def forward(self, x):
         # instantiate M and RefM and align the parameters
         original_m = M().eval()
         original_ref_m = RefM().eval()
-        original_ref_m.conv1.weight = torch.nn.Parameter(original_m.conv.weight.detach())
+        original_ref_m.conv1.weight = torch.nn.Parameter(
+            original_m.conv.weight.detach()
+        )
         original_ref_m.conv1.bias = torch.nn.Parameter(original_m.conv.bias.detach())
-        original_ref_m.conv2.weight = torch.nn.Parameter(original_m.standalone.conv.weight.detach())
-        original_ref_m.conv2.bias = torch.nn.Parameter(original_m.standalone.conv.bias.detach())
+        original_ref_m.conv2.weight = torch.nn.Parameter(
+            original_m.standalone.conv.weight.detach()
+        )
+        original_ref_m.conv2.bias = torch.nn.Parameter(
+            original_m.standalone.conv.bias.detach()
+        )
 
         sm_example_inputs = (data,)
         prepare_config = {
@@ -230,17 +250,21 @@ def forward(self, x):
             qconfig_dict,
             example_inputs,
             prepare_custom_config=prepare_config,
-            backend_config_dict=backend_config_dict,
+            backend_config=backend_config_dict,
         )
         # calibration
         m(data)
         self.checkGraphModuleNodes(m, expected_node_occurrence=prepare_count_check)
-        self.checkGraphModuleNodes(m.standalone, expected_node_occurrence=standalone_prepare_count_check)
+        self.checkGraphModuleNodes(
+            m.standalone, expected_node_occurrence=standalone_prepare_count_check
+        )
 
         # check converted/quantized model
-        m = convert_to_reference_fx(m, backend_config_dict=backend_config_dict)
+        m = convert_to_reference_fx(m, backend_config=backend_config_dict)
         self.checkGraphModuleNodes(m, expected_node_occurrence=convert_count_check)
-        self.checkGraphModuleNodes(m.standalone, expected_node_occurrence=standalone_convert_count_check)
+        self.checkGraphModuleNodes(
+            m.standalone, expected_node_occurrence=standalone_convert_count_check
+        )
         res = m(data)
 
         # quantize the reference model
@@ -248,10 +272,10 @@ def forward(self, x):
             original_ref_m_copy,
             qconfig_dict,
             example_inputs,
-            backend_config_dict=backend_config_dict,
+            backend_config=backend_config_dict,
         )
         ref_m(data)
-        ref_m = convert_to_reference_fx(ref_m, backend_config_dict=backend_config_dict)
+        ref_m = convert_to_reference_fx(ref_m, backend_config=backend_config_dict)
         ref_res = ref_m(data)
         self.assertEqual(res, ref_res)
 
@@ -263,9 +287,13 @@ def test_standalone_module_float_interface(self):
         interface_config = float_interface_config
         # input and output of first conv, observer for standalone module
         # will be inserted in the standalone module itself
-        prepare_count_check = {ns.call_module(torch.ao.quantization.HistogramObserver): 2}
+        prepare_count_check = {
+            ns.call_module(torch.ao.quantization.HistogramObserver): 2
+        }
         # for input and output of conv in the standalone module
-        standalone_prepare_count_check = {ns.call_module(torch.ao.quantization.HistogramObserver): 2}
+        standalone_prepare_count_check = {
+            ns.call_module(torch.ao.quantization.HistogramObserver): 2
+        }
         convert_count_check = {
             # input and output of reference conv
             ns.call_function(torch.quantize_per_tensor): 2,
@@ -325,9 +353,13 @@ def test_standalone_module_quantized_interface(self):
         }
         custom_backend_config_dict = {"configs": [conv_module_config]}
         # observer for input and output of first conv
-        prepare_count_check = {ns.call_module(torch.ao.quantization.HistogramObserver): 2}
+        prepare_count_check = {
+            ns.call_module(torch.ao.quantization.HistogramObserver): 2
+        }
         # for output of conv in the standalone module
-        standalone_prepare_count_check = {ns.call_module(torch.ao.quantization.HistogramObserver): 1}
+        standalone_prepare_count_check = {
+            ns.call_module(torch.ao.quantization.HistogramObserver): 1
+        }
         convert_count_check = {
             # quantizing input/output for reference conv
             ns.call_function(torch.quantize_per_tensor): 2,
@@ -370,7 +402,9 @@ def setUp(self):
         )
         self.trt_backend_config_dict = get_tensorrt_backend_config_dict()
 
-    def _test_module(self, m, inputs, shape_ranges, no_prepare=None, no_convert=None, is_qat=False):
+    def _test_module(
+        self, m, inputs, shape_ranges, no_prepare=None, no_convert=None, is_qat=False
+    ):
         """
         Args:
           m: the float module we want to test
@@ -396,14 +430,14 @@ def _test_module(self, m, inputs, shape_ranges, no_prepare=None, no_convert=None
             m,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         self.checkGraphModuleNodes(prepared, expected_node_occurrence=no_prepare)
         # calibration
         prepared(*inputs)
         quantized = convert_to_reference_fx(
             prepared,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         self.checkGraphModuleNodes(quantized, expected_node_occurrence=no_convert)
         # lower to trt
@@ -436,7 +470,9 @@ def forward(self, x):
                 return self.relu(self.conv(x))
 
         # just testing conv2d since conv1d and conv3d are not supported in fx2trt
-        for dim, has_relu, f_relu, is_qat in itertools.product([1, 2], [True, False], [True, False], [True, False]):
+        for dim, has_relu, f_relu, is_qat in itertools.product(
+            [1, 2], [True, False], [True, False], [True, False]
+        ):
             # when has_relu=False, we have torch.nn.Identity, which would introduce
             # extra quant-dequat pair
             no_convert = {
@@ -476,7 +512,9 @@ def forward(self, x):
         linear_input = torch.rand(8, 5)
 
         shape_ranges = [((1, 5), (5, 5), (10, 5))]
-        for has_relu, f_relu, is_qat in itertools.product([True, False], [True, False], [True, False]):
+        for has_relu, f_relu, is_qat in itertools.product(
+            [True, False], [True, False], [True, False]
+        ):
             # when has_relu=False, we have torch.nn.Identity, which would introduce
             # extra quant-dequat pair
             no_convert = {
@@ -513,9 +551,9 @@ def forward(self, x):
             m,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
-        m = convert_to_reference_fx(m, backend_config_dict=self.trt_backend_config_dict)
+        m = convert_to_reference_fx(m, backend_config=self.trt_backend_config_dict)
         expected_occurrence = {
             ns.call_function(torch.quantize_per_tensor): 5,
             ns.call_method("dequantize"): 5,
@@ -544,13 +582,13 @@ def forward(self, x):
             m,
             {"": trt_unsupported_qconfig},
             example_inputs=example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         # calibration
         prepared(linear_module_input)
         quantized = convert_to_reference_fx(
             prepared,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         node_occurrence = {
             ns.call_function(torch.quantize_per_tensor): 0,
@@ -575,12 +613,12 @@ def forward(self, x):
             m,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         self.assertTrue(len(dict(prepared.named_children())) == 1)
         quantized = convert_to_reference_fx(
             prepared,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         node_occurrence = {
             ns.call_function(torch.quantize_per_tensor): 2,
@@ -605,7 +643,7 @@ def forward(self, x):
             m,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         node_occurrence = {
             # weight
@@ -616,7 +654,7 @@ def forward(self, x):
         self.checkGraphModuleNodes(prepared, expected_node_occurrence=node_occurrence)
         quantized = convert_to_reference_fx(
             prepared,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         node_occurrence = {
             # input activation, output activation and weight
@@ -626,7 +664,9 @@ def forward(self, x):
         }
         self.checkGraphModuleNodes(quantized, expected_node_occurrence=node_occurrence)
 
-    @unittest.skip("This is not supported yet, we can enable the test after it's supported")
+    @unittest.skip(
+        "This is not supported yet, we can enable the test after it's supported"
+    )
     def test_conv_add(self):
         class M(torch.nn.Module):
             def __init__(self):
@@ -675,13 +715,13 @@ def conv_add_extra_inputs_getter(pattern):
             m,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=modified_backend_config_dict,
+            backend_config=modified_backend_config_dict,
         )
         node_occurrence = {
             ns.call_module(torch.ao.quantization.HistogramObserver): 3,
         }
         self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
-        m = convert_to_reference_fx(m, backend_config_dict=modified_backend_config_dict)
+        m = convert_to_reference_fx(m, backend_config=modified_backend_config_dict)
         node_occurrence = {
             ns.call_function(torch.quantize_per_tensor): 3,
             ns.call_method("dequantize"): 3,
@@ -778,7 +818,7 @@ def forward(self, x):
             {"": qconfig},
             example_inputs,
             prepare_custom_config=prepare_custom_config_dict,
-            backend_config_dict=backend_config_dict,
+            backend_config=backend_config_dict,
         )
         node_occurrence = {
             # for input and output of conv, where input is used twice, once in conv and
@@ -790,8 +830,10 @@ def forward(self, x):
             # output of the standalone module
             ns.call_module(torch.ao.quantization.HistogramObserver): 1,
         }
-        self.checkGraphModuleNodes(m.standalone, expected_node_occurrence=standalone_node_occurrence)
-        m = convert_to_reference_fx(m, backend_config_dict=backend_config_dict)
+        self.checkGraphModuleNodes(
+            m.standalone, expected_node_occurrence=standalone_node_occurrence
+        )
+        m = convert_to_reference_fx(m, backend_config=backend_config_dict)
         node_occurrence = {
             # two inputs for standalone module
             ns.call_function(torch.quantize_per_tensor): 2,
@@ -807,7 +849,9 @@ def forward(self, x):
             # two input and one output for the pattern in standalone module
             ns.call_method("dequantize"): 3,
         }
-        self.checkGraphModuleNodes(m.standalone, expected_node_occurrence=standalone_node_occurrence)
+        self.checkGraphModuleNodes(
+            m.standalone, expected_node_occurrence=standalone_node_occurrence
+        )
 
     def test_quant_dequant_not_fold(self):
         class LinearModule(torch.nn.Module):
@@ -826,11 +870,11 @@ def forward(self, x):
             model,
             {"": self.trt_qconfig},
             example_inputs,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
         quantized = convert_to_reference_fx(
             prepared,
-            backend_config_dict=self.trt_backend_config_dict,
+            backend_config=self.trt_backend_config_dict,
         )
 
         model = acc_tracer.trace(quantized, inputs)
diff --git a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
index 7a8583359c..77f68c5ad6 100644
--- a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
+++ b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
@@ -13,6 +13,9 @@
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
 def reset_diag(fn):
     @functools.wraps(fn)
     def reset(*a, **kw):
diff --git a/py/torch_tensorrt/fx/tools/common_fx2trt.py b/py/torch_tensorrt/fx/tools/common_fx2trt.py
index c91bb51351..a2ef83b57c 100644
--- a/py/torch_tensorrt/fx/tools/common_fx2trt.py
+++ b/py/torch_tensorrt/fx/tools/common_fx2trt.py
@@ -31,7 +31,9 @@ def fetch_attr(mod, target):
     attr_itr = mod
     for i, atom in enumerate(target_atoms):
         if not hasattr(attr_itr, atom):
-            raise RuntimeError(f"Node referenced nonexistent target {'.'.join(target_atoms[:i])}")
+            raise RuntimeError(
+                f"Node referenced nonexistent target {'.'.join(target_atoms[:i])}"
+            )
         attr_itr = getattr(attr_itr, atom)
     return attr_itr
 
@@ -82,7 +84,9 @@ def run_test(
             outputs = trt_mod(*cuda_inputs)
             end_event.record()
             torch.cuda.synchronize()
-            _LOGGER.info(f"TRT run time(s)= {(start_event.elapsed_time(end_event) * 1.0e-3)}")
+            _LOGGER.info(
+                f"TRT run time(s)= {(start_event.elapsed_time(end_event) * 1.0e-3)}"
+            )
 
             if isinstance(outputs, torch.Tensor):
                 ref_outputs = [ref_outputs]
@@ -124,7 +128,9 @@ def run_test_custom_compare_results(
                 self.assert_has_op(mod, expected_ops)
 
             interpreter_result = interpreter.run(
-                lower_precision=LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+                lower_precision=LowerPrecision.FP16
+                if fp16_mode
+                else LowerPrecision.FP32
             )
             trt_mod = TRTModule(
                 interpreter_result.engine,
@@ -135,7 +141,9 @@ def run_test_custom_compare_results(
             res_cpu = mod(*inputs)
             assert len(res_trt) == len(res_cpu)
             assert len(res_cpu) == len(comparators)
-            for output_trt, output_cpu, comparator in zip(res_trt, res_cpu, comparators):
+            for output_trt, output_cpu, comparator in zip(
+                res_trt, res_cpu, comparators
+            ):
                 comp_func = comparator[0]
                 args = comparator[1]
                 self.assertTrue(comp_func(output_trt, output_cpu, *args))
@@ -159,7 +167,9 @@ def assert_has_op(self, mod, ops):
             elif node.op in {"call_function", "call_method"}:
                 ops_in_mod.add(node.target)
 
-        self.assertTrue(ops_in_mod >= ops, f"expected ops {ops}, actuall ops {ops_in_mod}")
+        self.assertTrue(
+            ops_in_mod >= ops, f"expected ops {ops}, actuall ops {ops_in_mod}"
+        )
 
     def assert_unexpected_op(self, mod, ops):
         for node in mod.graph.nodes:
@@ -196,7 +206,9 @@ def run_test_custom_compare_results(
         shape_prop.ShapeProp(mod).propagate(*inputs)
         mod = NormalizeArgs(mod).transform()
         interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs))
-        super().run_test_custom_compare_results(mod, inputs, expected_ops, interp, comparators, fp16_mode=fp16_mode)
+        super().run_test_custom_compare_results(
+            mod, inputs, expected_ops, interp, comparators, fp16_mode=fp16_mode
+        )
 
 
 class AccTestCase(TRTTestCase):
@@ -223,11 +235,17 @@ def run_test(
 
         if test_implicit_batch_dim:
             interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs))
-            super().run_test(mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision)
+            super().run_test(
+                mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision
+            )
 
         if test_explicit_batch_dim:
-            interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True)
-            super().run_test(mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision)
+            interp = TRTInterpreter(
+                mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True
+            )
+            super().run_test(
+                mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision
+            )
 
         if test_explicit_precision:
             interp = TRTInterpreter(
@@ -235,7 +253,9 @@ def run_test(
                 InputTensorSpec.from_tensors(inputs),
                 explicit_precision=test_explicit_precision,
             )
-            super().run_test(mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol)
+            super().run_test(
+                mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol
+            )
 
             interp = TRTInterpreter(
                 mod,
@@ -243,7 +263,9 @@ def run_test(
                 explicit_batch_dimension=True,
                 explicit_precision=test_explicit_precision,
             )
-            super().run_test(mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision)
+            super().run_test(
+                mod, inputs, expected_ops, unexpected_ops, interp, rtol, atol, precision
+            )
 
     def run_test_with_assert_error(
         self,
@@ -261,7 +283,9 @@ def run_test_with_assert_error(
             super().run_test_with_error(mod, inputs, interp, expect_error)
 
         if test_explicit_batch_dim:
-            interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True)
+            interp = TRTInterpreter(
+                mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True
+            )
             super().run_test_with_error(mod, inputs, interp, expect_error)
 
     def run_test_with_dynamic_shape(
diff --git a/py/torch_tensorrt/fx/tools/trt_minimizer.py b/py/torch_tensorrt/fx/tools/trt_minimizer.py
index a67cc8ec89..308687e0c9 100644
--- a/py/torch_tensorrt/fx/tools/trt_minimizer.py
+++ b/py/torch_tensorrt/fx/tools/trt_minimizer.py
@@ -10,8 +10,12 @@
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
 
-def lower_mod_default(mod: torch.fx.GraphModule, inputs: Tensors, batch_size: Any = 2048) -> TRTModule:
-    interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True)
+def lower_mod_default(
+    mod: torch.fx.GraphModule, inputs: Tensors, batch_size: Any = 2048
+) -> TRTModule:
+    interp = TRTInterpreter(
+        mod, InputTensorSpec.from_tensors(inputs), explicit_batch_dimension=True
+    )
     interpreter_result = interp.run(max_batch_size=batch_size)
     res_mod = TRTModule(
         interpreter_result.engine,
@@ -35,7 +39,9 @@ def __init__(
         compare_fn: Callable[[Any, Any, Any], Tuple[float, bool]],
         settings: TensorRTMinizerSetting = TensorRTMinizerSetting(),
         max_batch_size: Any = 2048,
-        lower_fn: Callable[[torch.fx.GraphModule, Tensors, Any], TRTModule] = lower_mod_default,
+        lower_fn: Callable[
+            [torch.fx.GraphModule, Tensors, Any], TRTModule
+        ] = lower_mod_default,
     ):
         self.lower_fn = lower_fn
         self.max_batch_size = max_batch_size
@@ -52,7 +58,9 @@ def run_b(self, mod, inputs):
             mod = self.lower_fn(mod, inputs, self.max_batch_size)
             output = mod(*inputs)
         except RuntimeError as e:
-            raise net_min_base.FxNetMinimizerRunFuncError(f"Encounter an error when processing \n{mod.graph}\n {e}")
+            raise net_min_base.FxNetMinimizerRunFuncError(
+                f"Encounter an error when processing \n{mod.graph}\n {e}"
+            )
         else:
             return output
 
diff --git a/py/torch_tensorrt/fx/tools/trt_splitter.py b/py/torch_tensorrt/fx/tools/trt_splitter.py
index 9176cf2d31..7fbca8d99a 100644
--- a/py/torch_tensorrt/fx/tools/trt_splitter.py
+++ b/py/torch_tensorrt/fx/tools/trt_splitter.py
@@ -49,7 +49,7 @@ def __init__(self):
         # During split, we'll split out the operators that
         # don't support the batch dim.
         self.use_implicit_batch_dim: bool = True
-        self.exclude_support_node_name: set = set(self.op_lowering_disallow_list)
+        self.exclude_support_node_name: set = set()
 
 
 class TRTSplitter(splitter_base._SplitterBase):
@@ -74,7 +74,9 @@ def __init__(
             non_acc_submodule_name="_run_on_gpu_",
         )
 
-    def _lower_model_to_backend(self, mod: torch.fx.GraphModule, inputs: Iterable[torch.Tensor]):
+    def _lower_model_to_backend(
+        self, mod: torch.fx.GraphModule, inputs: Iterable[torch.Tensor]
+    ):
         """
         Lower a GraphModule `mod` to TensorRT with `inputs`.
         """
diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_tracer.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_tracer.py
index 02a50f32bb..57f7d0e7ea 100644
--- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_tracer.py
+++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_tracer.py
@@ -19,6 +19,7 @@
 
 from . import acc_normalizer, acc_ops, acc_shape_prop, acc_utils  # noqa: F401
 
+
 _LOGGER = logging.getLogger(__name__)
 
 
@@ -42,7 +43,9 @@ def __init__(self):
         self.exceptions_rewritten: Set[Type[Exception]] = set()
         self.exceptions_bool_rewritten: Set[Type[Exception]] = set()
 
-    def rewrite(self, fn: FunctionType) -> Tuple[FunctionType, Set[Type[Exception]], Set[Type[Exception]]]:
+    def rewrite(
+        self, fn: FunctionType
+    ) -> Tuple[FunctionType, Set[Type[Exception]], Set[Type[Exception]]]:
 
         # Normalize the source lines
         sourcelines, _ = inspect.getsourcelines(fn)
@@ -138,7 +141,8 @@ def _reuse_loc(node):
         # Check that we actually have a builtin exception.
         if (
             not issubclass(exc_type, Exception)
-            or getattr(getattr(exc_type, "__class__", None), "__module__", None) != "builtins"
+            or getattr(getattr(exc_type, "__class__", None), "__module__", None)
+            != "builtins"
         ):
             return if_node
 
@@ -154,13 +158,19 @@ def _reuse_loc(node):
         # module is safe because the RewrittenModule will add it as an attr
         # based on the returned exceptions_rewritten, and we assume we are
         # currently modifying the AST of a method from a RewrittenModule.
-        exc_wrapper_node = ast.parse(f"self.{_get_exception_wrapper_attr_name(exc_type)}()", mode="eval")
+        exc_wrapper_node = ast.parse(
+            f"self.{_get_exception_wrapper_attr_name(exc_type)}()", mode="eval"
+        )
         assert isinstance(exc_wrapper_node, ast.Expression)
         exc_wrapper_call_node = exc_wrapper_node.body
         assert isinstance(exc_wrapper_call_node, ast.Call)
-        if isinstance(if_node.test, ast.BoolOp) and isinstance(if_node.test.op, ast.And):
+        if isinstance(if_node.test, ast.BoolOp) and isinstance(
+            if_node.test.op, ast.And
+        ):
             self.exceptions_bool_rewritten.add(exc_type)
-            bool_wrapper_node = ast.parse(f"self.{_get_exception_wrapper_attr_name(exc_type)}_bool()", mode="eval")
+            bool_wrapper_node = ast.parse(
+                f"self.{_get_exception_wrapper_attr_name(exc_type)}_bool()", mode="eval"
+            )
             assert isinstance(exc_wrapper_node, ast.Expression)
             bool_wrapper_call_node = bool_wrapper_node.body
             assert isinstance(exc_wrapper_call_node, ast.Call)
@@ -315,7 +325,9 @@ def create_node(
             and not (name_target in allow_list)
             and kind != "placeholder"
         ):
-            raise RuntimeError(f"Tried to trace mutable operation {name_target}. FX only supports functional code")
+            raise RuntimeError(
+                f"Tried to trace mutable operation {name_target}. FX only supports functional code"
+            )
 
         return self.graph.create_node(kind, target, args, kwargs, name, type_expr)
 
@@ -374,7 +386,9 @@ class RewrittenModule(base_class):  # type: ignore[valid-type, misc]
             for method_name in dir(base_class):
                 method = getattr(base_class, method_name, None)
                 if method is None and method_name not in {"__doc__"}:
-                    _LOGGER.warning(f"{__qualname__} does not have attribute {method_name}")
+                    _LOGGER.warning(
+                        f"{__qualname__} does not have attribute {method_name}"
+                    )
 
                 if builtins.type(method) is not FunctionType:
                     continue
@@ -424,10 +438,10 @@ def __init__(self, orig):
                 for k, v in orig.__dict__.items():
                     if k == "_modules":
                         for mod_k, mod_v in v.items():
-                            if (
-                                getattr(mod_v, "_base_class_origin", type(mod_v)) in leaf_module_list
-                            ):  # type: ignore[operator]
-                                _LOGGER.info(f"Skip rewriting leaf module {type(mod_v)}")
+                            if getattr(mod_v, "_base_class_origin", type(mod_v)) in leaf_module_list:  # type: ignore[operator]
+                                _LOGGER.info(
+                                    f"Skip rewriting leaf module {type(mod_v)}"
+                                )
                                 self._modules[mod_k] = mod_v
                             else:
                                 self._modules[mod_k] = rewrite_module(mod_v)
@@ -463,7 +477,9 @@ def _remove_exceptions(gm: torch.fx.GraphModule) -> bool:
     for node in reversed(gm.graph.nodes):
         if node.op == "call_module" and (
             isinstance(gm.get_submodule(node.target), ConditionalExceptionWrapper)
-            or isinstance(gm.get_submodule(node.target), ConditionalExceptionBoolCondWrapper)
+            or isinstance(
+                gm.get_submodule(node.target), ConditionalExceptionBoolCondWrapper
+            )
         ):
             gm.graph.erase_node(node)
             changed = True
@@ -473,7 +489,9 @@ def _remove_exceptions(gm: torch.fx.GraphModule) -> bool:
 def _replace_tensor_meta_with_rank(gm: torch.fx.GraphModule):
     for node in gm.graph.nodes:
         if node.op != "output" and "tensor_meta" in node.meta:
-            node.meta["tensor_rank"] = acc_utils.map_tensor_metadata(node.meta["tensor_meta"], lambda x: len(x.shape))
+            node.meta["tensor_rank"] = acc_utils.map_tensor_metadata(
+                node.meta["tensor_meta"], lambda x: len(x.shape)
+            )
             del node.meta["tensor_meta"]
 
 

From 813c5d80b0681fa9419fbb3f3812f1e8b66dd518 Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 11:45:07 -0700
Subject: [PATCH 2/6] reverse _compile.py change

---
 py/torch_tensorrt/_compile.py | 44 +++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py
index c6550ae7c7..5102338d32 100644
--- a/py/torch_tensorrt/_compile.py
+++ b/py/torch_tensorrt/_compile.py
@@ -5,20 +5,22 @@
 import torch
 import torch.fx
 from enum import Enum
+
 import torch_tensorrt.fx
 from torch_tensorrt.fx.lower import lower_to_trt
 from torch_tensorrt.fx.utils import LowerPrecision
 
+
 class _IRType(Enum):
-    """Enum to set the minimum required logging level to print a message to stdout
-    """
+    """Enum to set the minimum required logging level to print a message to stdout"""
+
     ts = 0
     fx = 1
 
 
 class _ModuleType(Enum):
-    """Enum to set the minimum required logging level to print a message to stdout
-    """
+    """Enum to set the minimum required logging level to print a message to stdout"""
+
     nn = 0
     ts = 1
     fx = 2
@@ -54,8 +56,8 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
                 return _IRType.ts
             elif module_is_fxable:
                 raise ValueError("Was given a torch.fx.GraphModule, fx is not currently supported by Torch-TensorRT")
-                #logging.log(logging.Level.Info, "ir was set to default, using TorchScript as fx")
-                #return _IRType.fx
+                # logging.log(logging.Level.Info, "ir was set to default, using TorchScript as fx")
+                # return _IRType.fx
             else:
                 raise ValueError("Module was provided with in an unsupported format")
         else:
@@ -105,7 +107,7 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
         if module_type == _ModuleType.nn:
             logging.log(
                 logging.Level.Info,
-                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript"
+                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
             )
             ts_mod = torch.jit.script(module)
         return torch_tensorrt.ts.compile(ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs)
@@ -117,17 +119,21 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
         else:
             raise ValueError(f"Precision {enabled_precisions} not supported on FX")
 
-        return lower_to_trt(module, inputs, lower_precision=lower_precision, max_batch_size=inputs[0].size(0), explicit_batch_dimension=True, dynamic_batch=False)
+        return lower_to_trt(
+            module,
+            inputs,
+            lower_precision=lower_precision,
+            max_batch_size=inputs[0].size(0),
+            explicit_batch_dimension=True,
+            dynamic_batch=False,
+        )
     else:
         raise RuntimeError("Module is an unknown format or the ir requested is unknown")
 
 
-def convert_method_to_trt_engine(module: Any,
-                                 method_name: str,
-                                 ir="default",
-                                 inputs=[],
-                                 enabled_precisions=set([_enums.dtype.float]),
-                                 **kwargs):
+def convert_method_to_trt_engine(
+    module: Any, method_name: str, ir="default", inputs=[], enabled_precisions=set([_enums.dtype.float]), **kwargs
+):
     """Convert a TorchScript module method to a serialized TensorRT engine
 
     Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings
@@ -165,14 +171,12 @@ def convert_method_to_trt_engine(module: Any,
         if module_type == _ModuleType.nn:
             logging.log(
                 logging.Level.Info,
-                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript"
+                "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
             )
             ts_mod = torch.jit.script(module)
-        return torch_tensorrt.ts.convert_method_to_trt_engine(ts_mod,
-                                                              method_name,
-                                                              inputs=inputs,
-                                                              enabled_precisions=enabled_precisions,
-                                                              **kwargs)
+        return torch_tensorrt.ts.convert_method_to_trt_engine(
+            ts_mod, method_name, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs
+        )
     elif target_ir == _IRType.fx:
         raise RuntimeError("fx is currently not supported")
     else:

From 1784d640a8d0fb610253f0d9b91632b068877578 Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 14:11:22 -0700
Subject: [PATCH 3/6] comment line length to use default

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8f10237ebe..38456cf53d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,8 +17,8 @@ requires = [
 
 [tool.black]
 # Uncomment if pyproject.toml worked fine to ensure consistency with flake8
-line-length = 120
+# line-length = 120
 target-versions = ["py37", "py38", "py39", "py310"]
 force-exclude = """
 elu_converter/setup.py
-"""
\ No newline at end of file
+"""

From e13889b21e0947a58046782338a0794b7767b1cb Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 14:12:54 -0700
Subject: [PATCH 4/6] update nightly pytorch to 0810

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e7aa93fdea..bd3f3c536c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -751,7 +751,7 @@ parameters:
   # Nightly platform config
   torch-nightly-build:
     type: string
-    default: "1.13.0.dev20220731+cu113"
+    default: "1.13.0.dev20220810+cu113"
   torch-nightly-build-index:
     type: string
     default: "https://download.pytorch.org/whl/nightly/cu113"

From b08f6c732365607813fd91697a42d4923d661445 Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 14:59:07 -0700
Subject: [PATCH 5/6] black formatting

---
 docsrc/conf.py                                |  14 +-
 examples/custom_converters/elu_model.py       |   4 +-
 examples/fx/fx2trt_example.py                 |   4 +-
 .../fx/hugging_face_torchdynamo_example.py    |  12 +-
 examples/fx/lower_example.py                  |   5 +-
 examples/fx/quantized_resnet_test.py          |   4 +-
 examples/fx/torch_trt_simple_example.py       |  44 ++++-
 examples/fx/torchdynamo_example.py            |   5 +-
 examples/int8/training/vgg16/export_ckpt.py   |   4 +-
 examples/int8/training/vgg16/export_qat.py    |   4 +-
 examples/int8/training/vgg16/finetune_qat.py  |  29 ++-
 examples/int8/training/vgg16/main.py          |  29 ++-
 examples/int8/training/vgg16/test_qat.py      |   8 +-
 noxfile.py                                    |  54 +++++-
 py/setup.py                                   |  28 ++-
 py/torch_tensorrt/_Device.py                  |   8 +-
 py/torch_tensorrt/_Input.py                   |  20 +-
 py/torch_tensorrt/__init__.py                 |   4 +-
 py/torch_tensorrt/_compile.py                 |  48 ++++-
 py/torch_tensorrt/fx/converters/activation.py |  14 +-
 .../fx/converters/adaptive_avgpool.py         |   9 +-
 py/torch_tensorrt/fx/converters/add.py        |  16 +-
 py/torch_tensorrt/fx/converters/batchnorm.py  |  12 +-
 .../fx/converters/converter_utils.py          |  92 ++++++---
 .../fx/converters/convolution.py              |  27 ++-
 py/torch_tensorrt/fx/converters/linear.py     |  10 +-
 py/torch_tensorrt/fx/converters/maxpool.py    |  15 +-
 py/torch_tensorrt/fx/converters/mul.py        |   8 +-
 .../fx/converters/quantization.py             |  23 ++-
 .../fx/converters/transformation.py           |   9 +-
 py/torch_tensorrt/fx/diagnostics.py           |  33 +++-
 py/torch_tensorrt/fx/fx2trt.py                | 106 +++++++---
 py/torch_tensorrt/fx/lower_setting.py         |   4 +-
 py/torch_tensorrt/fx/observer.py              |  12 +-
 .../fx/passes/lower_basic_pass.py             | 104 +++++++---
 py/torch_tensorrt/fx/passes/pass_utils.py     |  12 +-
 .../fx/passes/remove_duplicate_output_args.py |   9 +-
 .../acc_op/test_adaptive_avgpool.py           |  12 +-
 .../fx/test/converters/acc_op/test_any.py     |   4 +-
 .../test/converters/acc_op/test_binary_ops.py |  16 +-
 .../fx/test/converters/acc_op/test_chunk.py   |   8 +-
 .../test/converters/acc_op/test_dequantize.py |   8 +-
 .../fx/test/converters/acc_op/test_einsum.py  |   8 +-
 .../fx/test/converters/acc_op/test_elu.py     |   8 +-
 .../test/converters/acc_op/test_embedding.py  |   8 +-
 .../fx/test/converters/acc_op/test_eq.py      |  24 ++-
 .../fx/test/converters/acc_op/test_getitem.py |  12 +-
 .../fx/test/converters/acc_op/test_gt.py      |  24 ++-
 .../converters/acc_op/test_hard_sigmoid.py    |   8 +-
 .../test/converters/acc_op/test_hardtanh.py   |  12 +-
 .../fx/test/converters/acc_op/test_isinf.py   |  12 +-
 .../test/converters/acc_op/test_leaky_relu.py |   8 +-
 .../converters/acc_op/test_logical_and.py     |   8 +-
 .../test/converters/acc_op/test_logical_or.py |  12 +-
 .../converters/acc_op/test_logical_xor.py     |  12 +-
 .../fx/test/converters/acc_op/test_lt.py      |  24 ++-
 .../fx/test/converters/acc_op/test_maximum.py |   8 +-
 .../fx/test/converters/acc_op/test_maxpool.py |  28 ++-
 .../fx/test/converters/acc_op/test_minimum.py |   8 +-
 .../fx/test/converters/acc_op/test_ne.py      |  20 +-
 .../test/converters/acc_op/test_new_ones.py   |  12 +-
 .../fx/test/converters/acc_op/test_permute.py |   8 +-
 .../acc_op/test_quantize_per_tensor.py        |   8 +-
 .../fx/test/converters/acc_op/test_relu.py    |   8 +-
 .../acc_op/test_repeat_interleave.py          |   4 +-
 .../fx/test/converters/acc_op/test_reshape.py |   8 +-
 .../fx/test/converters/acc_op/test_selu.py    |   8 +-
 .../fx/test/converters/acc_op/test_sigmoid.py |   4 +-
 .../fx/test/converters/acc_op/test_silu.py    |   8 +-
 .../fx/test/converters/acc_op/test_size.py    |   8 +-
 .../fx/test/converters/acc_op/test_softmax.py |  12 +-
 .../test/converters/acc_op/test_softsign.py   |   8 +-
 .../fx/test/converters/acc_op/test_split.py   |  12 +-
 .../fx/test/converters/acc_op/test_squeeze.py |   4 +-
 .../fx/test/converters/acc_op/test_tanh.py    |   8 +-
 .../acc_op/test_transpose_convolution.py      |   8 +-
 .../test/converters/acc_op/test_unsqueeze.py  |   4 +-
 .../vanilla/test_convolution_vanilla.py       |  12 +-
 .../fx/test/core/test_trt_module.py           |   8 +-
 .../passes/test_fuse_permute_matmul_trt.py    |   4 +-
 .../test_remove_duplicate_output_args.py      |   8 +-
 .../fx/test/passes/test_setitem.py            |  16 +-
 .../fx/test/tracer/test_acc_tracer.py         | 181 +++++++++++++-----
 .../fx/test/tracer/test_dispatch_tracer.py    |   5 +-
 .../fx/test/trt_lower/test_diagnostics.py     |   8 +-
 .../fx/test/trt_lower/test_observer_gpu.py    |   8 +-
 .../fx/test/trt_lower/trt_splitter_test.py    |  46 ++++-
 .../fx/tools/engine_layer_visualize.py        |  12 +-
 py/torch_tensorrt/fx/tools/model_packager.py  |  17 +-
 py/torch_tensorrt/fx/tools/node_profiler.py   |   4 +-
 .../fx/tools/timing_cache_utils.py            |   4 +-
 .../fx/tools/trt_profiler_sorted.py           |  16 +-
 .../fx/tracer/acc_tracer/acc_normalizer.py    |  41 +++-
 .../fx/tracer/acc_tracer/acc_ops.py           | 175 ++++++++++++-----
 .../fx/tracer/acc_tracer/acc_utils.py         |  14 +-
 .../fx/tracer/dispatch_tracer/tracer.py       |  24 ++-
 py/torch_tensorrt/fx/trt_module.py            |  49 +++--
 py/torch_tensorrt/ptq.py                      |  36 +++-
 py/torch_tensorrt/ts/_compile_spec.py         |  93 +++++++--
 py/torch_tensorrt/ts/_compiler.py             |  12 +-
 tests/modules/hub.py                          |  19 +-
 tests/py/api/test_classes.py                  |  20 +-
 tests/py/api/test_collections.py              |  77 ++++++--
 tests/py/api/test_e2e_behavior.py             |  16 +-
 tests/py/api/test_ts_backend.py               |  14 +-
 tests/py/hw/test_api_dla.py                   |   4 +-
 tests/py/hw/test_multi_gpu.py                 |  14 +-
 tests/py/integrations/test_to_backend_api.py  |   4 +-
 .../test_trt_intercompatibility.py            |   8 +-
 .../py/ptq/test_ptq_dataloader_calibrator.py  |   8 +-
 tests/py/ptq/test_ptq_to_backend.py           |   8 +-
 tests/py/ptq/test_ptq_trt_calibrator.py       |  13 +-
 tests/py/qat/test_qat_trt_accuracy.py         |   8 +-
 tools/linter/cpplint.py                       |   4 +-
 tools/linter/cpplint_diff.py                  |   8 +-
 tools/linter/pylint.py                        |   4 +-
 tools/linter/pylint_diff.py                   |   4 +-
 tools/perf/perf_run.py                        |  26 ++-
 118 files changed, 1772 insertions(+), 547 deletions(-)

diff --git a/docsrc/conf.py b/docsrc/conf.py
index 4359824bf4..a8f6ef59a2 100644
--- a/docsrc/conf.py
+++ b/docsrc/conf.py
@@ -99,7 +99,9 @@
 }
 
 html_show_sourcelink = True
-html_sidebars = {"**": ["logo-text.html", "globaltoc.html", "localtoc.html", "searchbox.html"]}
+html_sidebars = {
+    "**": ["logo-text.html", "globaltoc.html", "localtoc.html", "searchbox.html"]
+}
 
 # extensions.append("sphinx_material")
 html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
@@ -183,7 +185,15 @@ def handle_item(fieldarg, content):
                 typename = typename.replace("long", "python:long")
                 typename = typename.replace("float", "python:float")
                 typename = typename.replace("type", "python:type")
-                par.extend(self.make_xrefs(self.typerolename, domain, typename, addnodes.literal_emphasis, **kw))
+                par.extend(
+                    self.make_xrefs(
+                        self.typerolename,
+                        domain,
+                        typename,
+                        addnodes.literal_emphasis,
+                        **kw
+                    )
+                )
             else:
                 par += fieldtype
             par += nodes.Text(")")
diff --git a/examples/custom_converters/elu_model.py b/examples/custom_converters/elu_model.py
index 00500330d2..01cfdd1250 100644
--- a/examples/custom_converters/elu_model.py
+++ b/examples/custom_converters/elu_model.py
@@ -2,7 +2,9 @@
 import torch_tensorrt
 
 # After "python3 setup install", you should find this .so file under generated "build" directory
-torch.ops.load_library("./elu_converter/build/lib.linux-x86_64-3.6/elu_converter.cpython-36m-x86_64-linux-gnu.so")
+torch.ops.load_library(
+    "./elu_converter/build/lib.linux-x86_64-3.6/elu_converter.cpython-36m-x86_64-linux-gnu.so"
+)
 
 
 class Elu(torch.nn.Module):
diff --git a/examples/fx/fx2trt_example.py b/examples/fx/fx2trt_example.py
index f10d8582a5..996609f55c 100644
--- a/examples/fx/fx2trt_example.py
+++ b/examples/fx/fx2trt_example.py
@@ -141,4 +141,6 @@ def get_input(self, inputs):
 
 # Make sure the results match
 regular_model_output = model(*inputs)
-torch.testing.assert_close(reload_model_output, regular_model_output, atol=3e-3, rtol=1e-2)
+torch.testing.assert_close(
+    reload_model_output, regular_model_output, atol=3e-3, rtol=1e-2
+)
diff --git a/examples/fx/hugging_face_torchdynamo_example.py b/examples/fx/hugging_face_torchdynamo_example.py
index 902701d93e..3d4d91d3f8 100644
--- a/examples/fx/hugging_face_torchdynamo_example.py
+++ b/examples/fx/hugging_face_torchdynamo_example.py
@@ -353,14 +353,18 @@ def run_all_eval(args, optimize_ctx, optimize_name, dtype):
             eval_inputs = (input_ids,)
 
         # Correctness check
-        is_accurate = check_correctness(args, model, eval_inputs, optimize_ctx, optimize_name)
+        is_accurate = check_correctness(
+            args, model, eval_inputs, optimize_ctx, optimize_name
+        )
         # Profile eager
         t, m = bench_model_eval(args, "eager", model, eval_inputs, NullContext())
         results.append(create_record(model_name, dtype, is_accurate, "eager", t, m))
 
         # Profile Dynamo nvfuser
         t, m = bench_model_eval(args, optimize_name, model, eval_inputs, optimize_ctx)
-        results.append(create_record(model_name, dtype, is_accurate, optimize_name, t, m))
+        results.append(
+            create_record(model_name, dtype, is_accurate, optimize_name, t, m)
+        )
 
         # calculate relative improvements
         base_r = results[-2]
@@ -412,7 +416,9 @@ def main():
     if optimize_name == "dynamo_fx2trt_fp32":
         experiment = partial(experiment, dtype=torch.float32)
 
-    experiment = partial(experiment, optimize_ctx=optimize_ctx, optimize_name=optimize_name)
+    experiment = partial(
+        experiment, optimize_ctx=optimize_ctx, optimize_name=optimize_name
+    )
     experiment(args)
 
 
diff --git a/examples/fx/lower_example.py b/examples/fx/lower_example.py
index 5a5acd665c..71f15a2f88 100644
--- a/examples/fx/lower_example.py
+++ b/examples/fx/lower_example.py
@@ -125,7 +125,10 @@ def benchmark(
         ),
     ]
 
-    results = [run_configuration_benchmark(deepcopy(model), inputs, conf_) for conf_ in configurations]
+    results = [
+        run_configuration_benchmark(deepcopy(model), inputs, conf_)
+        for conf_ in configurations
+    ]
 
     for res in results:
         print(res.format())
diff --git a/examples/fx/quantized_resnet_test.py b/examples/fx/quantized_resnet_test.py
index e3228b1da7..64d7579414 100644
--- a/examples/fx/quantized_resnet_test.py
+++ b/examples/fx/quantized_resnet_test.py
@@ -108,7 +108,9 @@ def build_int8_trt_implicit_quant(rn18):
         InputTensorSpec.from_tensors([data]),
         logger_level=trt.Logger.VERBOSE,
     )
-    interpreter_result = interp.run(lower_precision=LowerPrecision.INT8, strict_type_constraints=True)
+    interpreter_result = interp.run(
+        lower_precision=LowerPrecision.INT8, strict_type_constraints=True
+    )
     trt_mod = TRTModule(
         interpreter_result.engine,
         interpreter_result.input_names,
diff --git a/examples/fx/torch_trt_simple_example.py b/examples/fx/torch_trt_simple_example.py
index 4052aea060..400dda3360 100644
--- a/examples/fx/torch_trt_simple_example.py
+++ b/examples/fx/torch_trt_simple_example.py
@@ -12,17 +12,31 @@ def test_torch_tensorrt(model, inputs):
     # fp32 test
     with torch.inference_mode():
         ref_fp32 = model_ts(*inputs_ts)
-    trt_ts_module = torch_tensorrt.compile(model_ts, inputs=inputs_ts, enabled_precisions={torch.float32})
+    trt_ts_module = torch_tensorrt.compile(
+        model_ts, inputs=inputs_ts, enabled_precisions={torch.float32}
+    )
     result_fp32 = trt_ts_module(*inputs_ts)
-    assert torch.nn.functional.cosine_similarity(ref_fp32.flatten(), result_fp32.flatten(), dim=0) > 0.9999
+    assert (
+        torch.nn.functional.cosine_similarity(
+            ref_fp32.flatten(), result_fp32.flatten(), dim=0
+        )
+        > 0.9999
+    )
     # fp16 test
     model_ts = model_ts.half()
     inputs_ts = [i.cuda().half() for i in inputs_ts]
     with torch.inference_mode():
         ref_fp16 = model_ts(*inputs_ts)
-    trt_ts_module = torch_tensorrt.compile(model_ts, inputs=inputs_ts, enabled_precisions={torch.float16})
+    trt_ts_module = torch_tensorrt.compile(
+        model_ts, inputs=inputs_ts, enabled_precisions={torch.float16}
+    )
     result_fp16 = trt_ts_module(*inputs_ts)
-    assert torch.nn.functional.cosine_similarity(ref_fp16.flatten(), result_fp16.flatten(), dim=0) > 0.99
+    assert (
+        torch.nn.functional.cosine_similarity(
+            ref_fp16.flatten(), result_fp16.flatten(), dim=0
+        )
+        > 0.99
+    )
 
     # FX path
     model_fx = copy.deepcopy(model)
@@ -30,17 +44,31 @@ def test_torch_tensorrt(model, inputs):
     # fp32 test
     with torch.inference_mode():
         ref_fp32 = model_fx(*inputs_fx)
-    trt_fx_module = torch_tensorrt.compile(model_fx, ir="fx", inputs=inputs_fx, enabled_precisions={torch.float32})
+    trt_fx_module = torch_tensorrt.compile(
+        model_fx, ir="fx", inputs=inputs_fx, enabled_precisions={torch.float32}
+    )
     result_fp32 = trt_fx_module(*inputs_fx)
-    assert torch.nn.functional.cosine_similarity(ref_fp32.flatten(), result_fp32.flatten(), dim=0) > 0.9999
+    assert (
+        torch.nn.functional.cosine_similarity(
+            ref_fp32.flatten(), result_fp32.flatten(), dim=0
+        )
+        > 0.9999
+    )
     # fp16 test
     model_fx = model_fx.cuda().half()
     inputs_fx = [i.cuda().half() for i in inputs_fx]
     with torch.inference_mode():
         ref_fp16 = model_fx(*inputs_fx)
-    trt_fx_module = torch_tensorrt.compile(model_fx, ir="fx", inputs=inputs_fx, enabled_precisions={torch.float16})
+    trt_fx_module = torch_tensorrt.compile(
+        model_fx, ir="fx", inputs=inputs_fx, enabled_precisions={torch.float16}
+    )
     result_fp16 = trt_fx_module(*inputs_fx)
-    assert torch.nn.functional.cosine_similarity(ref_fp16.flatten(), result_fp16.flatten(), dim=0) > 0.99
+    assert (
+        torch.nn.functional.cosine_similarity(
+            ref_fp16.flatten(), result_fp16.flatten(), dim=0
+        )
+        > 0.99
+    )
 
 
 if __name__ == "__main__":
diff --git a/examples/fx/torchdynamo_example.py b/examples/fx/torchdynamo_example.py
index 8f1d4de31a..6bb93f6d6e 100644
--- a/examples/fx/torchdynamo_example.py
+++ b/examples/fx/torchdynamo_example.py
@@ -142,7 +142,10 @@ def benchmark(
         ),
     ]
 
-    results = [run_configuration_benchmark(deepcopy(model), inputs, conf_) for conf_ in configurations]
+    results = [
+        run_configuration_benchmark(deepcopy(model), inputs, conf_)
+        for conf_ in configurations
+    ]
 
     for res in results:
         print(res.format())
diff --git a/examples/int8/training/vgg16/export_ckpt.py b/examples/int8/training/vgg16/export_ckpt.py
index 290eb326cc..16f0426811 100644
--- a/examples/int8/training/vgg16/export_ckpt.py
+++ b/examples/int8/training/vgg16/export_ckpt.py
@@ -75,7 +75,9 @@ def test(model, dataloader, crit):
     ),
 )
 
-testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=32, shuffle=False, num_workers=2)
+testing_dataloader = torch.utils.data.DataLoader(
+    testing_dataset, batch_size=32, shuffle=False, num_workers=2
+)
 
 crit = torch.nn.CrossEntropyLoss()
 
diff --git a/examples/int8/training/vgg16/export_qat.py b/examples/int8/training/vgg16/export_qat.py
index faae1f5a45..af881c5642 100644
--- a/examples/int8/training/vgg16/export_qat.py
+++ b/examples/int8/training/vgg16/export_qat.py
@@ -72,7 +72,9 @@ def test(model, dataloader, crit):
     ),
 )
 
-testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=32, shuffle=False, num_workers=2)
+testing_dataloader = torch.utils.data.DataLoader(
+    testing_dataset, batch_size=32, shuffle=False, num_workers=2
+)
 
 crit = torch.nn.CrossEntropyLoss()
 
diff --git a/examples/int8/training/vgg16/finetune_qat.py b/examples/int8/training/vgg16/finetune_qat.py
index 6eed2787cf..48709d9f8a 100644
--- a/examples/int8/training/vgg16/finetune_qat.py
+++ b/examples/int8/training/vgg16/finetune_qat.py
@@ -21,14 +21,20 @@
 
 from vgg16 import vgg16
 
-PARSER = argparse.ArgumentParser(description="VGG16 example to use with Torch-TensorRT PTQ")
-PARSER.add_argument("--epochs", default=100, type=int, help="Number of total epochs to train")
+PARSER = argparse.ArgumentParser(
+    description="VGG16 example to use with Torch-TensorRT PTQ"
+)
+PARSER.add_argument(
+    "--epochs", default=100, type=int, help="Number of total epochs to train"
+)
 PARSER.add_argument(
     "--enable_qat",
     action="store_true",
     help="Enable quantization aware training. This is recommended to perform on a pre-trained model.",
 )
-PARSER.add_argument("--batch-size", default=128, type=int, help="Batch size to use when training")
+PARSER.add_argument(
+    "--batch-size", default=128, type=int, help="Batch size to use when training"
+)
 PARSER.add_argument("--lr", default=0.1, type=float, help="Initial learning rate")
 PARSER.add_argument("--drop-ratio", default=0.0, type=float, help="Dropout ratio")
 PARSER.add_argument("--momentum", default=0.9, type=float, help="Momentum")
@@ -194,7 +200,9 @@ def main():
                 transforms.RandomCrop(32, padding=4),
                 transforms.RandomHorizontalFlip(),
                 transforms.ToTensor(),
-                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                transforms.Normalize(
+                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                ),
             ]
         ),
     )
@@ -209,7 +217,9 @@ def main():
         transform=transforms.Compose(
             [
                 transforms.ToTensor(),
-                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                transforms.Normalize(
+                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                ),
             ]
         ),
     )
@@ -309,9 +319,14 @@ def train(model, dataloader, crit, opt, epoch):
 
         running_loss += loss.item()
         if batch % 50 == 49:
-            writer.add_scalar("Training Loss", running_loss / 100, epoch * len(dataloader) + batch)
+            writer.add_scalar(
+                "Training Loss", running_loss / 100, epoch * len(dataloader) + batch
+            )
             writer.close()
-            print("Batch: [%5d | %5d] loss: %.3f" % (batch + 1, len(dataloader), running_loss / 100))
+            print(
+                "Batch: [%5d | %5d] loss: %.3f"
+                % (batch + 1, len(dataloader), running_loss / 100)
+            )
             running_loss = 0.0
 
 
diff --git a/examples/int8/training/vgg16/main.py b/examples/int8/training/vgg16/main.py
index 00dc325422..3f248a9283 100644
--- a/examples/int8/training/vgg16/main.py
+++ b/examples/int8/training/vgg16/main.py
@@ -15,9 +15,15 @@
 
 from vgg16 import vgg16
 
-PARSER = argparse.ArgumentParser(description="VGG16 example to use with Torch-TensorRT PTQ")
-PARSER.add_argument("--epochs", default=100, type=int, help="Number of total epochs to train")
-PARSER.add_argument("--batch-size", default=128, type=int, help="Batch size to use when training")
+PARSER = argparse.ArgumentParser(
+    description="VGG16 example to use with Torch-TensorRT PTQ"
+)
+PARSER.add_argument(
+    "--epochs", default=100, type=int, help="Number of total epochs to train"
+)
+PARSER.add_argument(
+    "--batch-size", default=128, type=int, help="Batch size to use when training"
+)
 PARSER.add_argument("--lr", default=0.1, type=float, help="Initial learning rate")
 PARSER.add_argument("--drop-ratio", default=0.0, type=float, help="Dropout ratio")
 PARSER.add_argument("--momentum", default=0.9, type=float, help="Momentum")
@@ -89,7 +95,9 @@ def main():
                 transforms.RandomCrop(32, padding=4),
                 transforms.RandomHorizontalFlip(),
                 transforms.ToTensor(),
-                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                transforms.Normalize(
+                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                ),
             ]
         ),
     )
@@ -104,7 +112,9 @@ def main():
         transform=transforms.Compose(
             [
                 transforms.ToTensor(),
-                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                transforms.Normalize(
+                    (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                ),
             ]
         ),
     )
@@ -182,9 +192,14 @@ def train(model, dataloader, crit, opt, epoch):
 
         running_loss += loss.item()
         if batch % 50 == 49:
-            writer.add_scalar("Training Loss", running_loss / 100, epoch * len(dataloader) + batch)
+            writer.add_scalar(
+                "Training Loss", running_loss / 100, epoch * len(dataloader) + batch
+            )
             writer.close()
-            print("Batch: [%5d | %5d] loss: %.3f" % (batch + 1, len(dataloader), running_loss / 100))
+            print(
+                "Batch: [%5d | %5d] loss: %.3f"
+                % (batch + 1, len(dataloader), running_loss / 100)
+            )
             running_loss = 0.0
 
 
diff --git a/examples/int8/training/vgg16/test_qat.py b/examples/int8/training/vgg16/test_qat.py
index bdb9520505..d38d36f3fc 100644
--- a/examples/int8/training/vgg16/test_qat.py
+++ b/examples/int8/training/vgg16/test_qat.py
@@ -71,7 +71,9 @@ def test(model, dataloader, crit):
     ),
 )
 
-testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=32, shuffle=False, num_workers=2)
+testing_dataloader = torch.utils.data.DataLoader(
+    testing_dataset, batch_size=32, shuffle=False, num_workers=2
+)
 
 crit = torch.nn.CrossEntropyLoss()
 
@@ -94,6 +96,8 @@ def test(model, dataloader, crit):
 }
 new_mod = torch.jit.load("trained_vgg16_qat.jit.pt")
 trt_ts_module = torchtrt.compile(new_mod, **compile_settings)
-testing_dataloader = torch.utils.data.DataLoader(testing_dataset, batch_size=1, shuffle=False, num_workers=2)
+testing_dataloader = torch.utils.data.DataLoader(
+    testing_dataset, batch_size=1, shuffle=False, num_workers=2
+)
 test_loss, test_acc = test(trt_ts_module, testing_dataloader, crit)
 print("[TRTorch] Test Loss: {:.5f} Test Acc: {:.2f}%".format(test_loss, 100 * test_acc))
diff --git a/noxfile.py b/noxfile.py
index c81415bc7a..41926b5ee1 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -4,12 +4,20 @@
 import sys
 
 # Use system installed Python packages
-PYT_PATH = "/opt/conda/lib/python3.8/site-packages" if not "PYT_PATH" in os.environ else os.environ["PYT_PATH"]
+PYT_PATH = (
+    "/opt/conda/lib/python3.8/site-packages"
+    if not "PYT_PATH" in os.environ
+    else os.environ["PYT_PATH"]
+)
 print(f"Using python path {PYT_PATH}")
 
 # Set the root directory to the directory of the noxfile unless the user wants to
 # TOP_DIR
-TOP_DIR = os.path.dirname(os.path.realpath(__file__)) if not "TOP_DIR" in os.environ else os.environ["TOP_DIR"]
+TOP_DIR = (
+    os.path.dirname(os.path.realpath(__file__))
+    if not "TOP_DIR" in os.environ
+    else os.environ["TOP_DIR"]
+)
 print(f"Test root directory {TOP_DIR}")
 
 # Set the USE_CXX11=1 to use cxx11_abi
@@ -24,7 +32,9 @@
 
 SUPPORTED_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
 
-nox.options.sessions = ["l0_api_tests-" + "{}.{}".format(sys.version_info.major, sys.version_info.minor)]
+nox.options.sessions = [
+    "l0_api_tests-" + "{}.{}".format(sys.version_info.major, sys.version_info.minor)
+]
 
 
 def install_deps(session):
@@ -54,11 +64,21 @@ def install_torch_trt(session):
 
 
 def download_datasets(session):
-    print("Downloading dataset to path", os.path.join(TOP_DIR, "examples/int8/training/vgg16"))
+    print(
+        "Downloading dataset to path",
+        os.path.join(TOP_DIR, "examples/int8/training/vgg16"),
+    )
     session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16"))
-    session.run_always("wget", "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", external=True)
+    session.run_always(
+        "wget", "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz", external=True
+    )
     session.run_always("tar", "-xvzf", "cifar-10-binary.tar.gz", external=True)
-    session.run_always("mkdir", "-p", os.path.join(TOP_DIR, "tests/accuracy/datasets/data"), external=True)
+    session.run_always(
+        "mkdir",
+        "-p",
+        os.path.join(TOP_DIR, "tests/accuracy/datasets/data"),
+        external=True,
+    )
     session.run_always(
         "cp",
         "-rpf",
@@ -91,7 +111,12 @@ def train_model(session):
             env={"PYTHONPATH": PYT_PATH},
         )
 
-        session.run_always("python", "export_ckpt.py", "vgg16_ckpts/ckpt_epoch25.pth", env={"PYTHONPATH": PYT_PATH})
+        session.run_always(
+            "python",
+            "export_ckpt.py",
+            "vgg16_ckpts/ckpt_epoch25.pth",
+            env={"PYTHONPATH": PYT_PATH},
+        )
     else:
         session.run_always(
             "python",
@@ -113,7 +138,9 @@ def train_model(session):
 
 def finetune_model(session):
     # Install pytorch-quantization dependency
-    session.install("pytorch-quantization", "--extra-index-url", "https://pypi.ngc.nvidia.com")
+    session.install(
+        "pytorch-quantization", "--extra-index-url", "https://pypi.ngc.nvidia.com"
+    )
     session.chdir(os.path.join(TOP_DIR, "examples/int8/training/vgg16"))
 
     if USE_HOST_DEPS:
@@ -136,7 +163,12 @@ def finetune_model(session):
         )
 
         # Export model
-        session.run_always("python", "export_qat.py", "vgg16_ckpts/ckpt_epoch26.pth", env={"PYTHONPATH": PYT_PATH})
+        session.run_always(
+            "python",
+            "export_qat.py",
+            "vgg16_ckpts/ckpt_epoch26.pth",
+            env={"PYTHONPATH": PYT_PATH},
+        )
     else:
         session.run_always(
             "python",
@@ -202,7 +234,9 @@ def copy_model(session):
     model_files = ["trained_vgg16.jit.pt", "trained_vgg16_qat.jit.pt"]
 
     for file_name in model_files:
-        src_file = os.path.join(TOP_DIR, str("examples/int8/training/vgg16/") + file_name)
+        src_file = os.path.join(
+            TOP_DIR, str("examples/int8/training/vgg16/") + file_name
+        )
         if os.path.exists(src_file):
             session.run_always(
                 "cp",
diff --git a/py/setup.py b/py/setup.py
index 2b9e36bab2..fc7019ce31 100644
--- a/py/setup.py
+++ b/py/setup.py
@@ -35,7 +35,11 @@
 
 
 def get_git_revision_short_hash() -> str:
-    return subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
+    return (
+        subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
+        .decode("ascii")
+        .strip()
+    )
 
 
 if "--fx-only" in sys.argv:
@@ -70,7 +74,9 @@ def get_git_revision_short_hash() -> str:
         elif version == "5.0":
             JETPACK_VERSION = "4.6"
     if not JETPACK_VERSION:
-        warnings.warn("Assuming jetpack version to be 4.6 or greater, if not use the --jetpack-version option")
+        warnings.warn(
+            "Assuming jetpack version to be 4.6 or greater, if not use the --jetpack-version option"
+        )
         JETPACK_VERSION = "4.6"
 
 
@@ -158,7 +164,11 @@ def copy_libtorchtrt(multilinux=False):
             dir_path + "/trtorch/lib/libtrtorch.so",
         )
     else:
-        os.system("tar -xzf ../bazel-bin/libtorchtrt.tar.gz --strip-components=2 -C " + dir_path + "/torch_tensorrt")
+        os.system(
+            "tar -xzf ../bazel-bin/libtorchtrt.tar.gz --strip-components=2 -C "
+            + dir_path
+            + "/torch_tensorrt"
+        )
 
 
 class DevelopCommand(develop):
@@ -299,7 +309,11 @@ def run(self):
             "-Wno-deprecated",
             "-Wno-deprecated-declarations",
         ]
-        + (["-D_GLIBCXX_USE_CXX11_ABI=1"] if CXX11_ABI else ["-D_GLIBCXX_USE_CXX11_ABI=0"]),
+        + (
+            ["-D_GLIBCXX_USE_CXX11_ABI=1"]
+            if CXX11_ABI
+            else ["-D_GLIBCXX_USE_CXX11_ABI=0"]
+        ),
         extra_link_args=[
             "-Wno-deprecated",
             "-Wno-deprecated-declarations",
@@ -314,7 +328,11 @@ def run(self):
             "-Xlinker",
             "-export-dynamic",
         ]
-        + (["-D_GLIBCXX_USE_CXX11_ABI=1"] if CXX11_ABI else ["-D_GLIBCXX_USE_CXX11_ABI=0"]),
+        + (
+            ["-D_GLIBCXX_USE_CXX11_ABI=1"]
+            if CXX11_ABI
+            else ["-D_GLIBCXX_USE_CXX11_ABI=0"]
+        ),
         undef_macros=["NDEBUG"],
     )
 ]
diff --git a/py/torch_tensorrt/_Device.py b/py/torch_tensorrt/_Device.py
index dbcde6c4b2..16c9b8ea98 100644
--- a/py/torch_tensorrt/_Device.py
+++ b/py/torch_tensorrt/_Device.py
@@ -46,7 +46,9 @@ def __init__(self, *args, **kwargs):
         """
         if len(args) == 1:
             if not isinstance(args[0], str):
-                raise TypeError("When specifying Device through positional argument, argument must be str")
+                raise TypeError(
+                    "When specifying Device through positional argument, argument must be str"
+                )
             else:
                 (self.device_type, id) = Device._parse_device_str(args[0])
                 if self.device_type == _enums.DeviceType.GPU:
@@ -96,7 +98,9 @@ def __str__(self) -> str:
         return (
             "Device(type={}, gpu_id={}".format(self.device_type, self.gpu_id) + ")"
             if self.device_type == _enums.DeviceType.GPU
-            else ", dla_core={}, allow_gpu_fallback={}".format(self.dla_core, self.allow_gpu_fallback)
+            else ", dla_core={}, allow_gpu_fallback={}".format(
+                self.dla_core, self.allow_gpu_fallback
+            )
         )
 
     def _to_internal(self) -> _C.Device:
diff --git a/py/torch_tensorrt/_Input.py b/py/torch_tensorrt/_Input.py
index edabd88f2e..c66d0ec788 100644
--- a/py/torch_tensorrt/_Input.py
+++ b/py/torch_tensorrt/_Input.py
@@ -30,7 +30,9 @@ class _ShapeMode(Enum):
 
     shape_mode = None  #: (torch_tensorrt.Input._ShapeMode): Is input statically or dynamically shaped
     shape = None  #: (Tuple or Dict): Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
-    dtype = _enums.dtype.unknown  #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
+    dtype = (
+        _enums.dtype.unknown
+    )  #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
     _explicit_set_dtype = False
     format = (
         _enums.TensorFormat.contiguous
@@ -74,11 +76,15 @@ def __init__(self, *args, **kwargs):
             self.shape_mode = Input._ShapeMode.STATIC
 
         elif len(args) == 0:
-            if not ("shape" in kwargs) and not (all(k in kwargs for k in ["min_shape", "opt_shape", "max_shape"])):
+            if not ("shape" in kwargs) and not (
+                all(k in kwargs for k in ["min_shape", "opt_shape", "max_shape"])
+            ):
                 raise ValueError(
                     "Missing required arguments for class Input\nEither shape or all three of min_shape, opt_shape, max_shape must be defined"
                 )
-            elif ("shape" in kwargs) and all(k in kwargs for k in ["min_shape", "opt_shape", "max_shape"]):
+            elif ("shape" in kwargs) and all(
+                k in kwargs for k in ["min_shape", "opt_shape", "max_shape"]
+            ):
                 raise ValueError(
                     "Found that both shape, and one or more of min_shape, opt_shape, max_shape were specified\nclass Input expects that only either shape or all three of min_shape, opt_shape, max_shape are defined"
                 )
@@ -134,7 +140,9 @@ def __init__(self, *args, **kwargs):
 
     def __str__(self) -> str:
         if self.shape_mode == Input._ShapeMode.STATIC:
-            return "Input(shape={}, dtype={}, format={})".format(self.shape, str(self.dtype), str(self.format))
+            return "Input(shape={}, dtype={}, format={})".format(
+                self.shape, str(self.dtype), str(self.format)
+            )
         elif self.shape_mode == Input._ShapeMode.DYNAMIC:
             return "Input(min_shape={}, opt_shape={}, max_shape={}, dtype={}, format={})".format(
                 self.shape["min_shape"],
@@ -266,6 +274,8 @@ def _from_tensor(cls, t: torch.Tensor):
                 "Tensor does not have a supported contiguous memory format, supported formats are contiguous or channel_last"
             )
         frmt = (
-            torch.contiguous_format if t.is_contiguous(memory_format=torch.contiguous_format) else torch.channels_last
+            torch.contiguous_format
+            if t.is_contiguous(memory_format=torch.contiguous_format)
+            else torch.channels_last
         )
         return cls(shape=t.shape, dtype=t.dtype, format=frmt)
diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py
index e4bcd01a77..68fde67e71 100644
--- a/py/torch_tensorrt/__init__.py
+++ b/py/torch_tensorrt/__init__.py
@@ -12,7 +12,9 @@
 )
 
 if sys.version_info < (3,):
-    raise Exception("Python 2 has reached end-of-life and is not supported by Torch-TensorRT")
+    raise Exception(
+        "Python 2 has reached end-of-life and is not supported by Torch-TensorRT"
+    )
 
 
 def _parse_semver(version):
diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py
index 5102338d32..f6487a4402 100644
--- a/py/torch_tensorrt/_compile.py
+++ b/py/torch_tensorrt/_compile.py
@@ -27,7 +27,10 @@ class _ModuleType(Enum):
 
 
 def _parse_module_type(module: Any) -> _ModuleType:
-    if any(isinstance(module, t) for t in [torch.jit.ScriptModule, torch.jit.ScriptFunction]):
+    if any(
+        isinstance(module, t)
+        for t in [torch.jit.ScriptModule, torch.jit.ScriptFunction]
+    ):
         return _ModuleType.ts
     elif isinstance(module, torch.fx.GraphModule):
         return _ModuleType.fx
@@ -52,10 +55,14 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
         if ir == "default":
             # Options are listed in order of preference
             if module_is_tsable:
-                logging.log(logging.Level.Info, "ir was set to default, using TorchScript as ir")
+                logging.log(
+                    logging.Level.Info, "ir was set to default, using TorchScript as ir"
+                )
                 return _IRType.ts
             elif module_is_fxable:
-                raise ValueError("Was given a torch.fx.GraphModule, fx is not currently supported by Torch-TensorRT")
+                raise ValueError(
+                    "Was given a torch.fx.GraphModule, fx is not currently supported by Torch-TensorRT"
+                )
                 # logging.log(logging.Level.Info, "ir was set to default, using TorchScript as fx")
                 # return _IRType.fx
             else:
@@ -64,7 +71,13 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
             raise ValueError("Unknown ir was requested")
 
 
-def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums.dtype.float]), **kwargs):
+def compile(
+    module: Any,
+    ir="default",
+    inputs=[],
+    enabled_precisions=set([_enums.dtype.float]),
+    **kwargs,
+):
     """Compile a PyTorch module for NVIDIA GPUs using TensorRT
 
     Takes a existing PyTorch module and a set of settings to configure the compiler
@@ -110,11 +123,19 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
                 "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
             )
             ts_mod = torch.jit.script(module)
-        return torch_tensorrt.ts.compile(ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs)
+        return torch_tensorrt.ts.compile(
+            ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs
+        )
     elif target_ir == _IRType.fx:
-        if torch.float16 in enabled_precisions or torch_tensorrt.dtype.half in enabled_precisions:
+        if (
+            torch.float16 in enabled_precisions
+            or torch_tensorrt.dtype.half in enabled_precisions
+        ):
             lower_precision = LowerPrecision.FP16
-        elif torch.float32 in enabled_precisions or torch_tensorrt.dtype.float in enabled_precisions:
+        elif (
+            torch.float32 in enabled_precisions
+            or torch_tensorrt.dtype.float in enabled_precisions
+        ):
             lower_precision = LowerPrecision.FP32
         else:
             raise ValueError(f"Precision {enabled_precisions} not supported on FX")
@@ -132,7 +153,12 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
 
 
 def convert_method_to_trt_engine(
-    module: Any, method_name: str, ir="default", inputs=[], enabled_precisions=set([_enums.dtype.float]), **kwargs
+    module: Any,
+    method_name: str,
+    ir="default",
+    inputs=[],
+    enabled_precisions=set([_enums.dtype.float]),
+    **kwargs,
 ):
     """Convert a TorchScript module method to a serialized TensorRT engine
 
@@ -175,7 +201,11 @@ def convert_method_to_trt_engine(
             )
             ts_mod = torch.jit.script(module)
         return torch_tensorrt.ts.convert_method_to_trt_engine(
-            ts_mod, method_name, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs
+            ts_mod,
+            method_name,
+            inputs=inputs,
+            enabled_precisions=enabled_precisions,
+            **kwargs,
         )
     elif target_ir == _IRType.fx:
         raise RuntimeError("fx is currently not supported")
diff --git a/py/torch_tensorrt/fx/converters/activation.py b/py/torch_tensorrt/fx/converters/activation.py
index d11ac503bc..a7ab25152c 100644
--- a/py/torch_tensorrt/fx/converters/activation.py
+++ b/py/torch_tensorrt/fx/converters/activation.py
@@ -9,7 +9,9 @@
 from .converter_utils import mark_as_int8_layer
 
 
-def common_activation(network, mod, input_val, activation_type, activation_dyn_range_fn, layer_name):
+def common_activation(
+    network, mod, input_val, activation_type, activation_dyn_range_fn, layer_name
+):
     layer = network.add_activation(input=input_val, type=activation_type)
     layer.name = layer_name
 
@@ -28,7 +30,10 @@ def relu(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"ReLU received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"ReLU received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     def activation_dyn_range_fn(dyn_range):
         return max(0, dyn_range[0]), max(0, dyn_range[1])
@@ -50,7 +55,10 @@ def sigmoid(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Sigmoid received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Sigmoid received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     def activation_dyn_range_fn(dyn_range):
         def sigmoid_fn(x):
diff --git a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py
index 50f74c9c2d..8de9987c77 100644
--- a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py
+++ b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py
@@ -14,7 +14,10 @@ def adaptive_avgpool2d(network, submod, args, kwargs, name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"AdaptiveAvgPool2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"AdaptiveAvgPool2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     output_size = extend_mod_attr_to_tuple(submod, "output_size", 2)
     stride = (
@@ -22,7 +25,9 @@ def adaptive_avgpool2d(network, submod, args, kwargs, name):
         input_val.shape[-1] // output_size[-1],
     )
     kernel_size = stride
-    layer = network.add_pooling(input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size)
+    layer = network.add_pooling(
+        input=input_val, type=trt.PoolingType.AVERAGE, window_size=kernel_size
+    )
     layer.stride = stride
     layer.name = name
 
diff --git a/py/torch_tensorrt/fx/converters/add.py b/py/torch_tensorrt/fx/converters/add.py
index 0c01b160f2..c60b0313a3 100644
--- a/py/torch_tensorrt/fx/converters/add.py
+++ b/py/torch_tensorrt/fx/converters/add.py
@@ -22,7 +22,9 @@ def add(network, target, args, kwargs, layer_name):
         assert kwargs["alpha"] == 1
 
     if not all(isinstance(arg, trt.tensorrt.ITensor) for arg in [lhs_val, rhs_val]):
-        raise RuntimeError("add() received an input that is not part of the TensorRT region!")
+        raise RuntimeError(
+            "add() received an input that is not part of the TensorRT region!"
+        )
 
     layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM)
     layer.name = layer_name
@@ -35,7 +37,9 @@ def quantized_add(network, target, args, kwargs, layer_name):
     lhs_val, rhs_val = kwargs["qa"], kwargs["qb"]
 
     if not all(isinstance(i, trt.tensorrt.ITensor) for i in [lhs_val, rhs_val]):
-        raise RuntimeError("Quantized add received an input that is not part of the TensorRT region!")
+        raise RuntimeError(
+            "Quantized add received an input that is not part of the TensorRT region!"
+        )
 
     layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM)
     layer.name = layer_name
@@ -50,14 +54,18 @@ def quantized_add_relu(network, submod, args, kwargs, layer_name):
     lhs_val, rhs_val = kwargs["qa"], kwargs["qb"]
 
     if not all(isinstance(i, trt.tensorrt.ITensor) for i in [lhs_val, rhs_val]):
-        raise RuntimeError("Quantized add_relu received an input that is not part of the TensorRT region!")
+        raise RuntimeError(
+            "Quantized add_relu received an input that is not part of the TensorRT region!"
+        )
 
     layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM)
     layer.name = f"{layer_name}_add"
     dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8)
     mark_as_int8_layer(layer, dyn_range)
 
-    layer = network.add_activation(input=layer.get_output(0), type=trt.ActivationType.RELU)
+    layer = network.add_activation(
+        input=layer.get_output(0), type=trt.ActivationType.RELU
+    )
     layer.name = f"{layer_name}_relu"
     mark_as_int8_layer(layer, dyn_range)
 
diff --git a/py/torch_tensorrt/fx/converters/batchnorm.py b/py/torch_tensorrt/fx/converters/batchnorm.py
index ba27aaaebb..130991df54 100644
--- a/py/torch_tensorrt/fx/converters/batchnorm.py
+++ b/py/torch_tensorrt/fx/converters/batchnorm.py
@@ -18,7 +18,9 @@ def common_batchnorm(network, mod, input_val, layer_name, is_quantized):
     layer.name = layer_name
 
     if is_quantized:
-        mark_as_int8_layer(layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8))
+        mark_as_int8_layer(
+            layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8)
+        )
 
     return layer.get_output(0)
 
@@ -30,7 +32,10 @@ def batchnorm2d(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"BatchNorm2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"BatchNorm2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_batchnorm(network, submod, input_val, layer_name, is_quantized=False)
 
@@ -41,7 +46,8 @@ def quantized_batchnorm2d(network, submod, args, kwargs, layer_name):
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
         raise RuntimeError(
-            f"Quantized BatchNorm2d received input {input_val} that is not part " "of the TensorRT region!"
+            f"Quantized BatchNorm2d received input {input_val} that is not part "
+            "of the TensorRT region!"
         )
 
     return common_batchnorm(network, submod, input_val, layer_name, is_quantized=True)
diff --git a/py/torch_tensorrt/fx/converters/converter_utils.py b/py/torch_tensorrt/fx/converters/converter_utils.py
index 8600697055..50c6f6fb03 100644
--- a/py/torch_tensorrt/fx/converters/converter_utils.py
+++ b/py/torch_tensorrt/fx/converters/converter_utils.py
@@ -47,9 +47,13 @@ def get_trt_plugin(
     #     print(plugin_creator.name)
 
     plugin_registry = trt.get_plugin_registry()
-    plugin_creator = plugin_registry.get_plugin_creator(plugin_name, version, plugin_namespace)
+    plugin_creator = plugin_registry.get_plugin_creator(
+        plugin_name, version, plugin_namespace
+    )
     assert plugin_creator, f"Unabled to find plugin creator with name {plugin_name}"
-    plugin = plugin_creator.create_plugin(name=plugin_name, field_collection=field_collection)
+    plugin = plugin_creator.create_plugin(
+        name=plugin_name, field_collection=field_collection
+    )
 
     assert plugin is not None, f"Plugin: {plugin_name} could not be fetched"
     return plugin
@@ -129,7 +133,9 @@ def to_numpy(tensor: Optional[torch.Tensor]) -> Optional[np.ndarray]:
     if tensor is None:
         return tensor
 
-    assert isinstance(tensor, torch.Tensor), f"to_numpy can only be called on None or a torch.Tensor, got: {tensor}"
+    assert isinstance(
+        tensor, torch.Tensor
+    ), f"to_numpy can only be called on None or a torch.Tensor, got: {tensor}"
     if tensor.is_quantized:
         tensor = tensor.dequantize()
 
@@ -218,7 +224,9 @@ def create_constant(
     return constant.get_output(0)
 
 
-def get_trt_tensor(network: TRTNetwork, input_val: Any, name: str, dtype: Optional[torch.dtype] = None) -> TRTTensor:
+def get_trt_tensor(
+    network: TRTNetwork, input_val: Any, name: str, dtype: Optional[torch.dtype] = None
+) -> TRTTensor:
     """
     Given a value of random type, we try to convert it to a TensorRT ITensor.
     An runtime error is raised if we're not able to do that.
@@ -249,7 +257,10 @@ def get_trt_tensor(network: TRTNetwork, input_val: Any, name: str, dtype: Option
     if isinstance(input_val, (torch.Tensor, int, float)):
         return create_constant(network, input_val, name, dtype)
     elif not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"Received input {input_val} of name {name} that " "is not part of the TensorRT region!")
+        raise RuntimeError(
+            f"Received input {input_val} of name {name} that "
+            "is not part of the TensorRT region!"
+        )
     else:
         return input_val
 
@@ -282,7 +293,9 @@ def prepend_ones(
     if has_dynamic_shape(tensor.shape):
         tensor_shape_layer = network.add_shape(tensor)
         tensor_shape_layer.name = f"{name}_broadcast_orig_shape"
-        prepend_shape_layer = network.add_constant((num_prepend_ones,), np.ones((num_prepend_ones,), dtype=np.int32))
+        prepend_shape_layer = network.add_constant(
+            (num_prepend_ones,), np.ones((num_prepend_ones,), dtype=np.int32)
+        )
         prepend_shape_layer.name = f"{name}_broadcast_prepend_ones"
         reshape_dim_layer = network.add_concatenation(
             [prepend_shape_layer.get_output(0), tensor_shape_layer.get_output(0)]
@@ -369,12 +382,16 @@ def get_shape_with_dynamic_shape(
     # Ger real shape info for input_val
     input_shape = network.add_shape(input_val).get_output(0)
 
-    scale_layer = network.add_constant(input_shape.shape, np.ascontiguousarray(shape, dtype=np.int32))
+    scale_layer = network.add_constant(
+        input_shape.shape, np.ascontiguousarray(shape, dtype=np.int32)
+    )
     set_layer_name(scale_layer, target, f"{name}_scale")
     scale_res = scale_layer.get_output(0)
 
     length = input_shape.shape[0]
-    zero_layer = network.add_constant(input_shape.shape, to_numpy(torch.zeros((length), dtype=torch.int32)))
+    zero_layer = network.add_constant(
+        input_shape.shape, to_numpy(torch.zeros((length), dtype=torch.int32))
+    )
     set_layer_name(zero_layer, target, f"{name}_zeros")
 
     condition_val = add_binary_elementwise_layer(
@@ -480,11 +497,17 @@ def add_binary_elementwise_layer(
     # Check the limitation in the doc string.
     if network.has_implicit_batch_dimension:
         if is_lhs_trt_tensor and not is_rhs_trt_tensor:
-            assert len(lhs_val.shape) >= len(rhs_val.shape), f"{lhs_val.shape} >= {rhs_val.shape}"
+            assert len(lhs_val.shape) >= len(
+                rhs_val.shape
+            ), f"{lhs_val.shape} >= {rhs_val.shape}"
         elif not is_lhs_trt_tensor and is_rhs_trt_tensor:
-            assert len(rhs_val.shape) >= len(lhs_val.shape), f"{rhs_val.shape} >= {lhs_val.shape}"
+            assert len(rhs_val.shape) >= len(
+                lhs_val.shape
+            ), f"{rhs_val.shape} >= {lhs_val.shape}"
 
-    lhs_val, rhs_val = broadcast(network, lhs_val, rhs_val, f"{name}_lhs", f"{name}_rhs")
+    lhs_val, rhs_val = broadcast(
+        network, lhs_val, rhs_val, f"{name}_lhs", f"{name}_rhs"
+    )
     layer = network.add_elementwise(lhs_val, rhs_val, op_type)
     set_layer_name(layer, target, name)
     output = layer.get_output(0)
@@ -524,7 +547,10 @@ def add_unary_layer(
         The output of TensorRT Unary layer.
     """
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{operation_type} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{operation_type} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
     layer = network.add_unary(input_val, operation_type)
     set_layer_name(layer, target, name)
     output = layer.get_output(0)
@@ -561,7 +587,10 @@ def add_activation_layer(
         The output of TensorRT Activation layer.
     """
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{operation_type} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{operation_type} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
     layer = network.add_activation(input_val, operation_type)
     if alpha is not None:
         layer.alpha = alpha
@@ -596,7 +625,10 @@ def add_reduce_layer(
     """
     input_val = kwargs["input"]
     if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(f"{name} received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"{name} received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     # If dim is specified, then the op is reducing over certain dimensions.
     # Otherwise, it's reducing over all elements, which is only supported in
@@ -672,7 +704,9 @@ def get_inputs_from_args_and_kwargs(args, kwargs, input_names):
     return inputs
 
 
-def sign(network: TRTNetwork, input_val: TRTTensor, target: Target, name: str) -> TRTTensor:
+def sign(
+    network: TRTNetwork, input_val: TRTTensor, target: Target, name: str
+) -> TRTTensor:
     """
     Sign is calculated as below:
        x = input
@@ -690,8 +724,12 @@ def sign(network: TRTNetwork, input_val: TRTTensor, target: Target, name: str) -
     Returns:
         A TensorRT tensor represent the result of sign operator.
     """
-    input_exp_output = add_unary_layer(network, input_val, trt.UnaryOperation.EXP, target, f"{name}_prod_exp")
-    input_abs_output = add_unary_layer(network, input_val, trt.UnaryOperation.ABS, target, f"{name}_prod_abs")
+    input_exp_output = add_unary_layer(
+        network, input_val, trt.UnaryOperation.EXP, target, f"{name}_prod_exp"
+    )
+    input_abs_output = add_unary_layer(
+        network, input_val, trt.UnaryOperation.ABS, target, f"{name}_prod_abs"
+    )
     input_abs_exp_output = add_unary_layer(
         network,
         input_abs_output,
@@ -725,7 +763,9 @@ def sign(network: TRTNetwork, input_val: TRTTensor, target: Target, name: str) -
     )
 
 
-def trunc_div(input: TRTTensor, other: TRTTensor, network: TRTNetwork, target: Target, name: str) -> TRTTensor:
+def trunc_div(
+    input: TRTTensor, other: TRTTensor, network: TRTNetwork, target: Target, name: str
+) -> TRTTensor:
     """
     Perform trunc divide on Tensor, result of divide will be round toward zero.
     This means for positive number, it will be floor round; for negative number,
@@ -750,10 +790,16 @@ def trunc_div(input: TRTTensor, other: TRTTensor, network: TRTNetwork, target: T
     if not isinstance(input, trt.tensorrt.ITensor):
         input = get_trt_tensor(network, input, f"{name}_input")
     if not isinstance(other, trt.tensorrt.ITensor):
-        other = get_trt_tensor(network, other, f"{name}_other", dtype=torch_dtype_from_trt(input.dtype))
+        other = get_trt_tensor(
+            network, other, f"{name}_other", dtype=torch_dtype_from_trt(input.dtype)
+        )
 
-    abs_input_output = add_unary_layer(network, input, trt.UnaryOperation.ABS, target, f"{name}_abs_input")
-    abs_other_output = add_unary_layer(network, other, trt.UnaryOperation.ABS, target, f"{name}_abs_other")
+    abs_input_output = add_unary_layer(
+        network, input, trt.UnaryOperation.ABS, target, f"{name}_abs_input"
+    )
+    abs_other_output = add_unary_layer(
+        network, other, trt.UnaryOperation.ABS, target, f"{name}_abs_other"
+    )
     abs_floor_output = add_binary_elementwise_layer(
         network,
         abs_input_output,
@@ -791,7 +837,9 @@ def get_python_op_from_trt_elementwise_op(
         raise RuntimeError(f"{trt_op} is not supported yet!")
 
 
-def dtype_uniform(network: TRTNetwork, target: Target, name: str, input: TRTTensor, other: TRTTensor):
+def dtype_uniform(
+    network: TRTNetwork, target: Target, name: str, input: TRTTensor, other: TRTTensor
+):
     table = {trt.bool: 0, trt.int32: 1, trt.float16: 2, trt.float32: 3}
     input_dtype = input.dtype
     other_dtype = other.dtype
diff --git a/py/torch_tensorrt/fx/converters/convolution.py b/py/torch_tensorrt/fx/converters/convolution.py
index 9107e2b64d..5228616219 100644
--- a/py/torch_tensorrt/fx/converters/convolution.py
+++ b/py/torch_tensorrt/fx/converters/convolution.py
@@ -54,7 +54,9 @@ def common_conv(network, mod, dimension, input_val, layer_name, is_quantized):
 
     if is_quantized:
         # Assume the dtype of activation is torch.quint8
-        mark_as_int8_layer(layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8))
+        mark_as_int8_layer(
+            layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8)
+        )
 
     result = layer.get_output(0)
     if dimension == 1:
@@ -93,7 +95,10 @@ def conv1d(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Conv1d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Conv1d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if layer_name is None:
         raise RuntimeError("layer name is none")
@@ -114,7 +119,10 @@ def conv2d(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Conv2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Conv2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_conv(
         network,
@@ -133,7 +141,10 @@ def conv3d(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Conv3d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Conv3d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_conv(
         network,
@@ -150,7 +161,10 @@ def quantized_conv2d(network, submod, args, kwargs, layer_name):
     input_val = args[0]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Quantized Conv2d received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Quantized Conv2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_conv(
         network,
@@ -168,7 +182,8 @@ def quantized_conv_relu2d(network, submod, args, kwargs, layer_name):
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
         raise RuntimeError(
-            f"Quantized ConvReLU2d received input {input_val} that is not part " "of the TensorRT region!"
+            f"Quantized ConvReLU2d received input {input_val} that is not part "
+            "of the TensorRT region!"
         )
 
     return common_conv_relu(
diff --git a/py/torch_tensorrt/fx/converters/linear.py b/py/torch_tensorrt/fx/converters/linear.py
index 6544b964b0..e7cca6f76a 100644
--- a/py/torch_tensorrt/fx/converters/linear.py
+++ b/py/torch_tensorrt/fx/converters/linear.py
@@ -60,7 +60,10 @@ def linear(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Linear received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Linear received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_linear(network, submod, input_val, layer_name, is_quantized=False)
 
@@ -70,6 +73,9 @@ def quantized_linear(network, submod, args, kwargs, layer_name):
     input_val = args[0]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Quantized Linear received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Quantized Linear received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return common_linear(network, submod, input_val, layer_name, is_quantized=True)
diff --git a/py/torch_tensorrt/fx/converters/maxpool.py b/py/torch_tensorrt/fx/converters/maxpool.py
index bdf08ffebc..6c64a3b108 100644
--- a/py/torch_tensorrt/fx/converters/maxpool.py
+++ b/py/torch_tensorrt/fx/converters/maxpool.py
@@ -12,7 +12,9 @@ def common_maxpool(network, mod, dimension, input_val, layer_name):
     stride = extend_mod_attr_to_tuple(mod, "stride", dimension)
     padding = extend_mod_attr_to_tuple(mod, "padding", dimension)
 
-    layer = network.add_pooling(input=input_val, type=trt.PoolingType.MAX, window_size=kernel_size)
+    layer = network.add_pooling(
+        input=input_val, type=trt.PoolingType.MAX, window_size=kernel_size
+    )
 
     layer.stride = stride
     layer.padding = padding
@@ -34,6 +36,11 @@ def maxpool2d(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"MaxPool2d received input {input_val} that is not part " "of the TensorRT region!")
-
-    return common_maxpool(network, submod, dimension=2, input_val=input_val, layer_name=layer_name)
+        raise RuntimeError(
+            f"MaxPool2d received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
+
+    return common_maxpool(
+        network, submod, dimension=2, input_val=input_val, layer_name=layer_name
+    )
diff --git a/py/torch_tensorrt/fx/converters/mul.py b/py/torch_tensorrt/fx/converters/mul.py
index d1796bf59a..a1d9858ebd 100644
--- a/py/torch_tensorrt/fx/converters/mul.py
+++ b/py/torch_tensorrt/fx/converters/mul.py
@@ -20,7 +20,9 @@ def mul(network, target, args, kwargs, layer_name):
         lhs_val, rhs_val = kwargs["input"], kwargs["other"]
 
     if not all(isinstance(arg, trt.tensorrt.ITensor) for arg in [lhs_val, rhs_val]):
-        raise RuntimeError("mul() received an input that is not part of the TensorRT region!")
+        raise RuntimeError(
+            "mul() received an input that is not part of the TensorRT region!"
+        )
 
     layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.PROD)
     layer.name = layer_name
@@ -34,7 +36,9 @@ def quantized_mul(network, target, args, kwargs, layer_name):
     lhs_val, rhs_val = kwargs["qa"], kwargs["qb"]
 
     if not all(isinstance(i, trt.tensorrt.ITensor) for i in [lhs_val, rhs_val]):
-        raise RuntimeError("Quantized mul received an input that is not part of the TensorRT region!")
+        raise RuntimeError(
+            "Quantized mul received an input that is not part of the TensorRT region!"
+        )
 
     layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.PROD)
     layer.name = layer_name
diff --git a/py/torch_tensorrt/fx/converters/quantization.py b/py/torch_tensorrt/fx/converters/quantization.py
index f460241f25..6b75f93278 100644
--- a/py/torch_tensorrt/fx/converters/quantization.py
+++ b/py/torch_tensorrt/fx/converters/quantization.py
@@ -16,7 +16,10 @@ def dequantize(network, submod, args, kwargs, layer_name):
     input_val = args[0]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Dequantize received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Dequantize received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return input_val
 
@@ -26,7 +29,9 @@ def dequantize(network, submod, args, kwargs, layer_name):
 def quantize(network, submod, args, kwargs, layer_name):
     # If submod is not nn.Module then it's quantize_per_tensor
     if not isinstance(submod, torch.nn.Module):
-        input_val, scale, zero_point, dtype = get_inputs_from_args_and_kwargs(args, kwargs, quantize_per_tensor_inputs)
+        input_val, scale, zero_point, dtype = get_inputs_from_args_and_kwargs(
+            args, kwargs, quantize_per_tensor_inputs
+        )
     else:
         input_val = args[0]
         scale = submod.scale
@@ -34,10 +39,15 @@ def quantize(network, submod, args, kwargs, layer_name):
         dtype = submod.dtype
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Quantize received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Quantize received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     if dtype != torch.quint8:
-        raise RuntimeError(f"Only support torch.quint8 quantized type for activation, get {dtype}.")
+        raise RuntimeError(
+            f"Only support torch.quint8 quantized type for activation, get {dtype}."
+        )
 
     input_val.dynamic_range = get_dyn_range(scale, zero_point, dtype)
     return input_val
@@ -48,6 +58,9 @@ def identity(network, submod, args, kwargs, layer_name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Identity received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Identity received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     return input_val
diff --git a/py/torch_tensorrt/fx/converters/transformation.py b/py/torch_tensorrt/fx/converters/transformation.py
index 4e049656b9..62cfef8453 100644
--- a/py/torch_tensorrt/fx/converters/transformation.py
+++ b/py/torch_tensorrt/fx/converters/transformation.py
@@ -14,13 +14,18 @@ def torch_flatten(network, target, args, kwargs, name):
     input_val = kwargs["input"]
 
     if not isinstance(input_val, trt.tensorrt.ITensor):
-        raise RuntimeError(f"Flatten received input {input_val} that is not part " "of the TensorRT region!")
+        raise RuntimeError(
+            f"Flatten received input {input_val} that is not part "
+            "of the TensorRT region!"
+        )
 
     # For trt shape we don't have batch dim
     start_dim = kwargs["start_dim"] - 1
     end_dim = len(input_val.shape) if kwargs["end_dim"] == -1 else kwargs["end_dim"] - 1
 
-    assert start_dim >= 0, "Expect non negtive start_dim, this probably due to flatten batch dim."
+    assert (
+        start_dim >= 0
+    ), "Expect non negtive start_dim, this probably due to flatten batch dim."
 
     new_shape = []
     flatten_dim = 1
diff --git a/py/torch_tensorrt/fx/diagnostics.py b/py/torch_tensorrt/fx/diagnostics.py
index 7925d98a3f..0ba2a30652 100644
--- a/py/torch_tensorrt/fx/diagnostics.py
+++ b/py/torch_tensorrt/fx/diagnostics.py
@@ -15,10 +15,14 @@
 WriteObj = t.Union[TWrite, t.Callable[[], TWrite]]
 
 _CURRENT_WRITER: ContextVar["DiagnosticsWriter"] = ContextVar("_CURRENT_WRITER")
-_CURRENT_COLLECTOR: ContextVar["DiagnosticsCollector"] = ContextVar("_CURRENT_COLLECTOR")
+_CURRENT_COLLECTOR: ContextVar["DiagnosticsCollector"] = ContextVar(
+    "_CURRENT_COLLECTOR"
+)
 # Allows a collector to indicate subsequent collections should be suppressed to
 # avoid duplicate collections.
-_SUBSEQUENT_COLLECT_SUPPRESSED_BY: ContextVar[object] = ContextVar("_SUBSEQUENT_COLLECT_SUPPRESSED_BY")
+_SUBSEQUENT_COLLECT_SUPPRESSED_BY: ContextVar[object] = ContextVar(
+    "_SUBSEQUENT_COLLECT_SUPPRESSED_BY"
+)
 # Indicates current execution context is within a context manager by
 # `collect_when`. Only when it's set do we actually write diagnostics.
 _IS_IN_COLLECT_CONTEXT: ContextVar[bool] = ContextVar("_IS_IN_COLLECT_CONTEXT")
@@ -33,7 +37,9 @@ class CollectionConditionContext:
 CollectionCondition = t.Callable[[CollectionConditionContext], bool]
 
 
-def collect_when(condition: "CollectionCondition", supress_subsequent_collect: bool = True):
+def collect_when(
+    condition: "CollectionCondition", supress_subsequent_collect: bool = True
+):
     """See `DiagnosticsCollector.collect_when`"""
     return get_current_collector().collect_when(condition, supress_subsequent_collect)
 
@@ -151,7 +157,9 @@ def when_fail(cls) -> "CollectionCondition":
         return lambda ctx: ctx.exception is not None
 
     @classmethod
-    def when_called_by_function(cls, func_name: str, match_prefix: bool = False) -> "CollectionCondition":
+    def when_called_by_function(
+        cls, func_name: str, match_prefix: bool = False
+    ) -> "CollectionCondition":
         def _when_called_by_function(ctx: CollectionConditionContext) -> bool:
             frames = inspect.stack()
             for frame in frames:
@@ -167,12 +175,16 @@ def _when_called_by_function(ctx: CollectionConditionContext) -> bool:
 
     @classmethod
     def when_not_in_tests(cls) -> CollectionCondition:
-        return CollectionConditions.not_(CollectionConditions.when_called_by_function("test_", match_prefix=True))
+        return CollectionConditions.not_(
+            CollectionConditions.when_called_by_function("test_", match_prefix=True)
+        )
 
 
 class DiagnosticsCollector:
     @contextlib.contextmanager
-    def collect_when(self, condition: "CollectionCondition", supress_subsequent_collect: bool = True):
+    def collect_when(
+        self, condition: "CollectionCondition", supress_subsequent_collect: bool = True
+    ):
         """
         Context manager to collect diagnostics when the enclosed code completes
         and *any* of the given condition is met.
@@ -233,7 +245,9 @@ def collect(self) -> str:
         return ""
 
     @classmethod
-    def _test_condition(cls, cond: CollectionCondition, ctx: CollectionConditionContext) -> bool:
+    def _test_condition(
+        cls, cond: CollectionCondition, ctx: CollectionConditionContext
+    ) -> bool:
         try:
             return cond(ctx)
         except Exception as e:
@@ -262,7 +276,10 @@ def _res_or_err(data: WriteObj) -> t.Tuple[TWrite, str]:
     if isinstance(data, (str, bytes)):
         return data, ""
     if not callable(data):
-        raise TypeError(f"data must be a callable that returns actual data to" f"write, but got {type(data)}")
+        raise TypeError(
+            f"data must be a callable that returns actual data to"
+            f"write, but got {type(data)}"
+        )
     try:
         return data(), ""
     except Exception as e:
diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py
index 7fcb473805..0c6e64c78a 100644
--- a/py/torch_tensorrt/fx/fx2trt.py
+++ b/py/torch_tensorrt/fx/fx2trt.py
@@ -19,9 +19,9 @@
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
 
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = Observer(
-    "TRT_INTERPRETER_CALL_PRE_OBSERVER"
-)
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
+    Callable[[torch.fx.GraphModule], None]
+] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
 
 
 class TRTInterpreterResult(NamedTuple):
@@ -47,18 +47,23 @@ def __init__(
 
         flag = 0
         if explicit_batch_dimension:
-            EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+            EXPLICIT_BATCH = 1 << (int)(
+                trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH
+            )
             flag |= EXPLICIT_BATCH
 
         if explicit_precision:
-            EXPLICIT_PRECISION = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)
+            EXPLICIT_PRECISION = 1 << (int)(
+                trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION
+            )
             flag |= EXPLICIT_PRECISION
         self.network = self.builder.create_network(flag)
 
         missing_ops = self.validate_conversion()
         if missing_ops:
             warnings.warn(
-                "Interpretation will fail due to missing operations \n" + "\n".join(f"{i}" for i in missing_ops)
+                "Interpretation will fail due to missing operations \n"
+                + "\n".join(f"{i}" for i in missing_ops)
             )
 
         self.optimization_profiles: Optional[List] = None
@@ -68,19 +73,26 @@ def __init__(
         self._cur_node_name: Optional[str] = None
         self._input_names: List[str] = []
         self._output_names: List[str] = []
-        self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = dict()
+        self._itensor_to_tensor_meta: Dict[
+            trt.tensorrt.ITensor, TensorMetadata
+        ] = dict()
 
     def validate_input_specs(self):
         for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
             if not self.network.has_implicit_batch_dimension:
-                assert has_batch_dim, "It's required to specify batch dimension when it's explicit in TensorRT network."
+                assert (
+                    has_batch_dim
+                ), "It's required to specify batch dimension when it's explicit in TensorRT network."
 
             dynamic_dims = get_dynamic_dims(shape)
             if len(dynamic_dims):
                 assert not self.network.has_implicit_batch_dimension, (
-                    "Can't have dynamic dim when " f"batch dim is implicit, got {shape}."
+                    "Can't have dynamic dim when "
+                    f"batch dim is implicit, got {shape}."
                 )
-                assert len(shape_ranges), "shape_ranges must be provided when shape has dynamic dim."
+                assert len(
+                    shape_ranges
+                ), "shape_ranges must be provided when shape has dynamic dim."
 
                 if self.optimization_profiles:
                     assert len(shape_ranges) == len(self.optimization_profiles), (
@@ -90,11 +102,14 @@ def validate_input_specs(self):
                     )
                 else:
                     self.optimization_profiles = [
-                        self.builder.create_optimization_profile() for _ in range(len(shape_ranges))
+                        self.builder.create_optimization_profile()
+                        for _ in range(len(shape_ranges))
                     ]
 
                 for shape_range in shape_ranges:
-                    assert len(shape_range) == 3, f"Expect three elements in shape_range, got {len(shape_range)}"
+                    assert (
+                        len(shape_range) == 3
+                    ), f"Expect three elements in shape_range, got {len(shape_range)}"
                     assert all(len(s) == len(shape) for s in shape_range), (
                         "Expect elements in shape_range"
                         f" {shape_range} have the same number of dimension as the provided shape {len(shape)}"
@@ -102,7 +117,10 @@ def validate_input_specs(self):
 
                     for i in range(len(shape)):
                         if i in dynamic_dims:
-                            assert all(shape_range[j][i] <= shape_range[j + 1][i] for j in range(2)), (
+                            assert all(
+                                shape_range[j][i] <= shape_range[j + 1][i]
+                                for j in range(2)
+                            ), (
                                 "Expect dynamic dim"
                                 f" {i} to have incremental value for shapes in shape_range {shape_range}."
                             )
@@ -112,7 +130,9 @@ def validate_input_specs(self):
                                 f" for all shapes in shape_range {shape_range}."
                             )
             else:
-                assert len(shape_ranges) == 0, "shape_ranges are provided for input that doesn't have dynamic dim."
+                assert (
+                    len(shape_ranges) == 0
+                ), "shape_ranges are provided for input that doesn't have dynamic dim."
 
     def validate_conversion(self):
         missing_converter = set()
@@ -162,18 +182,28 @@ def run(
 
         # For float outputs, we set their dtype to fp16 only if lower_precision == LowerPrecision.FP16 and
         # force_fp32_output=False.
-        self.output_fp16 = not force_fp32_output and lower_precision == LowerPrecision.FP16
-
-        if lower_precision == LowerPrecision.INT8 and not self.builder.platform_has_fast_int8:
+        self.output_fp16 = (
+            not force_fp32_output and lower_precision == LowerPrecision.FP16
+        )
+
+        if (
+            lower_precision == LowerPrecision.INT8
+            and not self.builder.platform_has_fast_int8
+        ):
             raise RuntimeError("Current platform doesn't support fast native int8!")
 
-        if lower_precision == LowerPrecision.FP16 and not self.builder.platform_has_fast_fp16:
+        if (
+            lower_precision == LowerPrecision.FP16
+            and not self.builder.platform_has_fast_fp16
+        ):
             warnings.warn("Current platform doesn't support fast native fp16!")
 
         self.input_specs_iter = 0
         run_module_start_time = datetime.now()
         super().run()
-        _LOGGER.info(f"Run Module elapsed time: {datetime.now() - run_module_start_time}")
+        _LOGGER.info(
+            f"Run Module elapsed time: {datetime.now() - run_module_start_time}"
+        )
         build_engine_start_time = datetime.now()
 
         self.builder.max_batch_size = max_batch_size
@@ -190,7 +220,9 @@ def run(
 
         if trt.__version__ >= "8.2":
             builder_config.profiling_verbosity = (
-                profiling_verbosity if profiling_verbosity else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+                profiling_verbosity
+                if profiling_verbosity
+                else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
             )
         if lower_precision == LowerPrecision.FP16:
             builder_config.set_flag(trt.BuilderFlag.FP16)
@@ -218,10 +250,18 @@ def run(
         engine = self.builder.build_engine(self.network, builder_config)
         assert engine
 
-        serialized_cache = bytearray(cache.serialize()) if builder_config.get_timing_cache() else bytearray()
-        _LOGGER.info(f"Build TRT engine elapsed time: {datetime.now() - build_engine_start_time}")
+        serialized_cache = (
+            bytearray(cache.serialize())
+            if builder_config.get_timing_cache()
+            else bytearray()
+        )
+        _LOGGER.info(
+            f"Build TRT engine elapsed time: {datetime.now() - build_engine_start_time}"
+        )
 
-        return TRTInterpreterResult(engine, self._input_names, self._output_names, serialized_cache)
+        return TRTInterpreterResult(
+            engine, self._input_names, self._output_names, serialized_cache
+        )
 
     def run_node(self, n):
         self._cur_node_name = str(n)
@@ -245,7 +285,9 @@ def run_node(self, n):
 
     def placeholder(self, target, args, kwargs):
         self._input_names.append(target)
-        shape, dtype, _, shape_ranges, has_batch_dim = self.input_specs[self.input_specs_iter]
+        shape, dtype, _, shape_ranges, has_batch_dim = self.input_specs[
+            self.input_specs_iter
+        ]
         self.input_specs_iter += 1
 
         if self.network.has_implicit_batch_dimension:
@@ -256,7 +298,9 @@ def placeholder(self, target, args, kwargs):
                 assert self.optimization_profiles
                 self.optimization_profiles[i].set_shape(target, *shape_range)
 
-        return self.network.add_input(name=target, shape=tuple(shape), dtype=torch_dtype_to_trt(dtype))
+        return self.network.add_input(
+            name=target, shape=tuple(shape), dtype=torch_dtype_to_trt(dtype)
+        )
 
     def call_module(self, target, args, kwargs):
         assert isinstance(target, str)
@@ -265,7 +309,9 @@ def call_module(self, target, args, kwargs):
         converter = CONVERTERS.get(submod_type)
 
         if not converter:
-            raise RuntimeError(f"Conversion of module of type {submod_type} not currently supported!")
+            raise RuntimeError(
+                f"Conversion of module of type {submod_type} not currently supported!"
+            )
 
         assert self._cur_node_name is not None
         return converter(self.network, submod, args, kwargs, self._cur_node_name)
@@ -274,7 +320,9 @@ def call_function(self, target, args, kwargs):
         converter = CONVERTERS.get(target)
 
         if not converter:
-            raise RuntimeError(f"Conversion of function {torch.typename(target)} not currently supported!")
+            raise RuntimeError(
+                f"Conversion of function {torch.typename(target)} not currently supported!"
+            )
 
         assert self._cur_node_name is not None
         return converter(self.network, target, args, kwargs, self._cur_node_name)
@@ -284,7 +332,9 @@ def call_method(self, target, args, kwargs):
         converter = CONVERTERS.get(target)
 
         if not converter:
-            raise RuntimeError(f"Conversion of method {target} not currently supported!")
+            raise RuntimeError(
+                f"Conversion of method {target} not currently supported!"
+            )
 
         assert self._cur_node_name is not None
         return converter(self.network, target, args, kwargs, self._cur_node_name)
diff --git a/py/torch_tensorrt/fx/lower_setting.py b/py/torch_tensorrt/fx/lower_setting.py
index 4ea82a87f6..c1d02229e3 100644
--- a/py/torch_tensorrt/fx/lower_setting.py
+++ b/py/torch_tensorrt/fx/lower_setting.py
@@ -78,7 +78,9 @@ class LowerSetting(LowerSettingBasic):
     max_workspace_size: int = 1 << 30
     strict_type_constraints: bool = False
     customized_fuse_pass: PassManager = PassManager.build_from_passlist([])
-    lower_basic_fuse_pass: PassManager = PassManager.build_from_passlist([fuse_permute_matmul, fuse_permute_linear])
+    lower_basic_fuse_pass: PassManager = PassManager.build_from_passlist(
+        [fuse_permute_matmul, fuse_permute_linear]
+    )
     verbose_log: bool = False
     algo_selector = None
     timing_cache_prefix: str = ""
diff --git a/py/torch_tensorrt/fx/observer.py b/py/torch_tensorrt/fx/observer.py
index 8afeb040c0..3742bd2840 100644
--- a/py/torch_tensorrt/fx/observer.py
+++ b/py/torch_tensorrt/fx/observer.py
@@ -13,7 +13,9 @@
 # variable on the observer instance, however, contextvars document advice
 # against creating context variables not at module-global level.
 # https://docs.python.org/3/library/contextvars.html#contextvars.ContextVar
-_CALLBACKS: ContextVar[t.Dict["Observer", t.List[t.Callable]]] = ContextVar("_CALLBACKS", default=None)
+_CALLBACKS: ContextVar[t.Dict["Observer", t.List[t.Callable]]] = ContextVar(
+    "_CALLBACKS", default=None
+)
 
 TObserverCallback = t.TypeVar("TObserverCallback", bound=t.Callable[..., t.Any])
 
@@ -63,7 +65,9 @@ def _add():
 
     def observe(self, *args, **kwargs) -> None:
         for callback in self._get_callbacks():
-            with _log_error("Error calling observer callback", rethrow=RETHROW_CALLBACK_EXCEPTION):
+            with _log_error(
+                "Error calling observer callback", rethrow=RETHROW_CALLBACK_EXCEPTION
+            ):
                 callback(*args, **kwargs)
 
     def _get_callbacks(self) -> t.List[t.Callable]:
@@ -169,7 +173,9 @@ def observed_func(*args, **kwargs):
             return_value = orig_func(*args, **kwargs)
             return return_value
         finally:
-            observers.post.observe(ObserveContext(orig_func, args, kwargs, return_value))
+            observers.post.observe(
+                ObserveContext(orig_func, args, kwargs, return_value)
+            )
 
     observed_func.orig_func = orig_func
     observed_func.observers = observers
diff --git a/py/torch_tensorrt/fx/passes/lower_basic_pass.py b/py/torch_tensorrt/fx/passes/lower_basic_pass.py
index 60447217b6..6dc2e86f22 100644
--- a/py/torch_tensorrt/fx/passes/lower_basic_pass.py
+++ b/py/torch_tensorrt/fx/passes/lower_basic_pass.py
@@ -111,7 +111,9 @@ def forward(self, x):
     return gm
 
 
-def trt_transposed_matmul(lhs: torch.Tensor, rhs: torch.Tensor, lhs_transposed: bool, rhs_transposed: bool):
+def trt_transposed_matmul(
+    lhs: torch.Tensor, rhs: torch.Tensor, lhs_transposed: bool, rhs_transposed: bool
+):
     if lhs_transposed:
         lhs = lhs.transpose(-1, -2)
     if rhs_transposed:
@@ -119,7 +121,9 @@ def trt_transposed_matmul(lhs: torch.Tensor, rhs: torch.Tensor, lhs_transposed:
     return torch.matmul(lhs, rhs)
 
 
-def trt_transposed_linear(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor):
+def trt_transposed_linear(
+    input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
+):
     return torch.matmul(input.transpose(-1, -2), weight.t()) + bias
 
 
@@ -147,7 +151,9 @@ def fuse_permute_linear(gm: torch.fx.GraphModule, input: Input):
                 weight = node.kwargs["weight"]
                 bias = node.kwargs["bias"]
                 with gm.graph.inserting_before(node):
-                    fused_node = gm.graph.call_function(trt_transposed_linear, args=(inp, weight, bias))
+                    fused_node = gm.graph.call_function(
+                        trt_transposed_linear, args=(inp, weight, bias)
+                    )
                     node.replace_all_uses_with(fused_node)
 
     gm.graph.eliminate_dead_code()
@@ -215,9 +221,13 @@ def trt_transposed_matmul_converter(network, target, args, kwargs, name):
             rhs = get_trt_tensor(network, rhs, f"{name}_rhs")
         layer = network.add_matrix_multiply(
             lhs,
-            trt.MatrixOperation.TRANSPOSE if lhs_transposed else trt.MatrixOperation.NONE,
+            trt.MatrixOperation.TRANSPOSE
+            if lhs_transposed
+            else trt.MatrixOperation.NONE,
             rhs,
-            trt.MatrixOperation.TRANSPOSE if rhs_transposed else trt.MatrixOperation.NONE,
+            trt.MatrixOperation.TRANSPOSE
+            if rhs_transposed
+            else trt.MatrixOperation.NONE,
         )
         set_layer_name(layer, target, name)
         return layer.get_output(0)
@@ -280,22 +290,30 @@ def slice_list(sli: slice, dim: int, size: int):
             return [slice_all, slice_all, slice_all, sli]
 
 
-def split_across(gm: torch.fx.GraphModule, sli: slice, input_node: torch.fx.Node, dim: int, size: int):
+def split_across(
+    gm: torch.fx.GraphModule, sli: slice, input_node: torch.fx.Node, dim: int, size: int
+):
     start_node = end_node = mid_node = None
     if sli.start is None and sli.stop is None:
         return (start_node, input_node, end_node)
     if sli.start is not None:
         st_sli = slice(0, sli.start, None)
         slice_list_gen = slice_list(st_sli, dim, size)
-        start_node = gm.graph.call_function(operator.getitem, args=(input_node, slice_list_gen))
+        start_node = gm.graph.call_function(
+            operator.getitem, args=(input_node, slice_list_gen)
+        )
     if sli.stop is not None:
         end_sli = slice(sli.stop, None, None)
         slice_list_gen = slice_list(end_sli, dim, size)
-        end_node = gm.graph.call_function(operator.getitem, args=(input_node, slice_list_gen))
+        end_node = gm.graph.call_function(
+            operator.getitem, args=(input_node, slice_list_gen)
+        )
     if dim != size - 1:
         mid_sli = slice(sli.start, sli.stop, None)
         slice_list_gen = slice_list(mid_sli, dim, size)
-        mid_node = gm.graph.call_function(operator.getitem, args=(input_node, slice_list_gen))
+        mid_node = gm.graph.call_function(
+            operator.getitem, args=(input_node, slice_list_gen)
+        )
     return (start_node, mid_node, end_node)
 
 
@@ -352,7 +370,9 @@ def transform_setitem(gm: torch.fx.GraphModule, input: Input):
 
                 if inp_flag:
                     with gm.graph.inserting_before(inp):
-                        new_node = gm.graph.call_function(operator.getitem, args=(inp.args[0], new_args))
+                        new_node = gm.graph.call_function(
+                            operator.getitem, args=(inp.args[0], new_args)
+                        )
                         inp.replace_all_uses_with(new_node)
                     inp = new_node
 
@@ -373,29 +393,61 @@ def transform_setitem(gm: torch.fx.GraphModule, input: Input):
             dimension = len(sli)
             with gm.graph.inserting_before(node):
                 if dimension == 1:
-                    start_node_0, _, end_node_0 = split_across(gm, sli[0], input_node, dim=0, size=1)
+                    start_node_0, _, end_node_0 = split_across(
+                        gm, sli[0], input_node, dim=0, size=1
+                    )
                     concat_node_0 = list_gen(start_node_0, end_node_0, inp, gm, 0)
                 elif dimension == 2:
-                    start_node_0, mid_node_0, end_node_0 = split_across(gm, sli[0], input_node, dim=0, size=2)
-                    start_node_1, _, end_node_1 = split_across(gm, sli[1], mid_node_0, dim=1, size=2)
+                    start_node_0, mid_node_0, end_node_0 = split_across(
+                        gm, sli[0], input_node, dim=0, size=2
+                    )
+                    start_node_1, _, end_node_1 = split_across(
+                        gm, sli[1], mid_node_0, dim=1, size=2
+                    )
                     concat_node_1 = list_gen(start_node_1, end_node_1, inp, gm, 1)
-                    concat_node_0 = list_gen(start_node_0, end_node_0, concat_node_1, gm, 0)
+                    concat_node_0 = list_gen(
+                        start_node_0, end_node_0, concat_node_1, gm, 0
+                    )
                 elif dimension == 3:
-                    start_node_0, mid_node_0, end_node_0 = split_across(gm, sli[0], input_node, dim=0, size=3)
-                    start_node_1, mid_node_1, end_node_1 = split_across(gm, sli[1], mid_node_0, dim=1, size=3)
-                    start_node_2, _, end_node_2 = split_across(gm, sli[2], mid_node_1, dim=2, size=3)
+                    start_node_0, mid_node_0, end_node_0 = split_across(
+                        gm, sli[0], input_node, dim=0, size=3
+                    )
+                    start_node_1, mid_node_1, end_node_1 = split_across(
+                        gm, sli[1], mid_node_0, dim=1, size=3
+                    )
+                    start_node_2, _, end_node_2 = split_across(
+                        gm, sli[2], mid_node_1, dim=2, size=3
+                    )
                     concat_node_2 = list_gen(start_node_2, end_node_2, inp, gm, 2)
-                    concat_node_1 = list_gen(start_node_1, end_node_1, concat_node_2, gm, 1)
-                    concat_node_0 = list_gen(start_node_0, end_node_0, concat_node_1, gm, 0)
+                    concat_node_1 = list_gen(
+                        start_node_1, end_node_1, concat_node_2, gm, 1
+                    )
+                    concat_node_0 = list_gen(
+                        start_node_0, end_node_0, concat_node_1, gm, 0
+                    )
                 elif dimension == 4:
-                    start_node_0, mid_node_0, end_node_0 = split_across(gm, sli[0], input_node, dim=0, size=4)
-                    start_node_1, mid_node_1, end_node_1 = split_across(gm, sli[1], mid_node_0, dim=1, size=4)
-                    start_node_2, mid_node_2, end_node_2 = split_across(gm, sli[2], mid_node_1, dim=2, size=4)
-                    start_node_3, _, end_node_3 = split_across(gm, sli[3], mid_node_2, dim=3, size=4)
+                    start_node_0, mid_node_0, end_node_0 = split_across(
+                        gm, sli[0], input_node, dim=0, size=4
+                    )
+                    start_node_1, mid_node_1, end_node_1 = split_across(
+                        gm, sli[1], mid_node_0, dim=1, size=4
+                    )
+                    start_node_2, mid_node_2, end_node_2 = split_across(
+                        gm, sli[2], mid_node_1, dim=2, size=4
+                    )
+                    start_node_3, _, end_node_3 = split_across(
+                        gm, sli[3], mid_node_2, dim=3, size=4
+                    )
                     concat_node_3 = list_gen(start_node_3, end_node_3, inp, gm, 3)
-                    concat_node_2 = list_gen(start_node_2, end_node_2, concat_node_3, gm, 2)
-                    concat_node_1 = list_gen(start_node_1, end_node_1, concat_node_2, gm, 1)
-                    concat_node_0 = list_gen(start_node_0, end_node_0, concat_node_1, gm, 0)
+                    concat_node_2 = list_gen(
+                        start_node_2, end_node_2, concat_node_3, gm, 2
+                    )
+                    concat_node_1 = list_gen(
+                        start_node_1, end_node_1, concat_node_2, gm, 1
+                    )
+                    concat_node_0 = list_gen(
+                        start_node_0, end_node_0, concat_node_1, gm, 0
+                    )
                 else:
                     warnings.warn(f"setitem does not support dimension={dimension}")
                     continue
diff --git a/py/torch_tensorrt/fx/passes/pass_utils.py b/py/torch_tensorrt/fx/passes/pass_utils.py
index e5abef446c..d430a67408 100644
--- a/py/torch_tensorrt/fx/passes/pass_utils.py
+++ b/py/torch_tensorrt/fx/passes/pass_utils.py
@@ -70,10 +70,14 @@ def pass_with_validation(
                         f"Pass {pass_} failed correctness check, get original model output as {x} and processed model output as {y} for output {kk}."
                     )
                     if suppress_accuracy_check_failure:
-                        _LOGGER.error(f"Pass {pass_} failed correctness check due to output {kk}.")
+                        _LOGGER.error(
+                            f"Pass {pass_} failed correctness check due to output {kk}."
+                        )
                         return processed_module
                     else:
-                        raise RuntimeError(f"Pass {pass_} failed correctness check due to output {kk}")
+                        raise RuntimeError(
+                            f"Pass {pass_} failed correctness check due to output {kk}"
+                        )
             return processed_module
 
         return pass_with_validation
@@ -104,7 +108,9 @@ def log_before_after(pass_: PassFunc) -> PassFunc:
     """
 
     @wraps(pass_)
-    def pass_with_before_after_log(module: fx.GraphModule, input: Input) -> fx.GraphModule:
+    def pass_with_before_after_log(
+        module: fx.GraphModule, input: Input
+    ) -> fx.GraphModule:
         with tempfile.NamedTemporaryFile(
             mode="w",
             encoding="utf-8",
diff --git a/py/torch_tensorrt/fx/passes/remove_duplicate_output_args.py b/py/torch_tensorrt/fx/passes/remove_duplicate_output_args.py
index 6d8538df89..84a522a3f0 100644
--- a/py/torch_tensorrt/fx/passes/remove_duplicate_output_args.py
+++ b/py/torch_tensorrt/fx/passes/remove_duplicate_output_args.py
@@ -97,7 +97,9 @@ def _ensure_proper_output_use(user: fx.Node, target_node: fx.Node) -> int:
 
 def _remove_duplicate_output_args(gm: fx.GraphModule) -> RemoveDuplicateResult:
     output_nodes = [n for n in gm.graph.nodes if n.op == "output"]
-    assert len(output_nodes) == 1, f"Expecting exactly one `output` node, but got {len(output_nodes)}"
+    assert (
+        len(output_nodes) == 1
+    ), f"Expecting exactly one `output` node, but got {len(output_nodes)}"
 
     changed = False
     # arg node name to its index in the new output args tuple
@@ -126,7 +128,10 @@ def _remove_duplicate_output_args(gm: fx.GraphModule) -> RemoveDuplicateResult:
             name_to_idx[a.name] = len(args_new) - 1
         else:
             changed = True
-            _LOGGER.warning(f"Replaced duplicate output arg '{a.name}': " f"{idx} -> {name_to_idx[a.name]}")
+            _LOGGER.warning(
+                f"Replaced duplicate output arg '{a.name}': "
+                f"{idx} -> {name_to_idx[a.name]}"
+            )
         replacement_map[idx] = name_to_idx[a.name]
 
     output_node.args = (tuple(args_new),)
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_adaptive_avgpool.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_adaptive_avgpool.py
index 741e6328dc..af211f79d3 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_adaptive_avgpool.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_adaptive_avgpool.py
@@ -44,7 +44,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 256, 256), (3, 3, 256, 256), (5, 5, 256, 256))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.adaptive_avg_pool2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.adaptive_avg_pool2d}
+        )
 
     @parameterized.expand(
         [
@@ -81,10 +83,14 @@ def forward(self, x):
             InputTensorSpec(
                 shape=(-1, -1, 32, 64, 64),
                 dtype=torch.float32,
-                shape_ranges=[((1, 1, 32, 64, 64), (3, 3, 32, 64, 64), (5, 5, 32, 64, 64))],
+                shape_ranges=[
+                    ((1, 1, 32, 64, 64), (3, 3, 32, 64, 64), (5, 5, 32, 64, 64))
+                ],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.adaptive_avg_pool3d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.adaptive_avg_pool3d}
+        )
 
     #  Testing with shape(-1, -1, -1, -1) results into error: "AdaptiveAvgPool2d and AdaptiveAvgPool3d currently doesn't support dynamic shapes for last two dims."
 
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_any.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_any.py
index c8deb79aee..e1d24766ae 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_any.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_any.py
@@ -42,7 +42,9 @@ def forward(self, x):
                 return torch.any(x, dim, keepdim=True)
 
         inputs = [torch.randn(2, 3).to(input_dtype)]
-        self.run_test(TestModule(), inputs, expected_ops={}, test_implicit_batch_dim=False)
+        self.run_test(
+            TestModule(), inputs, expected_ops={}, test_implicit_batch_dim=False
+        )
 
     @parameterized.expand(
         [
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_binary_ops.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_binary_ops.py
index c53c0234c0..e122c2a414 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_binary_ops.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_binary_ops.py
@@ -58,7 +58,9 @@ def forward(self, x):
         self.run_test(m, inputs, expected_ops={expected_op})
 
     @parameterized.expand([(op[1].__name__, op[0], op[1]) for op in elementwise_ops])
-    def test_elementwise_ops_with_one_constant(self, name, orig_op: Callable, expected_op):
+    def test_elementwise_ops_with_one_constant(
+        self, name, orig_op: Callable, expected_op
+    ):
         class TestModule(nn.Module):
             def __init__(self, orig_op):
                 super().__init__()
@@ -73,8 +75,12 @@ def forward(self, x):
         inputs = [torch.randn(2, 2)]
         self.run_test(m, inputs, expected_ops={expected_op})
 
-    @parameterized.expand([(op[1].__name__, op[0], op[1]) for op in elementwise_ops if op[2]])
-    def test_elementwise_op_with_both_constants(self, name, orig_op: Callable, expected_op):
+    @parameterized.expand(
+        [(op[1].__name__, op[0], op[1]) for op in elementwise_ops if op[2]]
+    )
+    def test_elementwise_op_with_both_constants(
+        self, name, orig_op: Callable, expected_op
+    ):
         class TestModule(nn.Module):
             def __init__(self, orig_op):
                 super().__init__()
@@ -156,7 +162,9 @@ def forward(self, x, y):
             for op in elementwise_ops
         ]
     )
-    def test_elementwise_op_with_dynamic_shape_four_dimensions(self, _, orig_op, expected_op):
+    def test_elementwise_op_with_dynamic_shape_four_dimensions(
+        self, _, orig_op, expected_op
+    ):
         class Op(nn.Module):
             def forward(self, x, y):
                 return orig_op(x, y)
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_chunk.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_chunk.py
index 3cff52b9bf..555f0ba24b 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_chunk.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_chunk.py
@@ -45,7 +45,9 @@ def forward(self, x):
                 shape_ranges=[((1, 10, 20), (5, 10, 20), (10, 10, 20))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Chunk(), input_specs, expected_ops={acc_ops.chunk})
+        self.run_test_with_dynamic_shape(
+            Chunk(), input_specs, expected_ops={acc_ops.chunk}
+        )
 
     # Testing with (-1, -1, -1, -1) results in Error: AssertionError: Can't chunk on dynamic shape dimension!
     @parameterized.expand(
@@ -68,7 +70,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Chunk(), input_specs, expected_ops={acc_ops.chunk})
+        self.run_test_with_dynamic_shape(
+            Chunk(), input_specs, expected_ops={acc_ops.chunk}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py
index d463b8ae0f..7f32b749c5 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py
@@ -41,7 +41,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.dequantize})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.dequantize}
+        )
 
     def test_dequantize_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -57,7 +59,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.dequantize})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.dequantize}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_einsum.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_einsum.py
index 2af62dde77..efc2c97a92 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_einsum.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_einsum.py
@@ -37,7 +37,9 @@ def forward(self, x, y):
             # TRT does not support ellipsis or diagonal operations
         ]
     )
-    def test_einsum_with_dynamic_shape_four_dimensions(self, _, equation, x_size, y_size):
+    def test_einsum_with_dynamic_shape_four_dimensions(
+        self, _, equation, x_size, y_size
+    ):
         class Einsum(nn.Module):
             def forward(self, x, y):
                 return torch.einsum(equation, x, y)
@@ -55,7 +57,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Einsum(), input_specs, expected_ops={acc_ops.einsum})
+        self.run_test_with_dynamic_shape(
+            Einsum(), input_specs, expected_ops={acc_ops.einsum}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_elu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_elu.py
index 5420736863..1482654cfd 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_elu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_elu.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.elu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.elu}
+        )
 
     def test_elu_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.elu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.elu}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_embedding.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_embedding.py
index ee1d2b7402..19a867d78d 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_embedding.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_embedding.py
@@ -8,7 +8,9 @@
 from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec
 
 
-@unittest.skip("Current implementation is limited. All implementations in hf use int64. T113156424")
+@unittest.skip(
+    "Current implementation is limited. All implementations in hf use int64. T113156424"
+)
 class TestEmbeddingConverter(AccTestCase):
     @parameterized.expand(
         [
@@ -96,7 +98,9 @@ def forward(self, indices, weights):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestEmbedding(), input_specs, expected_ops={acc_ops.embedding})
+        self.run_test_with_dynamic_shape(
+            TestEmbedding(), input_specs, expected_ops={acc_ops.embedding}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_eq.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_eq.py
index 00e3d60650..257375c7ca 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_eq.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_eq.py
@@ -43,7 +43,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqMethodConverter(AccTestCase):
@@ -84,7 +86,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorConverter(AccTestCase):
@@ -125,7 +129,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverter(AccTestCase):
@@ -173,7 +179,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverterWithDynamicShape(AccTestCase):
@@ -234,7 +242,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverter(AccTestCase):
@@ -250,7 +260,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.eq}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverterWithDynamicShape(AccTestCase):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_getitem.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_getitem.py
index 701f697b8d..27779919fe 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_getitem.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_getitem.py
@@ -105,7 +105,9 @@ def forward(self, x):
                 shape_ranges=[((1, 256, 256), (3, 256, 256), (5, 256, 256))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Getitem(idx), input_specs, expected_ops={acc_ops.getitem})
+        self.run_test_with_dynamic_shape(
+            Getitem(idx), input_specs, expected_ops={acc_ops.getitem}
+        )
 
     @parameterized.expand(
         [
@@ -142,7 +144,9 @@ def forward(self, x):
                 shape_ranges=[((1, 128, 256), (3, 192, 256), (5, 256, 256))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Getitem(idx), input_specs, expected_ops={acc_ops.getitem})
+        self.run_test_with_dynamic_shape(
+            Getitem(idx), input_specs, expected_ops={acc_ops.getitem}
+        )
 
     # Testing with following parameters results into Error:
     # AssertionError: We don't support slicing tensor on dynamic shape.
@@ -185,7 +189,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Getitem(idx), input_specs, expected_ops={acc_ops.getitem})
+        self.run_test_with_dynamic_shape(
+            Getitem(idx), input_specs, expected_ops={acc_ops.getitem}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_gt.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_gt.py
index b75b633e1b..4dc725e9f7 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_gt.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_gt.py
@@ -38,7 +38,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestGtMethodConverter(AccTestCase):
@@ -74,7 +76,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestGtOperatorConverter(AccTestCase):
@@ -110,7 +114,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverter(AccTestCase):
@@ -158,7 +164,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverterWithDynamicShape(AccTestCase):
@@ -221,7 +229,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverter(AccTestCase):
@@ -237,7 +247,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Gt(), inputs, expected_ops={acc_ops.gt}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverterWithDynamicShape(AccTestCase):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_hard_sigmoid.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_hard_sigmoid.py
index 2e3d49d41d..ad0c9bd0fe 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_hard_sigmoid.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_hard_sigmoid.py
@@ -34,7 +34,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Hardsigmoid(), input_specs, expected_ops={acc_ops.hardsigmoid})
+        self.run_test_with_dynamic_shape(
+            Hardsigmoid(), input_specs, expected_ops={acc_ops.hardsigmoid}
+        )
 
     def test_hardsigmoid_with_dynamic_shape_four_dimensions(self):
         class Hardsigmoid(nn.Module):
@@ -48,7 +50,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1, 1), (1, 2, 3, 3), (3, 3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Hardsigmoid(), input_specs, expected_ops={acc_ops.hardsigmoid})
+        self.run_test_with_dynamic_shape(
+            Hardsigmoid(), input_specs, expected_ops={acc_ops.hardsigmoid}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_hardtanh.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_hardtanh.py
index 90aad52ceb..c1d2ed650e 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_hardtanh.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_hardtanh.py
@@ -17,7 +17,9 @@ class TestHardtanhConverter(AccTestCase):
     def test_hardtanh(self, test_min_value, test_max_value):
         class Hardtanh(nn.Module):
             def forward(self, x):
-                return nn.functional.hardtanh(x, min_val=test_min_value, max_val=test_max_value)
+                return nn.functional.hardtanh(
+                    x, min_val=test_min_value, max_val=test_max_value
+                )
 
         inputs = [torch.randn(2, 10, 10, 10)]
         self.run_test(Hardtanh(), inputs, expected_ops={acc_ops.hardtanh})
@@ -34,7 +36,9 @@ class TestHardtanhConverterWithDynamicShape(AccTestCase):
     def test_hardtanh(self, test_min_value, test_max_value):
         class Hardtanh(nn.Module):
             def forward(self, x):
-                return nn.functional.hardtanh(x, min_val=test_min_value, max_val=test_max_value)
+                return nn.functional.hardtanh(
+                    x, min_val=test_min_value, max_val=test_max_value
+                )
 
         input_specs = [
             InputTensorSpec(
@@ -44,7 +48,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Hardtanh(), input_specs, expected_ops={acc_ops.hardtanh})
+        self.run_test_with_dynamic_shape(
+            Hardtanh(), input_specs, expected_ops={acc_ops.hardtanh}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_isinf.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_isinf.py
index 4b6d2d5690..d8ec10a71b 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_isinf.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_isinf.py
@@ -21,7 +21,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Test(), inputs, expected_ops={acc_ops.isinf}, test_implicit_batch_dim=False)
+        self.run_test(
+            Test(), inputs, expected_ops={acc_ops.isinf}, test_implicit_batch_dim=False
+        )
 
     def test_isinf_large(self):
         class Test(torch.nn.Module):
@@ -35,7 +37,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Test(), inputs, expected_ops={acc_ops.isinf}, test_implicit_batch_dim=False)
+        self.run_test(
+            Test(), inputs, expected_ops={acc_ops.isinf}, test_implicit_batch_dim=False
+        )
 
     def test_isinf_large_with_dynamic_shape_four_dimensions(self):
         class Test(torch.nn.Module):
@@ -50,7 +54,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Test(), input_specs, expected_ops={acc_ops.isinf})
+        self.run_test_with_dynamic_shape(
+            Test(), input_specs, expected_ops={acc_ops.isinf}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_leaky_relu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_leaky_relu.py
index 88ff3d0ca5..0df494baac 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_leaky_relu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_leaky_relu.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.leaky_relu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.leaky_relu}
+        )
 
     def test_leaky_relu_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.leaky_relu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.leaky_relu}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_and.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_and.py
index f1a49960ca..9ca6e176a5 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_and.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_and.py
@@ -77,7 +77,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(And(), input_specs, expected_ops={acc_ops.logical_and})
+        self.run_test_with_dynamic_shape(
+            And(), input_specs, expected_ops={acc_ops.logical_and}
+        )
 
 
 class TestAndFunctionSimpleConverter(AccTestCase):
@@ -219,7 +221,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(And(), input_specs, expected_ops={acc_ops.logical_and})
+        self.run_test_with_dynamic_shape(
+            And(), input_specs, expected_ops={acc_ops.logical_and}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_or.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_or.py
index 2f910bf3d4..7dba20b214 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_or.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_or.py
@@ -64,7 +64,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalOr(), input_specs, expected_ops={acc_ops.logical_or})
+        self.run_test_with_dynamic_shape(
+            LogicalOr(), input_specs, expected_ops={acc_ops.logical_or}
+        )
 
 
 class TestLogicalOrFunctionSimpleConverter(AccTestCase):
@@ -126,7 +128,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalOr(), input_specs, expected_ops={acc_ops.logical_or})
+        self.run_test_with_dynamic_shape(
+            LogicalOr(), input_specs, expected_ops={acc_ops.logical_or}
+        )
 
 
 class TestLogicalOrOperatorSimpleConverter(AccTestCase):
@@ -188,7 +192,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalOr(), input_specs, expected_ops={acc_ops.logical_or})
+        self.run_test_with_dynamic_shape(
+            LogicalOr(), input_specs, expected_ops={acc_ops.logical_or}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_xor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_xor.py
index 8470bf938c..54b0490a57 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_xor.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_logical_xor.py
@@ -64,7 +64,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor})
+        self.run_test_with_dynamic_shape(
+            LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor}
+        )
 
 
 class TestLogicalXorFunctionSimpleConverter(AccTestCase):
@@ -126,7 +128,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor})
+        self.run_test_with_dynamic_shape(
+            LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor}
+        )
 
 
 class TestLogicalXorOperatorSimpleConverter(AccTestCase):
@@ -188,7 +192,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor})
+        self.run_test_with_dynamic_shape(
+            LogicalXor(), input_specs, expected_ops={acc_ops.logical_xor}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_lt.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_lt.py
index 6dd785e8d6..7184e80656 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_lt.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_lt.py
@@ -38,7 +38,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestLtMethodConverter(AccTestCase):
@@ -74,7 +76,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestLtOperatorConverter(AccTestCase):
@@ -110,7 +114,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverter(AccTestCase):
@@ -158,7 +164,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestEqOperatorSimpleConverterWithDynamicShape(AccTestCase):
@@ -219,7 +227,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Eq(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Eq(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverter(AccTestCase):
@@ -235,7 +245,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False)
+        self.run_test(
+            Lt(), inputs, expected_ops={acc_ops.lt}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverterWithDynamicShape(AccTestCase):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_maximum.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_maximum.py
index b180a990ed..3924173911 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_maximum.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_maximum.py
@@ -36,7 +36,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Maximum(), input_specs, expected_ops={acc_ops.maximum})
+        self.run_test_with_dynamic_shape(
+            Maximum(), input_specs, expected_ops={acc_ops.maximum}
+        )
 
 
 class TestMaximumMethodConverter(AccTestCase):
@@ -71,7 +73,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Maximum(), input_specs, expected_ops={acc_ops.maximum})
+        self.run_test_with_dynamic_shape(
+            Maximum(), input_specs, expected_ops={acc_ops.maximum}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_maxpool.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_maxpool.py
index 82777d05a4..024452e8e5 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_maxpool.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_maxpool.py
@@ -28,7 +28,9 @@ def test_max_pool1d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.max_pool = torch.nn.MaxPool1d(kernel_size, stride, padding, ceil_mode=ceil_mode, dilation=dilation)
+                self.max_pool = torch.nn.MaxPool1d(
+                    kernel_size, stride, padding, ceil_mode=ceil_mode, dilation=dilation
+                )
 
             def forward(self, x):
                 return self.max_pool(x)
@@ -86,7 +88,9 @@ def test_max_pool2d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.max_pool = torch.nn.MaxPool2d(kernel_size, stride, padding, ceil_mode=ceil_mode)
+                self.max_pool = torch.nn.MaxPool2d(
+                    kernel_size, stride, padding, ceil_mode=ceil_mode
+                )
 
             def forward(self, x):
                 return self.max_pool(x)
@@ -112,7 +116,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1, 1), (1, 2, 4, 4), (2, 4, 4, 4))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.max_pool2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.max_pool2d}
+        )
 
     @parameterized.expand(
         [
@@ -134,7 +140,9 @@ def test_max_pool3d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.max_pool = torch.nn.MaxPool3d(kernel_size, stride, padding, ceil_mode=ceil_mode)
+                self.max_pool = torch.nn.MaxPool3d(
+                    kernel_size, stride, padding, ceil_mode=ceil_mode
+                )
 
             def forward(self, x):
                 return self.max_pool(x)
@@ -158,7 +166,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1, 1, 1), (1, 2, 4, 4, 4), (2, 4, 4, 4, 4))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.max_pool3d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.max_pool3d}
+        )
 
     @parameterized.expand(
         [
@@ -325,7 +335,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.max_pool2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.max_pool2d}
+        )
 
     @parameterized.expand(
         [
@@ -358,7 +370,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.max_pool3d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.max_pool3d}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_minimum.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_minimum.py
index 5af2af4073..e0bd2ee94f 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_minimum.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_minimum.py
@@ -49,7 +49,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Minimum(), input_specs, expected_ops={acc_ops.minimum})
+        self.run_test_with_dynamic_shape(
+            Minimum(), input_specs, expected_ops={acc_ops.minimum}
+        )
 
 
 class TestMinimumMethodConverterWithDynamicShape(AccTestCase):
@@ -71,7 +73,9 @@ def forward(self, x, y):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Minimum(), input_specs, expected_ops={acc_ops.minimum})
+        self.run_test_with_dynamic_shape(
+            Minimum(), input_specs, expected_ops={acc_ops.minimum}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_ne.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_ne.py
index 90ac8007be..0e0e8f70d9 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_ne.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_ne.py
@@ -50,7 +50,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False)
+        self.run_test(
+            Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False
+        )
 
 
 class TestNeFunctionConverterWithDynamicShape(AccTestCase):
@@ -120,7 +122,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False)
+        self.run_test(
+            Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False
+        )
 
 
 class TestNeMethodConverterWithDynamicShape(AccTestCase):
@@ -190,7 +194,9 @@ def forward(self, x, y):
             input,
             other,
         ]
-        self.run_test(Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False)
+        self.run_test(
+            Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False
+        )
 
 
 class TestNeOperatorConverterWithDynamicShape(AccTestCase):
@@ -251,7 +257,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False)
+        self.run_test(
+            Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverter(AccTestCase):
@@ -267,7 +275,9 @@ def forward(self, x):
         inputs = [
             input,
         ]
-        self.run_test(Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False)
+        self.run_test(
+            Ne(), inputs, expected_ops={acc_ops.ne}, test_implicit_batch_dim=False
+        )
 
 
 class TestConstInputConverterWithDynamicShape(AccTestCase):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_new_ones.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_new_ones.py
index ed6d8e5dab..206d088a55 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_new_ones.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_new_ones.py
@@ -60,7 +60,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.new_ones})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.new_ones}
+        )
 
     def test_newone_no_dtype(self):
         class TestModule(nn.Module):
@@ -75,7 +77,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.new_ones})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.new_ones}
+        )
 
     def test_newone_device(self):
         class TestModule(nn.Module):
@@ -90,7 +94,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.new_ones})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.new_ones}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_permute.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_permute.py
index 8acd236c3b..4e85248b8c 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_permute.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_permute.py
@@ -61,7 +61,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Permute(), input_specs, expected_ops={acc_ops.permute})
+        self.run_test_with_dynamic_shape(
+            Permute(), input_specs, expected_ops={acc_ops.permute}
+        )
 
     def test_permute_with_dynamic_shape_four_dimensions(self):
         class Permute(nn.Module):
@@ -76,7 +78,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Permute(), input_specs, expected_ops={acc_ops.permute})
+        self.run_test_with_dynamic_shape(
+            Permute(), input_specs, expected_ops={acc_ops.permute}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py
index 92d383c3f9..c7b050c4ac 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py
@@ -39,7 +39,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.quantize_per_tensor})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.quantize_per_tensor}
+        )
 
     def test_quantize_per_tensor_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -54,7 +56,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.quantize_per_tensor})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.quantize_per_tensor}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_relu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_relu.py
index 0a169f6a4b..0ef2558ca0 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_relu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_relu.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.relu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.relu}
+        )
 
     def test_relu_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.relu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.relu}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_repeat_interleave.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_repeat_interleave.py
index daac7a25c3..e933146441 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_repeat_interleave.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_repeat_interleave.py
@@ -67,7 +67,9 @@ def forward(self, x):
                 ],
             ),
         ]
-        self.run_test_with_dynamic_shape(RepeatInterleave(dim), input_specs, expected_ops={acc_ops.tile})
+        self.run_test_with_dynamic_shape(
+            RepeatInterleave(dim), input_specs, expected_ops={acc_ops.tile}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_reshape.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_reshape.py
index 8b2c434163..4776ed7a95 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_reshape.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_reshape.py
@@ -46,7 +46,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(target_shape), input_specs, expected_ops={acc_ops.reshape})
+        self.run_test_with_dynamic_shape(
+            TestModule(target_shape), input_specs, expected_ops={acc_ops.reshape}
+        )
 
     @parameterized.expand(
         [
@@ -71,7 +73,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(target_shape), input_specs, expected_ops={acc_ops.reshape})
+        self.run_test_with_dynamic_shape(
+            TestModule(target_shape), input_specs, expected_ops={acc_ops.reshape}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_selu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_selu.py
index 9ba2e5fad9..955ddc82f7 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_selu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_selu.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.selu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.selu}
+        )
 
     def test_selu_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.selu})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.selu}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_sigmoid.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_sigmoid.py
index 7108e63fde..835e50c10a 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_sigmoid.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_sigmoid.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1, 1), (1, 2, 3, 3), (3, 3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Sigmoid(), input_specs, expected_ops={acc_ops.sigmoid})
+        self.run_test_with_dynamic_shape(
+            Sigmoid(), input_specs, expected_ops={acc_ops.sigmoid}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_silu.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_silu.py
index 94eadc1dd6..38d8f5b645 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_silu.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_silu.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Silu(), input_specs, expected_ops={acc_ops.sigmoid, acc_ops.mul})
+        self.run_test_with_dynamic_shape(
+            Silu(), input_specs, expected_ops={acc_ops.sigmoid, acc_ops.mul}
+        )
 
     def test_silu_with_dynamic_shape_four_dimensions(self):
         class Silu(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Silu(), input_specs, expected_ops={acc_ops.sigmoid, acc_ops.mul})
+        self.run_test_with_dynamic_shape(
+            Silu(), input_specs, expected_ops={acc_ops.sigmoid, acc_ops.mul}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_size.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_size.py
index 2eca25534f..f7e55b12f6 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_size.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_size.py
@@ -44,7 +44,9 @@ def forward(self, x):
                 shape_ranges=[((1, 12, 32), (3, 12, 32), (100, 12, 32))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Size(), input_specs, expected_ops={acc_ops.size})
+        self.run_test_with_dynamic_shape(
+            Size(), input_specs, expected_ops={acc_ops.size}
+        )
 
     def test_size_dynamic_shape_four_dimensions(self):
         class Size(nn.Module):
@@ -60,7 +62,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Size(), input_specs, expected_ops={acc_ops.size})
+        self.run_test_with_dynamic_shape(
+            Size(), input_specs, expected_ops={acc_ops.size}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_softmax.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_softmax.py
index 670b0261f1..eca8a01607 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_softmax.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_softmax.py
@@ -7,7 +7,9 @@
 
 
 class TestSoftmaxConverter(AccTestCase):
-    @parameterized.expand([("none_dim", None), ("basic", 1), ("batch_dim", 0), ("negative_dim", -2)])
+    @parameterized.expand(
+        [("none_dim", None), ("basic", 1), ("batch_dim", 0), ("negative_dim", -2)]
+    )
     def test_softmax(self, _, dim):
         class Softmax(nn.Module):
             def __init__(self, dim):
@@ -37,7 +39,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Softmax(), input_specs, expected_ops={acc_ops.softmax})
+        self.run_test_with_dynamic_shape(
+            Softmax(), input_specs, expected_ops={acc_ops.softmax}
+        )
 
     def test_softmax_with_dynamic_shape_four_dimensions(self):
         class Softmax(nn.Module):
@@ -52,7 +56,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Softmax(), input_specs, expected_ops={acc_ops.softmax})
+        self.run_test_with_dynamic_shape(
+            Softmax(), input_specs, expected_ops={acc_ops.softmax}
+        )
 
     def test_softmax_with_implicit_batch_dim0_fail(self):
         class Softmax(nn.Module):
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_softsign.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_softsign.py
index dbfe4855f0..5f1b907bac 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_softsign.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_softsign.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.softsign})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.softsign}
+        )
 
     def test_softsign_with_dynamic_shape_four_dimensions(self):
         class TestModule(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.softsign})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.softsign}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py
index 1474d7f0ee..29d174d9fd 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py
@@ -22,7 +22,11 @@ def forward(self, x):
         self.run_test(
             Split(),
             inputs,
-            expected_ops={acc_ops.split if isinstance(split_size_or_sections, int) else acc_ops.slice_tensor},
+            expected_ops={
+                acc_ops.split
+                if isinstance(split_size_or_sections, int)
+                else acc_ops.slice_tensor
+            },
             test_explicit_batch_dim=False,
         )
 
@@ -65,7 +69,11 @@ def forward(self, x):
         self.run_test_with_dynamic_shape(
             Split(),
             input_specs,
-            expected_ops={acc_ops.split if isinstance(split_size_or_sections, int) else acc_ops.slice_tensor},
+            expected_ops={
+                acc_ops.split
+                if isinstance(split_size_or_sections, int)
+                else acc_ops.slice_tensor
+            },
         )
 
     # Testing with (-1, -1, -1) results into following error:
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_squeeze.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_squeeze.py
index b655b7ba0b..d265def896 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_squeeze.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_squeeze.py
@@ -32,7 +32,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 2), (1, 2, 2), (1, 3, 2))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Squeeze(), input_specs, expected_ops={acc_ops.squeeze})
+        self.run_test_with_dynamic_shape(
+            Squeeze(), input_specs, expected_ops={acc_ops.squeeze}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_tanh.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_tanh.py
index dab68d4bdd..5b4ce2903a 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_tanh.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_tanh.py
@@ -26,7 +26,9 @@ def forward(self, x):
                 shape_ranges=[((1, 1, 1), (1, 2, 3), (3, 3, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Tanh(), input_specs, expected_ops={acc_ops.tanh})
+        self.run_test_with_dynamic_shape(
+            Tanh(), input_specs, expected_ops={acc_ops.tanh}
+        )
 
     def test_tanh_with_dynamic_shape_four_dimensions(self):
         class Tanh(nn.Module):
@@ -41,7 +43,9 @@ def forward(self, x):
             ),
         ]
 
-        self.run_test_with_dynamic_shape(Tanh(), input_specs, expected_ops={acc_ops.tanh})
+        self.run_test_with_dynamic_shape(
+            Tanh(), input_specs, expected_ops={acc_ops.tanh}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_transpose_convolution.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_transpose_convolution.py
index f9732eae68..04376a306b 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_transpose_convolution.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_transpose_convolution.py
@@ -66,7 +66,9 @@ def forward(self, x):
                 shape_ranges=[((1, 3, 1, 1), (1, 3, 4, 4), (32, 3, 128, 128))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.conv_transpose2d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.conv_transpose2d}
+        )
 
     @parameterized.expand(
         [
@@ -126,7 +128,9 @@ def forward(self, x):
                 shape_ranges=[((1, 3, 1, 1, 1), (1, 3, 4, 4, 4), (8, 3, 32, 32, 32))],
             ),
         ]
-        self.run_test_with_dynamic_shape(TestModule(), input_specs, expected_ops={acc_ops.conv_transpose3d})
+        self.run_test_with_dynamic_shape(
+            TestModule(), input_specs, expected_ops={acc_ops.conv_transpose3d}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_unsqueeze.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_unsqueeze.py
index 17773dc50c..26d23e0e54 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_unsqueeze.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_unsqueeze.py
@@ -51,7 +51,9 @@ def forward(self, x):
                 shape_ranges=[((1, 2, 3), (2, 2, 3), (3, 2, 3))],
             ),
         ]
-        self.run_test_with_dynamic_shape(Unsqueeze(dim), input_specs, expected_ops={acc_ops.unsqueeze})
+        self.run_test_with_dynamic_shape(
+            Unsqueeze(dim), input_specs, expected_ops={acc_ops.unsqueeze}
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/converters/vanilla/test_convolution_vanilla.py b/py/torch_tensorrt/fx/test/converters/vanilla/test_convolution_vanilla.py
index a4b24b6d07..384d55d44e 100644
--- a/py/torch_tensorrt/fx/test/converters/vanilla/test_convolution_vanilla.py
+++ b/py/torch_tensorrt/fx/test/converters/vanilla/test_convolution_vanilla.py
@@ -31,7 +31,9 @@ def test_conv1d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv1d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv1d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -62,7 +64,9 @@ def test_conv2d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv2d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv2d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
@@ -93,7 +97,9 @@ def test_conv3d(
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
-                self.conv = torch.nn.Conv3d(3, 6, kernel_size, stride, padding, dilation, groups, bias)
+                self.conv = torch.nn.Conv3d(
+                    3, 6, kernel_size, stride, padding, dilation, groups, bias
+                )
 
             def forward(self, x):
                 return self.conv(x)
diff --git a/py/torch_tensorrt/fx/test/core/test_trt_module.py b/py/torch_tensorrt/fx/test/core/test_trt_module.py
index 9639fdf280..ce44be758d 100644
--- a/py/torch_tensorrt/fx/test/core/test_trt_module.py
+++ b/py/torch_tensorrt/fx/test/core/test_trt_module.py
@@ -27,7 +27,9 @@ def forward(self, x):
         torch.save(trt_mod, "trt.pt")
         reload_trt_mod = torch.load("trt.pt")
 
-        torch.testing.assert_allclose(reload_trt_mod(inputs[0].cuda()).cpu(), ref_output, rtol=1e-04, atol=1e-04)
+        torch.testing.assert_allclose(
+            reload_trt_mod(inputs[0].cuda()).cpu(), ref_output, rtol=1e-04, atol=1e-04
+        )
         os.remove(f"{os.getcwd()}/trt.pt")
 
     def test_save_and_load_state_dict(self):
@@ -47,7 +49,9 @@ def forward(self, x):
         new_trt_mod = TRTModule()
         new_trt_mod.load_state_dict(st)
 
-        torch.testing.assert_allclose(new_trt_mod(inputs[0].cuda()).cpu(), ref_output, rtol=1e-04, atol=1e-04)
+        torch.testing.assert_allclose(
+            new_trt_mod(inputs[0].cuda()).cpu(), ref_output, rtol=1e-04, atol=1e-04
+        )
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/passes/test_fuse_permute_matmul_trt.py b/py/torch_tensorrt/fx/test/passes/test_fuse_permute_matmul_trt.py
index a750fa21b5..11f2cd3ce2 100644
--- a/py/torch_tensorrt/fx/test/passes/test_fuse_permute_matmul_trt.py
+++ b/py/torch_tensorrt/fx/test/passes/test_fuse_permute_matmul_trt.py
@@ -23,7 +23,9 @@ class TestFusePermuteMatmul(AccTestCase):
     @parameterized.expand(
         [
             ("transpose_lhs_bmm", (3, 3, 2), (3, 3, 4), tranpose_last_two_dims),
-            param("transpose_rhs_bmm", (3, 2, 3), (3, 4, 3), rhs_op=tranpose_last_two_dims),
+            param(
+                "transpose_rhs_bmm", (3, 2, 3), (3, 4, 3), rhs_op=tranpose_last_two_dims
+            ),
             ("permute_lhs_bmm", (3, 3, 2), (3, 3, 4), permute021),
             param("permute_rhs_bmm", (3, 2, 3), (3, 4, 3), rhs_op=permute021),
             ("permute_both_bmm", (3, 3, 2), (3, 4, 3), permute021, permute021),
diff --git a/py/torch_tensorrt/fx/test/passes/test_remove_duplicate_output_args.py b/py/torch_tensorrt/fx/test/passes/test_remove_duplicate_output_args.py
index adc385cdf8..1bb76c6691 100644
--- a/py/torch_tensorrt/fx/test/passes/test_remove_duplicate_output_args.py
+++ b/py/torch_tensorrt/fx/test/passes/test_remove_duplicate_output_args.py
@@ -54,7 +54,9 @@ def is_leaf_module(self, m, qn):
     %add : [#users=1] = call_function[target=operator.add](args = (%getitem, %getitem_1), kwargs = {})
     return add
 """.strip()
-        assert ttop_graph_expected == ttop_graph_actual, f"Unexpected ttop graph: {ttop_graph_actual}"
+        assert (
+            ttop_graph_expected == ttop_graph_actual
+        ), f"Unexpected ttop graph: {ttop_graph_actual}"
 
         ttop_a_graph_actual = str(ttop.a.graph).strip()
         ttop_a_graph_expected = """
@@ -62,7 +64,9 @@ def is_leaf_module(self, m, qn):
     %x : [#users=1] = placeholder[target=x]
     return (x,)
 """.strip()
-        assert ttop_a_graph_expected == ttop_a_graph_actual, f"Unexpected ttop.a graph: {ttop_a_graph_actual}"
+        assert (
+            ttop_a_graph_expected == ttop_a_graph_actual
+        ), f"Unexpected ttop.a graph: {ttop_a_graph_actual}"
 
 
 if __name__ == "__main__":
diff --git a/py/torch_tensorrt/fx/test/passes/test_setitem.py b/py/torch_tensorrt/fx/test/passes/test_setitem.py
index 3a2ca84abb..357d15be30 100644
--- a/py/torch_tensorrt/fx/test/passes/test_setitem.py
+++ b/py/torch_tensorrt/fx/test/passes/test_setitem.py
@@ -248,7 +248,9 @@ def transform_fx(gm, example_inputs):
             ("c2", (3, 2, 4), (5, 2, 6), 1, 4, 1, 5),
         ]
     )
-    def test_setitem3d_2v_ext(self, name, x_shape, y_shape, start_0, end_0, start_2, end_2):
+    def test_setitem3d_2v_ext(
+        self, name, x_shape, y_shape, start_0, end_0, start_2, end_2
+    ):
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
@@ -281,7 +283,9 @@ def transform_fx(gm, example_inputs):
             ("c2", (2, 3, 4), (4, 5, 6), 1, 3, 1, 4, 1, 5),
         ]
     )
-    def test_setitem3d_3v(self, name, x_shape, y_shape, start_0, end_0, start_1, end_1, start_2, end_2):
+    def test_setitem3d_3v(
+        self, name, x_shape, y_shape, start_0, end_0, start_1, end_1, start_2, end_2
+    ):
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
@@ -348,7 +352,9 @@ def transform_fx(gm, example_inputs):
             ("c2", (2, 3, 4, 5), (2, 5, 4, 7), 1, 4, 1, 6),
         ]
     )
-    def test_setitem4d_2v_ext(self, name, x_shape, y_shape, start_1, end_1, start_3, end_3):
+    def test_setitem4d_2v_ext(
+        self, name, x_shape, y_shape, start_1, end_1, start_3, end_3
+    ):
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
@@ -381,7 +387,9 @@ def transform_fx(gm, example_inputs):
             ("c2", (2, 3, 4, 5), (2, 5, 6, 7), 1, 4, 1, 5, 1, 6),
         ]
     )
-    def test_setitem4d_3v(self, name, x_shape, y_shape, start_1, end_1, start_2, end_2, start_3, end_3):
+    def test_setitem4d_3v(
+        self, name, x_shape, y_shape, start_1, end_1, start_2, end_2, start_3, end_3
+    ):
         class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
diff --git a/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py b/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py
index 71dba94a79..3abba43ccb 100644
--- a/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py
+++ b/py/torch_tensorrt/fx/test/tracer/test_acc_tracer.py
@@ -104,13 +104,25 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
             outputs = [outputs]
             outputs_again = [outputs_again]
 
-        for ref_output, output, output_again in zip(ref_outputs, outputs, outputs_again):
+        for ref_output, output, output_again in zip(
+            ref_outputs, outputs, outputs_again
+        ):
             if enable_allclose:
-                torch.testing.assert_allclose(torch.nan_to_num(ref_output), torch.nan_to_num(output))
-                torch.testing.assert_allclose(torch.nan_to_num(ref_output), torch.nan_to_num(output_again))
+                torch.testing.assert_allclose(
+                    torch.nan_to_num(ref_output), torch.nan_to_num(output)
+                )
+                torch.testing.assert_allclose(
+                    torch.nan_to_num(ref_output), torch.nan_to_num(output_again)
+                )
             else:
-                self.assertTrue(torch.equal(torch.nan_to_num(ref_output), torch.nan_to_num(output)))
-                self.assertTrue(torch.equal(torch.nan_to_num(ref_output), torch.nan_to_num(output_again)))
+                self.assertTrue(
+                    torch.equal(torch.nan_to_num(ref_output), torch.nan_to_num(output))
+                )
+                self.assertTrue(
+                    torch.equal(
+                        torch.nan_to_num(ref_output), torch.nan_to_num(output_again)
+                    )
+                )
 
     def test_sum(self):
         self._make_acc_op_function_test(acc_ops.sum, torch.sum)
@@ -122,26 +134,40 @@ def test_prod(self):
 
     def test_mean(self):
         self._make_acc_op_function_test(acc_ops.mean, torch.mean)
-        self._make_acc_op_function_test(acc_ops.mean, torch.mean, dim=(1,), keepdim=True)
+        self._make_acc_op_function_test(
+            acc_ops.mean, torch.mean, dim=(1,), keepdim=True
+        )
 
     def test_pad(self):
-        self._make_acc_op_function_test(acc_ops.pad, torch.nn.functional.pad, pad=(2, 0))
+        self._make_acc_op_function_test(
+            acc_ops.pad, torch.nn.functional.pad, pad=(2, 0)
+        )
 
     def test_max(self):
         def torch_max(x, *args, **kwargs):
             return x.max(*args, **kwargs)
 
         self._make_acc_op_function_test(acc_ops.max_full_reduce, torch_max)
-        self._make_acc_op_function_test(acc_ops.max_dim_reduce, torch_max, dim=1, keepdim=True)
-        self._make_acc_op_function_test(acc_ops.max_dim_reduce, torch_max, input_shape=(1, 4), dim=1, keepdim=True)
-        self._make_acc_op_function_test(acc_ops.max_dim_reduce, torch_max, input_shape=(3, 4, 3), dim=2)
+        self._make_acc_op_function_test(
+            acc_ops.max_dim_reduce, torch_max, dim=1, keepdim=True
+        )
+        self._make_acc_op_function_test(
+            acc_ops.max_dim_reduce, torch_max, input_shape=(1, 4), dim=1, keepdim=True
+        )
+        self._make_acc_op_function_test(
+            acc_ops.max_dim_reduce, torch_max, input_shape=(3, 4, 3), dim=2
+        )
 
     @parameterized.expand(
         [
             param("max_maximum", orig_op=torch.max, expected_op=acc_ops.maximum),
-            param("maximum_maximum", orig_op=torch.maximum, expected_op=acc_ops.maximum),
+            param(
+                "maximum_maximum", orig_op=torch.maximum, expected_op=acc_ops.maximum
+            ),
             param("min_minimum", orig_op=torch.min, expected_op=acc_ops.minimum),
-            param("minimum_minimum", orig_op=torch.minimum, expected_op=acc_ops.minimum),
+            param(
+                "minimum_minimum", orig_op=torch.minimum, expected_op=acc_ops.minimum
+            ),
         ]
     )
     def test_maximum_minimum(self, _: str, orig_op, expected_op):
@@ -230,7 +256,9 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
                 return self.conv(a)
 
         m = TestModule()
-        input = torch.quantize_per_tensor(torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8)
+        input = torch.quantize_per_tensor(
+            torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8
+        )
         traced = acc_tracer.trace(m, [input])
         _LOGGER.info(traced.graph)
         ph = weight_attr = bias_attr = conv = None
@@ -265,7 +293,9 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
                 return self.conv(a)
 
         m = TestModule()
-        input = torch.quantize_per_tensor(torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8)
+        input = torch.quantize_per_tensor(
+            torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8
+        )
         traced = acc_tracer.trace(m, [input])
         ph = weight_attr = bias_attr = conv = relu = None
         for node in traced.graph.nodes:
@@ -556,7 +586,9 @@ def run_embedding_bag_test(is_4bit, use_weights):
             num_lengths = 10
 
             weights = torch.from_numpy(
-                (np.random.random_sample((num_embeddings, embedding_dim)) + 1).astype(np.float32)
+                (np.random.random_sample((num_embeddings, embedding_dim)) + 1).astype(
+                    np.float32
+                )
             )
             q_weights = (
                 torch.ops.quantized.embedding_bag_4bit_prepack(weights)
@@ -566,7 +598,9 @@ def run_embedding_bag_test(is_4bit, use_weights):
             np_lengths = np.random.randint(0, num_lengths, size=10).astype(np.int32)
 
             num_lengths = np.sum(np_lengths)
-            indices = torch.from_numpy(np.random.randint(low=0, high=num_embeddings, size=num_lengths)).int()
+            indices = torch.from_numpy(
+                np.random.randint(low=0, high=num_embeddings, size=num_lengths)
+            ).int()
 
             lengths = torch.from_numpy(np_lengths)
             offsets = torch.cat([torch.zeros([1]), torch.cumsum(lengths, 0)]).int()
@@ -597,7 +631,9 @@ def run_embedding_bag_test(is_4bit, use_weights):
             _LOGGER.info(traced.graph)
 
             expected_target = (
-                acc_ops.embedding_bag_4bit_rowwise_offsets if is_4bit else acc_ops.embedding_bag_byte_rowwise_offsets
+                acc_ops.embedding_bag_4bit_rowwise_offsets
+                if is_4bit
+                else acc_ops.embedding_bag_byte_rowwise_offsets
             )
 
             for node in traced.graph.nodes:
@@ -642,7 +678,9 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
 
         m = TestModule()
         m.eval()
-        input = torch.quantize_per_tensor(torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8)
+        input = torch.quantize_per_tensor(
+            torch.randn(1, 3, 1, 1), scale=0.01, zero_point=3, dtype=torch.quint8
+        )
         traced = acc_tracer.trace(m, [input])
         ph = weight_attr = bias_attr = bn_mean = bn_var = bn = None
         for node in traced.graph.nodes:
@@ -669,7 +707,9 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
                 self.assertEqual(node.kwargs["running_mean"], bn_mean)
                 self.assertEqual(node.kwargs["running_var"], bn_var)
                 self.assertEqual(node.kwargs["acc_out_ty"][6]["scale"], bn_scale)
-                self.assertEqual(node.kwargs["acc_out_ty"][6]["zero_point"], bn_zero_point)
+                self.assertEqual(
+                    node.kwargs["acc_out_ty"][6]["zero_point"], bn_zero_point
+                )
                 bn = node
             elif node.op == "output":
                 self.assertEqual(bn, node.args[0])
@@ -727,7 +767,9 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
                 return self.linear(a)
 
         m = TestModule()
-        input = torch.quantize_per_tensor(torch.randn(2, 3), scale=0.01, zero_point=3, dtype=torch.quint8)
+        input = torch.quantize_per_tensor(
+            torch.randn(2, 3), scale=0.01, zero_point=3, dtype=torch.quint8
+        )
         traced = acc_tracer.trace(m, [input])
         ph = weight_attr = bias_attr = linear = None
         for node in traced.graph.nodes:
@@ -806,7 +848,10 @@ def forward(self, a: torch.Tensor) -> torch.Tensor:
                 self.assertEqual(node.kwargs["running_mean"], mean)
                 self.assertEqual(node.kwargs["running_var"], var)
                 bn = node
-            elif node.op == "call_module" and node.target == "bn._conditional_exception_wrapper_ValueError":
+            elif (
+                node.op == "call_module"
+                and node.target == "bn._conditional_exception_wrapper_ValueError"
+            ):
                 exception_wrapper = node
             elif node.op == "output":
                 self.assertEqual(bn, node.args[0])
@@ -982,7 +1027,9 @@ def forward(self, a, b):
                 else:
                     self.assertTrue(str(node.target) == "b")
             elif node.op == "call_module":
-                self.assertEqual(node.target, "_conditional_exception_wrapper_AssertionError")
+                self.assertEqual(
+                    node.target, "_conditional_exception_wrapper_AssertionError"
+                )
                 exception_wrapper = node
             elif node.op == "output":
                 self.assertEqual(ph_a, node.args[0])
@@ -1028,7 +1075,9 @@ def forward(self, a, b):
                 else:
                     self.assertTrue(str(node.target) == "b")
             elif node.op == "call_module":
-                self.assertEqual(node.target, "_conditional_exception_wrapper_RuntimeError")
+                self.assertEqual(
+                    node.target, "_conditional_exception_wrapper_RuntimeError"
+                )
                 exception_wrapper = node
             elif node.op == "output":
                 self.assertEqual(ph_a, node.args[0])
@@ -1123,7 +1172,9 @@ def forward(self, a, b):
                 else:
                     self.assertTrue(str(node.target) == "b")
             elif node.op == "call_module":
-                self.assertEqual(node.target, "_conditional_exception_wrapper_AssertionError")
+                self.assertEqual(
+                    node.target, "_conditional_exception_wrapper_AssertionError"
+                )
                 exception_wrapper = node
             elif node.op == "output":
                 self.assertEqual(ph_a, node.args[0])
@@ -1140,8 +1191,12 @@ def test_quantized_add(self):
         class TestModule(nn.Module):
             def __init__(self):
                 super().__init__()
-                self.q_input = torch.nn.quantized.Quantize(scale=1.0 / 128, zero_point=5, dtype=torch.quint8)
-                self.q_other = torch.nn.quantized.Quantize(scale=1.0 / 128, zero_point=10, dtype=torch.quint8)
+                self.q_input = torch.nn.quantized.Quantize(
+                    scale=1.0 / 128, zero_point=5, dtype=torch.quint8
+                )
+                self.q_other = torch.nn.quantized.Quantize(
+                    scale=1.0 / 128, zero_point=10, dtype=torch.quint8
+                )
 
             def forward(self, input: torch.Tensor, other: torch.Tensor) -> torch.Tensor:
                 return torch.ops.quantized.add(
@@ -1163,7 +1218,10 @@ def forward(self, input: torch.Tensor, other: torch.Tensor) -> torch.Tensor:
                 else:
                     self.assertTrue(str(node.target) == "other")
                     other_ph = node
-            elif node.op == "call_function" and node.target == acc_ops.quantize_per_tensor:
+            elif (
+                node.op == "call_function"
+                and node.target == acc_ops.quantize_per_tensor
+            ):
                 qparams = {
                     "scale": 1.0 / 128,
                     "zero_point": 5,
@@ -1207,8 +1265,12 @@ def test_quantized_mul(self):
         class TestModule(nn.Module):
             def __init__(self):
                 super().__init__()
-                self.q_input = torch.nn.quantized.Quantize(scale=1.0 / 128, zero_point=5, dtype=torch.quint8)
-                self.q_other = torch.nn.quantized.Quantize(scale=1.0 / 128, zero_point=10, dtype=torch.quint8)
+                self.q_input = torch.nn.quantized.Quantize(
+                    scale=1.0 / 128, zero_point=5, dtype=torch.quint8
+                )
+                self.q_other = torch.nn.quantized.Quantize(
+                    scale=1.0 / 128, zero_point=10, dtype=torch.quint8
+                )
 
             def forward(self, input: torch.Tensor, other: torch.Tensor) -> torch.Tensor:
                 return torch.ops.quantized.mul(
@@ -1230,7 +1292,10 @@ def forward(self, input: torch.Tensor, other: torch.Tensor) -> torch.Tensor:
                 else:
                     self.assertTrue(str(node.target) == "other")
                     other_ph = node
-            elif node.op == "call_function" and node.target == acc_ops.quantize_per_tensor:
+            elif (
+                node.op == "call_function"
+                and node.target == acc_ops.quantize_per_tensor
+            ):
                 qparams = {
                     "scale": 1.0 / 128,
                     "zero_point": 5,
@@ -1323,7 +1388,9 @@ def test_transpose(self):
         """
         Test that torch.transpose is traced correctly.
         """
-        self._make_acc_op_function_test(acc_ops.permute, lambda x: torch.transpose(x, 1, 0))
+        self._make_acc_op_function_test(
+            acc_ops.permute, lambda x: torch.transpose(x, 1, 0)
+        )
 
     def test_permute(self):
         """
@@ -1378,10 +1445,14 @@ def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
         self.assertTrue(torch.equal(m(a, b), traced(a, b)))
 
     def test_bmm(self):
-        self._make_acc_op_function_test(acc_ops.matmul, lambda x: torch.bmm(x, x), input_shape=(2, 4, 4))
+        self._make_acc_op_function_test(
+            acc_ops.matmul, lambda x: torch.bmm(x, x), input_shape=(2, 4, 4)
+        )
 
     def test_tile(self):
-        return self._make_acc_op_function_test(acc_ops.tile, lambda x: torch.tile(x, (2, 1, 2)), input_shape=(1, 2))
+        return self._make_acc_op_function_test(
+            acc_ops.tile, lambda x: torch.tile(x, (2, 1, 2)), input_shape=(1, 2)
+        )
 
     def test_dropout(self):
         self._make_acc_op_function_test(
@@ -1393,7 +1464,9 @@ def test_dropout(self):
     def test_stochastic_depth(self):
         self._make_acc_op_function_test(
             None,
-            lambda x, p, mode, training: torchvision.ops.stochastic_depth(x, p=p, mode=mode, training=training),
+            lambda x, p, mode, training: torchvision.ops.stochastic_depth(
+                x, p=p, mode=mode, training=training
+            ),
             input_shape=(1, 2, 3),
             p=0.5,
             mode="row",
@@ -1550,7 +1623,9 @@ def test_relu(self):
         self._make_acc_op_function_test(acc_ops.relu, torch.relu)
 
     def test_leaky_relu(self):
-        self._make_acc_op_function_test(acc_ops.leaky_relu, torch.nn.functional.leaky_relu)
+        self._make_acc_op_function_test(
+            acc_ops.leaky_relu, torch.nn.functional.leaky_relu
+        )
 
     def test_elu(self):
         self._make_acc_op_function_test(acc_ops.elu, torch.nn.functional.elu)
@@ -1645,10 +1720,14 @@ def test_fmod(self):
         self._make_acc_op_function_test(acc_ops.fmod, lambda x: torch.fmod(x, -0.4))
 
     def test_floor_div(self):
-        self._make_acc_op_function_test(acc_ops.floor_div, lambda x: torch.div(x, 2, rounding_mode="floor"))
+        self._make_acc_op_function_test(
+            acc_ops.floor_div, lambda x: torch.div(x, 2, rounding_mode="floor")
+        )
 
     def test_trunc_div(self):
-        self._make_acc_op_function_test(acc_ops.trunc_div, lambda x: torch.div(x, 2, rounding_mode="trunc"))
+        self._make_acc_op_function_test(
+            acc_ops.trunc_div, lambda x: torch.div(x, 2, rounding_mode="trunc")
+        )
         # does not behave the same as floor_divide
         # self._make_acc_op_function_test(
         #     acc_ops.trunc_div, lambda x: torch.floor_divide(x, 2)
@@ -1754,7 +1833,9 @@ def test_flatten(self):
         """
         Test that torch.flatten is traced correctly.
         """
-        self._make_acc_op_function_test(acc_ops.flatten, torch.flatten, start_dim=1, end_dim=1)
+        self._make_acc_op_function_test(
+            acc_ops.flatten, torch.flatten, start_dim=1, end_dim=1
+        )
         self._make_acc_op_function_test(acc_ops.flatten, lambda x: x.flatten())
 
     def test_topk_multi_output(self):
@@ -1797,7 +1878,9 @@ class TestModule(torch.nn.Module):
             def __init__(self):
                 super().__init__()
 
-            def forward(self, input: torch.Tensor, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            def forward(
+                self, input: torch.Tensor, a: torch.Tensor, b: torch.Tensor
+            ) -> torch.Tensor:
                 return torch.addmm(input, a, b, alpha=1.2, beta=1.1)
 
         m = TestModule()
@@ -1871,7 +1954,9 @@ def forward(self, input: torch.Tensor) -> torch.Tensor:
     @parameterized.expand([(torch.float,), (torch.float16,)])
     def test_addmm(self, dtype):
         class TestModule(torch.nn.Module):
-            def forward(self, input: torch.Tensor, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+            def forward(
+                self, input: torch.Tensor, a: torch.Tensor, b: torch.Tensor
+            ) -> torch.Tensor:
                 return torch.addmm(input, a, b)
 
         m = TestModule()
@@ -1947,7 +2032,9 @@ def forward(self, input: torch.Tensor) -> torch.Tensor:
                     self.assertEqual(node.kwargs["input"], ph_in)
                     flatten = node
                 elif node.target == acc_ops.topk:
-                    self.assertEqual(node.kwargs["input"], flatten if flatten else ph_in)
+                    self.assertEqual(
+                        node.kwargs["input"], flatten if flatten else ph_in
+                    )
                     topk = node
                 elif node.target == acc_ops.getitem:
                     self.assertEqual(node.kwargs["input"], topk)
@@ -1970,7 +2057,9 @@ def test_t(self):
         Test Tensor.t() is traced correctly.
         """
         self._make_acc_op_function_test(acc_ops.permute, lambda x: x.t())
-        self._make_acc_op_function_test(acc_ops.permute, lambda x: x.t(), input_shape=(3,))
+        self._make_acc_op_function_test(
+            acc_ops.permute, lambda x: x.t(), input_shape=(3,)
+        )
 
     def test_split_size(self):
         self._make_acc_op_function_test(
@@ -2126,7 +2215,9 @@ def test_resnext50_32x4d(self):
 
     def test_cumsum(self):
         self._make_acc_op_function_test(acc_ops.cumsum, torch.cumsum, dim=1)
-        self._make_acc_op_function_test(acc_ops.cumsum, torch.cumsum, dim=1, dtype=torch.float)
+        self._make_acc_op_function_test(
+            acc_ops.cumsum, torch.cumsum, dim=1, dtype=torch.float
+        )
 
     def test_chunk(self):
         self._make_acc_op_function_test(acc_ops.chunk, torch.chunk, chunks=2, dim=0)
@@ -2291,7 +2382,9 @@ def forward(self, a):
         results = traced(a)
         references = m(a)
         for res, ref in zip(results, references):
-            self.assertTrue(torch.equal(ref, res), f"Tensors at don't match {ref=} {res=}")
+            self.assertTrue(
+                torch.equal(ref, res), f"Tensors at don't match {ref=} {res=}"
+            )
 
     def test_inplace_raise(self):
         """
diff --git a/py/torch_tensorrt/fx/test/tracer/test_dispatch_tracer.py b/py/torch_tensorrt/fx/test/tracer/test_dispatch_tracer.py
index b17e8b490c..c5b7a22ec6 100644
--- a/py/torch_tensorrt/fx/test/tracer/test_dispatch_tracer.py
+++ b/py/torch_tensorrt/fx/test/tracer/test_dispatch_tracer.py
@@ -98,7 +98,10 @@ def f(x, y):
         # through the op registration method, the module is defined in a call_function
         call_function_node = None
         for node in gm.graph.nodes:
-            if node.op == "call_function" and node.target == torch.ops.wrap.wrapped_leaf:
+            if (
+                node.op == "call_function"
+                and node.target == torch.ops.wrap.wrapped_leaf
+            ):
                 call_function_node = node
         self.assertIsNotNone(call_function_node)
 
diff --git a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
index 77f68c5ad6..e23ab5dd81 100644
--- a/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
+++ b/py/torch_tensorrt/fx/test/trt_lower/test_diagnostics.py
@@ -75,7 +75,9 @@ def test_condition_func_name(self):
         diag.set_current_collector(collector)
 
         with diag.collect_when(
-            diag.CollectionConditions.when_called_by_function(self.test_condition_func_name.__name__)
+            diag.CollectionConditions.when_called_by_function(
+                self.test_condition_func_name.__name__
+            )
         ):
             diag.write("aaa", "hello")
 
@@ -98,7 +100,9 @@ def test_write_without_collect(self):
     def test_conditions(self):
 
         _test_cond(
-            diag.CollectionConditions.when_called_by_function(self.test_conditions.__name__),
+            diag.CollectionConditions.when_called_by_function(
+                self.test_conditions.__name__
+            ),
             should_collect=True,
         )
 
diff --git a/py/torch_tensorrt/fx/test/trt_lower/test_observer_gpu.py b/py/torch_tensorrt/fx/test/trt_lower/test_observer_gpu.py
index c620d1f572..9ed3b9df06 100644
--- a/py/torch_tensorrt/fx/test/trt_lower/test_observer_gpu.py
+++ b/py/torch_tensorrt/fx/test/trt_lower/test_observer_gpu.py
@@ -33,7 +33,9 @@ def forward(self, x, y):
 
         with execution_verifier() as verify_execution:
 
-            lowerer = lower.Lowerer.create(lower_setting=LowerSetting(min_acc_module_size=0))
+            lowerer = lower.Lowerer.create(
+                lower_setting=LowerSetting(min_acc_module_size=0)
+            )
 
             @verify_execution
             def observe_fuse_permute_linear_post(ctx: ob.ObserveContext):
@@ -45,5 +47,7 @@ def observe_fuse_permute_linear_post(ctx: ob.ObserveContext):
                 assert ctx.callable is fuse_permute_linear.orig_func
 
             # Register the observer callback and do the lowering
-            with fuse_permute_linear.observers.post.add(observe_fuse_permute_linear_post):
+            with fuse_permute_linear.observers.post.add(
+                observe_fuse_permute_linear_post
+            ):
                 lowerer(mod, inp)
diff --git a/py/torch_tensorrt/fx/test/trt_lower/trt_splitter_test.py b/py/torch_tensorrt/fx/test/trt_lower/trt_splitter_test.py
index ffb6035315..916394e944 100644
--- a/py/torch_tensorrt/fx/test/trt_lower/trt_splitter_test.py
+++ b/py/torch_tensorrt/fx/test/trt_lower/trt_splitter_test.py
@@ -40,7 +40,9 @@ def find_inputs(module):
 
 
 def find_fun_calls(module, target):
-    return [n for n in module.graph.nodes if n.op == "call_function" and n.target == target]
+    return [
+        n for n in module.graph.nodes if n.op == "call_function" and n.target == target
+    ]
 
 
 def find_output(module):
@@ -146,11 +148,19 @@ def test_splitter(splitter):
             st_split = splitter()
             verify_split_model(st_split)
             # Should be "a", "conv.weight", "conv.bias".
-            get_attr_nodes = [node.target for node in st_split._run_on_gpu_0.graph.nodes if node.op == "get_attr"]
+            get_attr_nodes = [
+                node.target
+                for node in st_split._run_on_gpu_0.graph.nodes
+                if node.op == "get_attr"
+            ]
             assert len(get_attr_nodes) == 3 and "a" in get_attr_nodes
 
             # Should be "b", "conv.weight", "conv.bias".
-            get_attr_nodes = [node.target for node in st_split._run_on_acc_1.graph.nodes if node.op == "get_attr"]
+            get_attr_nodes = [
+                node.target
+                for node in st_split._run_on_acc_1.graph.nodes
+                if node.op == "get_attr"
+            ]
             assert len(get_attr_nodes) == 3 and "b" in get_attr_nodes
 
         test_splitter(splitter)
@@ -204,10 +214,14 @@ def test_splitter(splitter):
             self.assertEqual(arg.name, topk.kwargs["input"].name)
             self.assertEqual(3, topk.kwargs["k"])
 
-            [topk_res1, topk_res2] = find_fun_calls(st_split._run_on_acc_0, acc_ops.getitem)
+            [topk_res1, topk_res2] = find_fun_calls(
+                st_split._run_on_acc_0, acc_ops.getitem
+            )
 
             [sigmoid] = find_fun_calls(st_split._run_on_acc_0, acc_ops.sigmoid)
-            self.assertIn(sigmoid.kwargs["input"].name, {topk_res1.name, topk_res2.name})
+            self.assertIn(
+                sigmoid.kwargs["input"].name, {topk_res1.name, topk_res2.name}
+            )
 
             # Main graph returns a tuple
             output = find_output(st_split._run_on_acc_0)
@@ -246,7 +260,9 @@ def __init__(self, relu_module, sin_module):
             def forward(self, x):
                 return self.relu_module(x) + self.sin_module(x)
 
-        mod = acc_tracer.trace(TestModule3(ReluModule(), SinModule()), [torch.randn(2, 3)])
+        mod = acc_tracer.trace(
+            TestModule3(ReluModule(), SinModule()), [torch.randn(2, 3)]
+        )
 
         # Making sin(x) run on ACC
         splitter = TRTSplitter(
@@ -772,9 +788,13 @@ def test_splitter(splitter):
             except RuntimeError as err:
                 self.assertEqual(str(err), ERROR_MSG_MULTI_ACC_MODULES)
 
-            self.assertEqual({acc_ops.relu}, find_call_targets(module_fx_split._run_on_acc_0))
+            self.assertEqual(
+                {acc_ops.relu}, find_call_targets(module_fx_split._run_on_acc_0)
+            )
 
-            self.assertEqual({acc_ops.cos}, find_call_targets(module_fx_split._run_on_gpu_1))
+            self.assertEqual(
+                {acc_ops.cos}, find_call_targets(module_fx_split._run_on_gpu_1)
+            )
 
             self.assertEqual(
                 {acc_ops.size, acc_ops.getitem, acc_ops.add, acc_ops.sigmoid},
@@ -1011,7 +1031,11 @@ def test_acc_fusions_finder_1(self):
         module_fx = torch.fx.symbolic_trace(module_nn)
         shape_prop.ShapeProp(module_fx).propagate(torch.randn(1, 1, 1))
 
-        acc_node = {node for node in module_fx.graph.nodes if node.op in torch.fx.passes.tools_common.CALLABLE_NODE_OPS}
+        acc_node = {
+            node
+            for node in module_fx.graph.nodes
+            if node.op in torch.fx.passes.tools_common.CALLABLE_NODE_OPS
+        }
 
         fusions_finder = torch.fx.passes.splitter_base.FxNetAccFusionsFinder(
             module_fx,
@@ -1033,7 +1057,9 @@ def test_acc_fusions_finder_2(self):
         module_fx = torch.fx.symbolic_trace(module_nn)
         shape_prop.ShapeProp(module_fx).propagate(torch.randn(1, 1, 1))
 
-        acc_node = {node for node in module_fx.graph.nodes if node.target == operator.add}
+        acc_node = {
+            node for node in module_fx.graph.nodes if node.target == operator.add
+        }
         fusions_finder = torch.fx.passes.splitter_base.FxNetAccFusionsFinder(
             module_fx,
             acc_node,
diff --git a/py/torch_tensorrt/fx/tools/engine_layer_visualize.py b/py/torch_tensorrt/fx/tools/engine_layer_visualize.py
index 87db48eb8e..cecd1ecb20 100644
--- a/py/torch_tensorrt/fx/tools/engine_layer_visualize.py
+++ b/py/torch_tensorrt/fx/tools/engine_layer_visualize.py
@@ -53,7 +53,9 @@ def from_string(cls, string, tactic_names, layer_times=None):
         )[0]
 
         if kernel_name != "Constant":
-            inputs = re.findall("[, ]*(.+?)\\[([Half|Float|Int8]+\\(\\d[,\\d]*\\))\\]", inputs)
+            inputs = re.findall(
+                "[, ]*(.+?)\\[([Half|Float|Int8]+\\(\\d[,\\d]*\\))\\]", inputs
+            )
             for input_name, input_type in inputs:
                 input_names.append(input_name)
                 input_types.append(input_type)
@@ -155,7 +157,9 @@ def build_edge(layer, graph, reformat_layers, output_name2node, layer_name2node)
                 tactic_names = {}
 
         if tactic_name_start and "Set Tactic Name:" in line:
-            layer_name, kernel_name, _ = re.findall("VERBOSE: (.*) Set Tactic Name: (.*) Tactic: (.*)$", line)[0]
+            layer_name, kernel_name, _ = re.findall(
+                "VERBOSE: (.*) Set Tactic Name: (.*) Tactic: (.*)$", line
+            )[0]
             tactic_names[layer_name] = kernel_name
 
         # Some reformat layers aren't displayed in Engine Layer Information
@@ -193,7 +197,9 @@ def build_edge(layer, graph, reformat_layers, output_name2node, layer_name2node)
             layer_name2node[layer.layer_name] = node
 
         for layer in layers:
-            build_edge(layer, dot_graph, reformat_layers, output_name2node, layer_name2node)
+            build_edge(
+                layer, dot_graph, reformat_layers, output_name2node, layer_name2node
+            )
 
         dot_graph.write_raw(f"EngineLayers_{i}.dot")
         i += 1
diff --git a/py/torch_tensorrt/fx/tools/model_packager.py b/py/torch_tensorrt/fx/tools/model_packager.py
index 246b0335be..0ef0ff05a4 100644
--- a/py/torch_tensorrt/fx/tools/model_packager.py
+++ b/py/torch_tensorrt/fx/tools/model_packager.py
@@ -36,7 +36,9 @@ def flatten_model(model: torch.fx.GraphModule) -> torch.fx.GraphModule:
     return model
 
 
-def generate_standalone_repro(model: torch.fx.GraphModule, output: Union[str, Path, TextIO], prelude: str = "") -> None:
+def generate_standalone_repro(
+    model: torch.fx.GraphModule, output: Union[str, Path, TextIO], prelude: str = ""
+) -> None:
     """
     Generate a standalone python file for the model where weights are randomized
     and the model flattened.
@@ -59,13 +61,22 @@ def generate_standalone_repro(model: torch.fx.GraphModule, output: Union[str, Pa
         shape = ", ".join([str(i) for i in v.shape])
         rand_func = "randn" if torch.is_floating_point(v) else "randint"
         int_range = "" if torch.is_floating_point(v) else "0, 5, "
-        lines.append(f"{INDENT * 2}self.{k} = nn.Parameter(torch.{rand_func}({int_range}{shape}, dtype={v.dtype}))")
+        lines.append(
+            f"{INDENT * 2}self.{k} = nn.Parameter(torch.{rand_func}({int_range}{shape}, dtype={v.dtype}))"
+        )
     code = str(model.code)
 
     def dump(f):
         f.write(prelude)
         f.write("\n".join(lines))
-        f.write("\n".join([INDENT + line.replace("self._holder.", "self.") for line in code.split("\n")]))
+        f.write(
+            "\n".join(
+                [
+                    INDENT + line.replace("self._holder.", "self.")
+                    for line in code.split("\n")
+                ]
+            )
+        )
         f.write("\n")
 
     if isinstance(output, (Path, str)):
diff --git a/py/torch_tensorrt/fx/tools/node_profiler.py b/py/torch_tensorrt/fx/tools/node_profiler.py
index f9fd581678..1a37c27197 100644
--- a/py/torch_tensorrt/fx/tools/node_profiler.py
+++ b/py/torch_tensorrt/fx/tools/node_profiler.py
@@ -35,7 +35,9 @@ def run_node(self, n: fx.Node) -> Any:
         end_event.record()
         torch.cuda.synchronize()
 
-        self.execution_time[f"{n.name}"] = start_event.elapsed_time(end_event) / self.iter
+        self.execution_time[f"{n.name}"] = (
+            start_event.elapsed_time(end_event) / self.iter
+        )
         self.node_map[n.name] = n
         return result
 
diff --git a/py/torch_tensorrt/fx/tools/timing_cache_utils.py b/py/torch_tensorrt/fx/tools/timing_cache_utils.py
index c7816fda8f..4580843e98 100644
--- a/py/torch_tensorrt/fx/tools/timing_cache_utils.py
+++ b/py/torch_tensorrt/fx/tools/timing_cache_utils.py
@@ -27,7 +27,9 @@ def get_timing_cache_trt(self, timing_cache_file: str) -> bytearray:
         except Exception:
             return None
 
-    def update_timing_cache(self, timing_cache_file: str, serilized_cache: bytearray) -> None:
+    def update_timing_cache(
+        self, timing_cache_file: str, serilized_cache: bytearray
+    ) -> None:
         if not self.save_timing_cache:
             return
         timing_cache_file = self.get_file_full_name(timing_cache_file)
diff --git a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py
index ac8dade619..59d2f49042 100644
--- a/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py
+++ b/py/torch_tensorrt/fx/tools/trt_profiler_sorted.py
@@ -19,14 +19,18 @@ def __init__(self):
     def report_layer_time(self, layer_name: str, ms: int) -> None:
         self.layers[layer_name] = ms
 
-    def print_sorted_profile(self, additional_info: Optional[Mapping[str, str]]) -> None:
+    def print_sorted_profile(
+        self, additional_info: Optional[Mapping[str, str]]
+    ) -> None:
         additional_info = {} if additional_info is None else additional_info
         for k, v in sorted(self.layers.items(), key=operator.itemgetter(1)):
             additional_str = additional_info.get(k, "")
             _LOGGER.info(f"{k} {additional_str}: {v}ms")
 
 
-def profile_trt_module(name: str, trt_mod: TRTModule, mod_input: List[torch.Tensor]) -> None:
+def profile_trt_module(
+    name: str, trt_mod: TRTModule, mod_input: List[torch.Tensor]
+) -> None:
     """
     Provide per layer timing and shape info
     """
@@ -34,8 +38,12 @@ def profile_trt_module(name: str, trt_mod: TRTModule, mod_input: List[torch.Tens
     shape_map = {}
     for layer in layer_info["Layers"]:
         name = layer["Name"]
-        input_str = ", ".join([str(x.get("Dimensions", "[]")) for x in layer.get("Inputs", [])])
-        output_str = ", ".join([str(x.get("Dimensions", "[]")) for x in layer.get("Outputs", [])])
+        input_str = ", ".join(
+            [str(x.get("Dimensions", "[]")) for x in layer.get("Inputs", [])]
+        )
+        output_str = ", ".join(
+            [str(x.get("Dimensions", "[]")) for x in layer.get("Outputs", [])]
+        )
         shape_map[name] = f"({input_str}) -> ({output_str})"
 
     trt_mod.enable_profiling(profiler=SortedTRTProfiler())  # pyre-ignore[29]
diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_normalizer.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_normalizer.py
index 02197772a7..fd2c26ac2f 100644
--- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_normalizer.py
+++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_normalizer.py
@@ -65,7 +65,9 @@ class NormalizationInfo(NamedTuple):
     # (tensor_meta_field_name, orginal_field_name)
     # when move_to_qparams is True, we'll move the field to qparams
     # dictionary, otherwise it will stay in TensorMeta itself
-    kwargs_to_move_to_acc_out_ty: Optional[List[Union[Tuple[str, str, bool], Tuple[str, str]]]]
+    kwargs_to_move_to_acc_out_ty: Optional[
+        List[Union[Tuple[str, str, bool], Tuple[str, str]]]
+    ]
     needs_shapes_for_normalization: bool
 
 
@@ -81,7 +83,9 @@ def _insert_fun(
     arg_replacement_tuples: List[Tuple],
     new_fn_target: Optional[Callable] = None,
     custom_mapping_fn: Optional[Callable] = None,
-    kwargs_to_move_to_acc_out_ty: Optional[List[Union[Tuple[str, str, bool], Tuple[str, str]]]] = None,
+    kwargs_to_move_to_acc_out_ty: Optional[
+        List[Union[Tuple[str, str, bool], Tuple[str, str]]]
+    ] = None,
     needs_shapes_for_normalization=False,
     allow_normalize_from_torch_package=False,
 ):
@@ -114,7 +118,9 @@ def _insert_fun(
         for k in orig_kwarg:
             if k in ALIAS_MAP:
                 orig_kwarg_set.update(ALIAS_MAP[k])
-        final_arg_replacement_tuples.append((tuple(orig_kwarg_set), new_kwarg, is_optional))
+        final_arg_replacement_tuples.append(
+            (tuple(orig_kwarg_set), new_kwarg, is_optional)
+        )
 
     assert op_and_target not in _normalization_dict.keys()
     norm_info = NormalizationInfo(
@@ -167,7 +173,9 @@ def register_acc_op_mapping(
             ]
         ]
     ] = None,
-    kwargs_to_move_to_acc_out_ty: Optional[List[Union[Tuple[str, str, bool], Tuple[str, str]]]] = None,
+    kwargs_to_move_to_acc_out_ty: Optional[
+        List[Union[Tuple[str, str, bool], Tuple[str, str]]]
+    ] = None,
 ):
     """
     Use this decorator to map a non-acc operator to an acc operator.
@@ -242,7 +250,9 @@ def move_kwargs_to_acc_out_ty(
     if normalization_info.kwargs_to_move_to_acc_out_ty is None:
         return
 
-    assert acc_utils.is_acc_op_with_kwarg(normalization_info.new_fn_target, "acc_out_ty")
+    assert acc_utils.is_acc_op_with_kwarg(
+        normalization_info.new_fn_target, "acc_out_ty"
+    )
 
     # Build a dict representing the new TensorMetadata to use for acc_out_ty,
     # and then remove the kwarg from the new_kwargs since it's passed in via
@@ -270,7 +280,9 @@ def move_kwargs_to_acc_out_ty(
     new_kwargs["acc_out_ty"] = acc_utils.build_raw_tensor_meta(**tmd_dict)
 
 
-def get_normalized_kwargs(node: torch.fx.Node, arg_replacement_tuples: ArgReplacementTuplesType):
+def get_normalized_kwargs(
+    node: torch.fx.Node, arg_replacement_tuples: ArgReplacementTuplesType
+):
     new_kwargs = {}
     final_arg_is_varg = False
     for i, replacement_tuple in enumerate(arg_replacement_tuples):
@@ -298,7 +310,9 @@ def get_normalized_kwargs(node: torch.fx.Node, arg_replacement_tuples: ArgReplac
                 new_kwargs[new_kwarg_name] = node.args[i]
             else:
                 # Verify the arg we're trying to normalize was optional.
-                assert is_optional, f"Cannot normalize {orig_kwargs_names} to {new_kwarg_name} for {node.name}"
+                assert (
+                    is_optional
+                ), f"Cannot normalize {orig_kwargs_names} to {new_kwarg_name} for {node.name}"
         else:
             new_kwargs[new_kwarg_name] = node.kwargs[orig_kwargs_name]
 
@@ -401,7 +415,9 @@ def normalize_to_acc_op(
         else:
             normalized_args = ()
             try:
-                normalized_kwargs = get_normalized_kwargs(node, normalization_info.arg_replacement_tuples)
+                normalized_kwargs = get_normalized_kwargs(
+                    node, normalization_info.arg_replacement_tuples
+                )
             except Exception:
                 _LOGGER.error(
                     f"Error during kwarg normalization for: {node.format_node()}; "
@@ -409,7 +425,10 @@ def normalize_to_acc_op(
                 )
                 raise
 
-        if normalization_info.needs_shapes_for_normalization and not expect_nodes_have_shapes:
+        if (
+            normalization_info.needs_shapes_for_normalization
+            and not expect_nodes_have_shapes
+        ):
             # All nodes needing shapes for normalization should be custom mapped.
             assert normalization_info.custom_mapping_fn is not None
             # For custom mapping, the normalized_kwargs are used for the original op,
@@ -420,7 +439,9 @@ def normalize_to_acc_op(
             continue
 
         try:
-            normalize_to_acc_op(node, normalization_info, normalized_args, normalized_kwargs)
+            normalize_to_acc_op(
+                node, normalization_info, normalized_args, normalized_kwargs
+            )
         except Exception:
             _LOGGER.error(f"Error during normalization for node: {node.format_node()}")
             raise
diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
index 3ba4cc6e5a..d1a5322316 100644
--- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
+++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_ops.py
@@ -141,7 +141,9 @@ def max_pool2d(
 
 @register_acc_op_mapping(op_and_target=("call_function", nn.functional.max_pool3d))
 @register_acc_op
-def max_pool3d(*, input, kernel_size, stride, padding, dilation, ceil_mode, return_indices):
+def max_pool3d(
+    *, input, kernel_size, stride, padding, dilation, ceil_mode, return_indices
+):
     return nn.functional.max_pool3d(
         input=input,
         kernel_size=kernel_size,
@@ -153,13 +155,17 @@ def max_pool3d(*, input, kernel_size, stride, padding, dilation, ceil_mode, retu
     )
 
 
-@register_acc_op_mapping(op_and_target=("call_function", nn.functional.adaptive_avg_pool2d))
+@register_acc_op_mapping(
+    op_and_target=("call_function", nn.functional.adaptive_avg_pool2d)
+)
 @register_acc_op
 def adaptive_avg_pool2d(*, input, output_size):
     return nn.functional.adaptive_avg_pool2d(input=input, output_size=output_size)
 
 
-@register_acc_op_mapping(op_and_target=("call_function", nn.functional.adaptive_avg_pool3d))
+@register_acc_op_mapping(
+    op_and_target=("call_function", nn.functional.adaptive_avg_pool3d)
+)
 @register_acc_op
 def adaptive_avg_pool3d(*, input, output_size):
     return nn.functional.adaptive_avg_pool3d(input=input, output_size=output_size)
@@ -306,7 +312,9 @@ def custom_getattr_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     input_obj = node.args[0]
     attr_name = node.args[1]
     assert isinstance(input_obj, torch.fx.Node)
-    assert input_obj.meta["type"] == torch.Tensor, f"Expected torch.Tensor type for {input_obj.meta['type']}"
+    assert (
+        input_obj.meta["type"] == torch.Tensor
+    ), f"Expected torch.Tensor type for {input_obj.meta['type']}"
     assert (
         attr_name == "shape" or attr_name == "device" or attr_name == "dtype"
     ), f"Only supporting shape, device and dtype getattr for now, not {attr_name}"
@@ -336,14 +344,18 @@ def tensor_size_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     """
 
     with node.graph.inserting_before(node):
-        size_node = node.graph.call_function(size, kwargs={"input": node.kwargs["input"]})
+        size_node = node.graph.call_function(
+            size, kwargs={"input": node.kwargs["input"]}
+        )
 
         if "dim" not in node.kwargs:
             size_node.meta = node.meta.copy()
             return size_node
 
         size_node.meta["type"] = torch.Size
-        getitem_node = node.graph.call_function(getitem, kwargs={"input": size_node, "idx": node.kwargs["dim"]})
+        getitem_node = node.graph.call_function(
+            getitem, kwargs={"input": size_node, "idx": node.kwargs["dim"]}
+        )
         getitem_node.meta = node.meta.copy()
         return getitem_node
 
@@ -418,7 +430,9 @@ def repeat_interleave_mapper(node: torch.fx.Node, _: nn.Module):
     input_node = node.kwargs["input"]
     repeats = cast(int, node.kwargs["repeats"])
     dim = node.kwargs["dim"]
-    assert type(repeats) is int, "We currently only support `repeat_interleave` with int repeats"
+    assert (
+        type(repeats) is int
+    ), "We currently only support `repeat_interleave` with int repeats"
     rank = node.meta["tensor_rank"]
     if dim is None:
         repeat_dim = rank - 1
@@ -619,12 +633,16 @@ def addmm_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     """
     with node.graph.inserting_before(node):
         mm_kwargs = {"input": node.kwargs["mat1"], "other": node.kwargs["mat2"]}
-        mm_node = node.graph.create_node("call_function", matmul, kwargs=mm_kwargs, name=f"{node.name}_mm")
+        mm_node = node.graph.create_node(
+            "call_function", matmul, kwargs=mm_kwargs, name=f"{node.name}_mm"
+        )
         mm_node.meta = node.meta.copy()
 
         if node.kwargs["alpha"] != 1:
             mul_kwargs = {"input": mm_node, "other": node.kwargs["alpha"]}
-            mm_node = node.graph.create_node("call_function", mul, kwargs=mul_kwargs, name=f"{mm_node.name}_mul")
+            mm_node = node.graph.create_node(
+                "call_function", mul, kwargs=mul_kwargs, name=f"{mm_node.name}_mul"
+            )
         mm_node.meta = node.meta.copy()
 
         input_node = node.kwargs["input"]
@@ -638,7 +656,9 @@ def addmm_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
             input_node = new_input_node
 
         add_kwargs = {"input": mm_node, "other": input_node}
-        add_node = node.graph.create_node("call_function", add, kwargs=add_kwargs, name=f"{node.name}_add")
+        add_node = node.graph.create_node(
+            "call_function", add, kwargs=add_kwargs, name=f"{node.name}_add"
+        )
         add_node.meta = node.meta.copy()
         return add_node
 
@@ -698,7 +718,9 @@ def permute(*, input, permutation):
 def square_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     input_node = node.kwargs["input"]
     with node.graph.inserting_before(node):
-        new_node = node.graph.call_function(mul, kwargs={"input": input_node, "other": input_node})
+        new_node = node.graph.call_function(
+            mul, kwargs={"input": input_node, "other": input_node}
+        )
         new_node.meta = node.meta.copy()
         return new_node
 
@@ -741,7 +763,9 @@ def matmul(*, input, other):
     op_and_target=("call_function", nn.functional.dropout),
     arg_replacement_tuples=[("input", "input")],
 )
-@register_custom_acc_mapper_fn(op_and_target=("call_method", "detach"), arg_replacement_tuples=[("input", "input")])
+@register_custom_acc_mapper_fn(
+    op_and_target=("call_method", "detach"), arg_replacement_tuples=[("input", "input")]
+)
 def dropout_mapper(node: torch.fx.Node, mod: nn.Module):
     """
     Remove dropout node and directly map its input to output.
@@ -795,7 +819,9 @@ def silu(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     with node.graph.inserting_before(node):
         sigmoid_node = node.graph.call_function(sigmoid, kwargs={"input": input_node})
         sigmoid_node.meta = node.meta.copy()
-        new_node = node.graph.call_function(mul, kwargs={"input": sigmoid_node, "other": input_node})
+        new_node = node.graph.call_function(
+            mul, kwargs={"input": sigmoid_node, "other": input_node}
+        )
         new_node.meta = node.meta.copy()
         return new_node
 
@@ -809,9 +835,13 @@ def silu(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
 def hardswish_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     input_node = node.kwargs["input"]
     with node.graph.inserting_before(node):
-        new_sigmoid_node = node.graph.call_function(hardsigmoid, kwargs={"input": input_node})
+        new_sigmoid_node = node.graph.call_function(
+            hardsigmoid, kwargs={"input": input_node}
+        )
         new_sigmoid_node.meta = node.meta.copy()
-        new_node = node.graph.call_function(mul, kwargs={"input": new_sigmoid_node, "other": input_node})
+        new_node = node.graph.call_function(
+            mul, kwargs={"input": new_sigmoid_node, "other": input_node}
+        )
         new_node.meta = node.meta.copy()
         return new_node
 
@@ -889,7 +919,9 @@ def quantize_per_tensor(*, input, acc_out_ty=None):
     assert acc_out_ty is not None
     qparams = acc_out_ty.qparams
     dtype = acc_out_ty.dtype
-    return torch.quantize_per_tensor(input, qparams["scale"], qparams["zero_point"], dtype)
+    return torch.quantize_per_tensor(
+        input, qparams["scale"], qparams["zero_point"], dtype
+    )
 
 
 @register_acc_op_properties(AccOpProperty.unary)
@@ -931,7 +963,9 @@ def dequantize(*, input):
     return torch.dequantize(input)
 
 
-@register_acc_op_properties(AccOpProperty.pointwise, AccOpProperty.unary, AccOpProperty.quantized)
+@register_acc_op_properties(
+    AccOpProperty.pointwise, AccOpProperty.unary, AccOpProperty.quantized
+)
 @register_acc_op
 def rescale_quantize_per_tensor(*, input, acc_out_ty=None):
     assert acc_out_ty is not None
@@ -991,7 +1025,9 @@ def div_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule) -> torch.fx.Node:
                 kwargs={"input": div_kwargs["input"], "other": div_kwargs["other"]},
             )
         else:
-            raise RuntimeError(f"Unhandled div rounding mode {div_kwargs['rounding_mode']}")
+            raise RuntimeError(
+                f"Unhandled div rounding mode {div_kwargs['rounding_mode']}"
+            )
         div_node.meta = node.meta.copy()
         return div_node
 
@@ -1048,10 +1084,14 @@ def relu(*, input, inplace=False):
 
 
 @register_acc_op_properties(AccOpProperty.pointwise, AccOpProperty.unary)
-@register_acc_op_mapping(op_and_target=("call_function", torch.nn.functional.leaky_relu))
+@register_acc_op_mapping(
+    op_and_target=("call_function", torch.nn.functional.leaky_relu)
+)
 @register_acc_op
 def leaky_relu(*, input, negative_slope=0.01, inplace=False):
-    return nn.functional.leaky_relu(input=input, negative_slope=negative_slope, inplace=inplace)
+    return nn.functional.leaky_relu(
+        input=input, negative_slope=negative_slope, inplace=inplace
+    )
 
 
 @register_acc_op_properties(AccOpProperty.pointwise, AccOpProperty.unary)
@@ -1092,7 +1132,9 @@ def torch_log1p_mapper(node: torch.fx.Node, _: torch.nn.Module) -> torch.fx.Node
         return log_node
 
 
-def reduce_op_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule, func) -> torch.fx.Node:
+def reduce_op_mapper(
+    node: torch.fx.Node, mod: torch.fx.GraphModule, func
+) -> torch.fx.Node:
     with node.graph.inserting_before(node):
         kwargs = dict(node.kwargs)
         if "dim" in kwargs and isinstance(kwargs["dim"], int):
@@ -1230,7 +1272,9 @@ def std_mapper(node, mod):
     dim = node.kwargs.get("dim")
     keepdim = node.kwargs.get("keepdim")
     # assert unbiased is False or unbiased is None, "We currently do not support `std` with unbiased=True where n-1 is used"
-    assert dim is not None and keepdim is not None, "We currently do not support `std` with dim=None and keepdim=None"
+    assert (
+        dim is not None and keepdim is not None
+    ), "We currently do not support `std` with dim=None and keepdim=None"
 
     with node.graph.inserting_before(node):
         # mean(X)
@@ -1307,7 +1351,9 @@ def std_mapper(node, mod):
         ("keepdim", "keepdim", this_arg_is_optional),
     ],
 )
-def add_maximum_minimum_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule) -> torch.fx.Node:
+def add_maximum_minimum_mapper(
+    node: torch.fx.Node, mod: torch.fx.GraphModule
+) -> torch.fx.Node:
     # there are effectively three versions of torch.max / torch.min
     # full reduce: torch.max(input) -> Tensor
     # dimensional reduce: torch.max(input, dim, keepdim=False, *, out=None) -> (Tensor, LongTensor)
@@ -1735,7 +1781,9 @@ def conv3d(*, input, weight, bias, stride, padding, dilation, groups):
     )
 
 
-@register_acc_op_mapping(op_and_target=("call_function", torch.nn.functional.conv_transpose2d))
+@register_acc_op_mapping(
+    op_and_target=("call_function", torch.nn.functional.conv_transpose2d)
+)
 @register_acc_op
 def conv_transpose2d(
     *,
@@ -1760,7 +1808,9 @@ def conv_transpose2d(
     )
 
 
-@register_acc_op_mapping(op_and_target=("call_function", torch.nn.functional.conv_transpose3d))
+@register_acc_op_mapping(
+    op_and_target=("call_function", torch.nn.functional.conv_transpose3d)
+)
 @register_acc_op
 def conv_transpose3d(
     *,
@@ -1832,7 +1882,9 @@ def argmin_max_mapper_impl(node: torch.fx.Node, largest: bool) -> torch.fx.Node:
     keepdim = node.kwargs["keepdim"]
 
     if dim is None and keepdim:
-        raise RuntimeError("We currently don't support argmin/argmax with dim=None and keepdim=True")
+        raise RuntimeError(
+            "We currently don't support argmin/argmax with dim=None and keepdim=True"
+        )
 
     with node.graph.inserting_before(node):
         if dim is None:
@@ -1951,7 +2003,9 @@ def torch_split_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Node:
             slice_nodes.append(new_node)
             start += i
 
-        new_node = node.graph.call_function(tuple_construct, kwargs={"tensors": tuple(slice_nodes)})
+        new_node = node.graph.call_function(
+            tuple_construct, kwargs={"tensors": tuple(slice_nodes)}
+        )
         new_node.meta = node.meta.copy()
         return new_node
 
@@ -2218,7 +2272,9 @@ def slice_tensor(*, input, dim, start, stop, step):
     ],
 )
 def custom_narrow_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Node:
-    assert isinstance(node.kwargs["start"], int) and isinstance(node.kwargs["length"], int)
+    assert isinstance(node.kwargs["start"], int) and isinstance(
+        node.kwargs["length"], int
+    )
     kwargs = {
         "input": node.kwargs["input"],
         "dim": node.kwargs["dim"],
@@ -2372,9 +2428,13 @@ def custom_tensor_to_mapper(node: torch.fx.Node, _: nn.Module):
             input_obj = node.kwargs["input"]
             other_obj = dest
             with node.graph.inserting_before(node):
-                dtype_node = node.graph.call_function(dtype, kwargs={"input": other_obj})
+                dtype_node = node.graph.call_function(
+                    dtype, kwargs={"input": other_obj}
+                )
                 dtype_node.meta["type"] = torch.dtype
-                device_node = node.graph.call_function(device, kwargs={"input": other_obj})
+                device_node = node.graph.call_function(
+                    device, kwargs={"input": other_obj}
+                )
                 device_node.meta["type"] = torch.device
                 new_kwargs = {
                     "input": input_obj,
@@ -2405,7 +2465,9 @@ def custom_tensor_to_mapper(node: torch.fx.Node, _: nn.Module):
     }
 
     with node.graph.inserting_before(node):
-        new_node = node.graph.create_node("call_function", to_dtype, kwargs=new_kwargs, name=node.name)
+        new_node = node.graph.create_node(
+            "call_function", to_dtype, kwargs=new_kwargs, name=node.name
+        )
         new_node.meta = node.meta
         return new_node
 
@@ -2446,7 +2508,9 @@ def custom_torch_add_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Nod
         else:
             add_kwargs = node.kwargs
 
-        new_node = node.graph.create_node("call_function", add, kwargs=add_kwargs, name=node.name)
+        new_node = node.graph.create_node(
+            "call_function", add, kwargs=add_kwargs, name=node.name
+        )
         new_node.meta = node.meta
         return new_node
 
@@ -2457,7 +2521,9 @@ def custom_torch_add_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Nod
         ("input", "input"),
     ],
 )
-def packed_quantized_linear_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Node:
+def packed_quantized_linear_mapper(
+    node: torch.fx.Node, mod: nn.Module
+) -> torch.fx.Node:
     """
     Mapping from quantized_linear module to acc_op.linear. We unpack weight and bias
     in this mapper and pass them directly to linear node.
@@ -2476,12 +2542,16 @@ def packed_quantized_linear_mapper(node: torch.fx.Node, mod: nn.Module) -> torch
     with node.graph.inserting_before(node):
         # Insert get_attr nodes for weight and bias
         get_weight = node.graph.get_attr(weight_name)
-        get_weight.meta["tensor_meta"] = _extract_tensor_metadata(linear_module.weight())
+        get_weight.meta["tensor_meta"] = _extract_tensor_metadata(
+            linear_module.weight()
+        )
 
         get_bias = None
         if linear_module.bias() is not None:
             get_bias = node.graph.get_attr(bias_name)
-            get_bias.meta["tensor_meta"] = _extract_tensor_metadata(linear_module.bias())
+            get_bias.meta["tensor_meta"] = _extract_tensor_metadata(
+                linear_module.bias()
+            )
 
         qparams = {"scale": linear_module.scale, "zero_point": linear_module.zero_point}
         # Create kwargs for acc_op.quantized_linear
@@ -2503,7 +2573,9 @@ def packed_quantized_linear_mapper(node: torch.fx.Node, mod: nn.Module) -> torch
         ("input", "input"),
     ],
 )
-def packed_quantized_conv2d_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Node:
+def packed_quantized_conv2d_mapper(
+    node: torch.fx.Node, mod: nn.Module
+) -> torch.fx.Node:
     """
     Mapping from quantzed Conv2d module to acc_op.conv. We unpack all the parameters
     in this mapper and pass them directly to conv2d node.
@@ -2558,7 +2630,9 @@ def packed_quantized_conv2d_mapper(node: torch.fx.Node, mod: nn.Module) -> torch
         ("zero_point", "zero_point"),
     ],
 )
-def add_relu_unfuse_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule) -> torch.fx.Node:
+def add_relu_unfuse_mapper(
+    node: torch.fx.Node, mod: torch.fx.GraphModule
+) -> torch.fx.Node:
     with node.graph.inserting_before(node):
         qparams = {
             "scale": node.kwargs["scale"],
@@ -2572,7 +2646,9 @@ def add_relu_unfuse_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule) -> to
         add_node = node.graph.call_function(quantized_add, kwargs=add_kwargs)
         add_node.meta = node.meta.copy()
 
-        relu_node = node.graph.call_function(relu, kwargs={"input": add_node, "inplace": False})
+        relu_node = node.graph.call_function(
+            relu, kwargs={"input": add_node, "inplace": False}
+        )
         relu_node.meta = node.meta
         return relu_node
 
@@ -2583,7 +2659,9 @@ def add_relu_unfuse_mapper(node: torch.fx.Node, mod: torch.fx.GraphModule) -> to
         ("input", "input"),
     ],
 )
-def packed_quantized_convrelu2d_mapper(node: torch.fx.Node, mod: nn.Module) -> torch.fx.Node:
+def packed_quantized_convrelu2d_mapper(
+    node: torch.fx.Node, mod: nn.Module
+) -> torch.fx.Node:
     """
     Mapping from quantized ConvReLU2d module to acc_op.relu. We use packed_quantized_conv2d_mapper to unpack all the parameters
     in this mapper and pass the returned conv2d node directly to relu node.
@@ -2594,7 +2672,9 @@ def packed_quantized_convrelu2d_mapper(node: torch.fx.Node, mod: nn.Module) -> t
         conv2d_node = packed_quantized_conv2d_mapper(node, mod)
 
         # relu op
-        relu_node = node.graph.call_function(relu, kwargs={"input": conv2d_node, "inplace": False})
+        relu_node = node.graph.call_function(
+            relu, kwargs={"input": conv2d_node, "inplace": False}
+        )
         relu_node.meta = node.meta
         return relu_node
 
@@ -2657,10 +2737,14 @@ def expand_as_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
     Maps expand_as(other) to expand(other.size())
     """
     with node.graph.inserting_before(node):
-        size_node = node.graph.call_function(size, kwargs={"input": node.kwargs["other"]})
+        size_node = node.graph.call_function(
+            size, kwargs={"input": node.kwargs["other"]}
+        )
         size_node.meta["type"] = torch.Size
 
-        expand_node = node.graph.call_function(expand, kwargs={"input": node.kwargs["input"], "sizes": size_node})
+        expand_node = node.graph.call_function(
+            expand, kwargs={"input": node.kwargs["input"], "sizes": size_node}
+        )
         expand_node.meta = node.meta.copy()
         return expand_node
 
@@ -2752,7 +2836,8 @@ def tensor_split(*, input, indices_or_sections, dim=0):
         return torch.tensor_split(input, indices=tuple(indices_or_sections), dim=dim)
     else:
         raise RuntimeError(
-            f"Expected int, Iterable or Tensor for " f"indices_or_sections arg, got: {type(indices_or_sections)}"
+            f"Expected int, Iterable or Tensor for "
+            f"indices_or_sections arg, got: {type(indices_or_sections)}"
         )
 
 
@@ -2820,7 +2905,9 @@ def einsum(*, equation, operands):
 )
 @register_acc_op
 def as_strided(*, input, size, stride, storage_offset=0):
-    return torch.as_strided(input=input, size=size, stride=stride, storage_offset=storage_offset)
+    return torch.as_strided(
+        input=input, size=size, stride=stride, storage_offset=storage_offset
+    )
 
 
 @register_acc_op_mapping(op_and_target=("call_function", torch.var))
diff --git a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_utils.py b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_utils.py
index 18174ba659..4c3a79dc4c 100644
--- a/py/torch_tensorrt/fx/tracer/acc_tracer/acc_utils.py
+++ b/py/torch_tensorrt/fx/tracer/acc_tracer/acc_utils.py
@@ -60,7 +60,9 @@ def is_acc_op(node_or_target: Union[Callable, torch.fx.Node]) -> bool:
     return "acc_ops" in target.__module__
 
 
-def is_acc_op_with_kwarg(node_or_target: Union[Callable, torch.fx.Node], kwarg: str) -> bool:
+def is_acc_op_with_kwarg(
+    node_or_target: Union[Callable, torch.fx.Node], kwarg: str
+) -> bool:
     """
     Helper that inspects `node_or_target` and returns whether it is an acc_op node
     (or a target for an acc_op) that has an arg signature that includes `kwarg`.
@@ -68,7 +70,11 @@ def is_acc_op_with_kwarg(node_or_target: Union[Callable, torch.fx.Node], kwarg:
     if not is_acc_op(node_or_target):
         return False
 
-    target = node_or_target.target if isinstance(node_or_target, torch.fx.Node) else node_or_target
+    target = (
+        node_or_target.target
+        if isinstance(node_or_target, torch.fx.Node)
+        else node_or_target
+    )
     assert not isinstance(target, str)
     return kwarg in inspect.signature(inspect.unwrap(target)).parameters
 
@@ -174,7 +180,9 @@ def map_tensor_metadata(a: Any, fn: Callable):
         return fn(a)
     elif isinstance(a, tuple):
         return tuple(map_tensor_metadata(elem, fn) for elem in a)
-    assert isinstance(a, list), f"Only supporting tuple/list/TensorMetadata, but found {type(a)}"
+    assert isinstance(
+        a, list
+    ), f"Only supporting tuple/list/TensorMetadata, but found {type(a)}"
     return immutable_list(map_tensor_metadata(elem, fn) for elem in a)
 
 
diff --git a/py/torch_tensorrt/fx/tracer/dispatch_tracer/tracer.py b/py/torch_tensorrt/fx/tracer/dispatch_tracer/tracer.py
index 4b9c358d26..f3ba9abe3f 100644
--- a/py/torch_tensorrt/fx/tracer/dispatch_tracer/tracer.py
+++ b/py/torch_tensorrt/fx/tracer/dispatch_tracer/tracer.py
@@ -46,7 +46,9 @@ def wrap_with_proxy(e, proxy):
             return e
 
     if isinstance(real_out, tuple):
-        return tuple([wrap_with_proxy(e, proxy_out[idx]) for idx, e in enumerate(real_out)])
+        return tuple(
+            [wrap_with_proxy(e, proxy_out[idx]) for idx, e in enumerate(real_out)]
+        )
     elif isinstance(real_out, list):
         return [wrap_with_proxy(e, proxy_out[idx]) for idx, e in enumerate(real_out)]
     elif type(real_out) == torch.Tensor:
@@ -101,7 +103,9 @@ class DispatchTracer(Tracer):
 
     def __init__(self, leaf_module_list: Optional[Set[str]] = None):
         super().__init__()
-        self.leaf_module_list = (leaf_module_list or set()).union(DEFAULT_LEAF_MODULE_LIST)
+        self.leaf_module_list = (leaf_module_list or set()).union(
+            DEFAULT_LEAF_MODULE_LIST
+        )
 
     # User can use leaf_module_list but it won't work combine with functionalize
     def call_module(
@@ -121,9 +125,13 @@ def call_module(
             setattr(self.root, qualname, m)
             proxy_args = pytree.tree_map(unwrap_proxy, args)
             proxy_kwargs = pytree.tree_map(unwrap_proxy, kwargs)
-            proxy_out = self.create_proxy("call_module", qualname, proxy_args, proxy_kwargs)
+            proxy_out = self.create_proxy(
+                "call_module", qualname, proxy_args, proxy_kwargs
+            )
 
-            return build_outputs(forward, forward, args, kwargs, proxy_out, call_module=True)
+            return build_outputs(
+                forward, forward, args, kwargs, proxy_out, call_module=True
+            )
         return forward(*args, **kwargs)
 
     def is_leaf_module(self, m) -> bool:
@@ -164,7 +172,9 @@ def dispatch_trace(
     leaf_module_list: Optional[Set[str]] = None,
     concrete_args=None,
 ) -> GraphModule:
-    name = root.__class__.__name__ if isinstance(root, torch.nn.Module) else root.__name__
+    name = (
+        root.__class__.__name__ if isinstance(root, torch.nn.Module) else root.__name__
+    )
     tracer = DispatchTracer(leaf_module_list)
     graph = tracer.trace(root, concrete_args=concrete_args)
     gm = GraphModule(tracer.root, graph, name)
@@ -189,7 +199,9 @@ def wrapped(*args):
         out = f(*tree_args)
         flat_outs, out_spec = pytree.tree_flatten(out)
         for idx in range(len(flat_outs)):
-            if isinstance(flat_outs[idx], torch.Tensor) and isinstance(flat_outs[idx], DispatchTensor):
+            if isinstance(flat_outs[idx], torch.Tensor) and isinstance(
+                flat_outs[idx], DispatchTensor
+            ):
                 flat_outs[idx] = flat_outs[idx].proxy
         return pytree.tree_unflatten(flat_outs, out_spec)
 
diff --git a/py/torch_tensorrt/fx/trt_module.py b/py/torch_tensorrt/fx/trt_module.py
index f1ae3533c0..099bbfcdc9 100644
--- a/py/torch_tensorrt/fx/trt_module.py
+++ b/py/torch_tensorrt/fx/trt_module.py
@@ -8,7 +8,9 @@
 
 
 class TRTModule(torch.nn.Module):
-    def __init__(self, engine=None, input_names=None, output_names=None, cuda_graph_batch_size=-1):
+    def __init__(
+        self, engine=None, input_names=None, output_names=None, cuda_graph_batch_size=-1
+    ):
         super(TRTModule, self).__init__()
         self._register_state_dict_hook(TRTModule._on_state_dict)
         self.engine = engine
@@ -37,26 +39,35 @@ def _initialize(self):
         primary_input_outputs.update(self.output_binding_indices_in_order)
         self.hidden_output_binding_indices_in_order: Sequence[int] = []
         self.hidden_output_names: Sequence[str] = []
-        for i in range(self.engine.num_bindings // self.engine.num_optimization_profiles):
+        for i in range(
+            self.engine.num_bindings // self.engine.num_optimization_profiles
+        ):
             if i not in primary_input_outputs:
                 self.hidden_output_binding_indices_in_order.append(i)
                 self.hidden_output_names.append(self.engine.get_binding_name(i))
 
         assert (self.engine.num_bindings // self.engine.num_optimization_profiles) == (
-            len(self.input_names) + len(self.output_names) + len(self.hidden_output_names)
+            len(self.input_names)
+            + len(self.output_names)
+            + len(self.hidden_output_names)
         )
 
         self.input_dtypes: Sequence[torch.dtype] = [
-            torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) for idx in self.input_binding_indices_in_order
+            torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
+            for idx in self.input_binding_indices_in_order
         ]
         self.input_shapes: Sequence[Sequence[int]] = [
-            tuple(self.engine.get_binding_shape(idx)) for idx in self.input_binding_indices_in_order
+            tuple(self.engine.get_binding_shape(idx))
+            for idx in self.input_binding_indices_in_order
         ]
         self.output_dtypes: Sequence[torch.dtype] = [
-            torch_dtype_from_trt(self.engine.get_binding_dtype(idx)) for idx in self.output_binding_indices_in_order
+            torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
+            for idx in self.output_binding_indices_in_order
         ]
         self.output_shapes = [
-            tuple(self.engine.get_binding_shape(idx)) if self.engine.has_implicit_batch_dimension else tuple()
+            tuple(self.engine.get_binding_shape(idx))
+            if self.engine.has_implicit_batch_dimension
+            else tuple()
             for idx in self.output_binding_indices_in_order
         ]
         self.hidden_output_dtypes: Sequence[torch.dtype] = [
@@ -64,7 +75,9 @@ def _initialize(self):
             for idx in self.hidden_output_binding_indices_in_order
         ]
         self.hidden_output_shapes = [
-            tuple(self.engine.get_binding_shape(idx)) if self.engine.has_implicit_batch_dimension else tuple()
+            tuple(self.engine.get_binding_shape(idx))
+            if self.engine.has_implicit_batch_dimension
+            else tuple()
             for idx in self.hidden_output_binding_indices_in_order
         ]
 
@@ -126,11 +139,15 @@ def forward(self, *inputs):
                 batch_size = inputs[0].shape[0]
                 contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
                 bindings: List[Any] = [None] * (
-                    len(self.input_names) + len(self.output_names) + len(self.hidden_output_names)
+                    len(self.input_names)
+                    + len(self.output_names)
+                    + len(self.hidden_output_names)
                 )
 
                 for i, input_name in enumerate(self.input_names):
-                    assert inputs[i].is_cuda, f"{i}th input({input_name}) is not on cuda device."
+                    assert inputs[
+                        i
+                    ].is_cuda, f"{i}th input({input_name}) is not on cuda device."
                     assert (
                         inputs[i].dtype == self.input_dtypes[i]
                     ), f"Dtype mismatch for {i}th input({input_name}). Expect {self.input_dtypes[i]}, got {inputs[i].dtype}."
@@ -139,7 +156,9 @@ def forward(self, *inputs):
                     bindings[idx] = contiguous_inputs[i].data_ptr()
 
                     if not self.engine.has_implicit_batch_dimension:
-                        self.context.set_binding_shape(idx, tuple(contiguous_inputs[i].shape))
+                        self.context.set_binding_shape(
+                            idx, tuple(contiguous_inputs[i].shape)
+                        )
                     else:
                         assert inputs[i].size()[1:] == self.input_shapes[i], (
                             f"Shape mismatch for {i}th input({input_name}). "
@@ -179,9 +198,13 @@ def forward(self, *inputs):
 
             with torch.autograd.profiler.record_function("TRTModule:TensorRTRuntime"):
                 if self.engine.has_implicit_batch_dimension:
-                    self.context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream)
+                    self.context.execute_async(
+                        batch_size, bindings, torch.cuda.current_stream().cuda_stream
+                    )
                 else:
-                    self.context.execute_async_v2(bindings, torch.cuda.current_stream().cuda_stream)
+                    self.context.execute_async_v2(
+                        bindings, torch.cuda.current_stream().cuda_stream
+                    )
 
             if len(outputs) == 1:
                 return outputs[0]
diff --git a/py/torch_tensorrt/ptq.py b/py/torch_tensorrt/ptq.py
index 8a365024e0..326f35f942 100644
--- a/py/torch_tensorrt/ptq.py
+++ b/py/torch_tensorrt/ptq.py
@@ -81,7 +81,9 @@ def __new__(cls, *args, **kwargs):
         if not isinstance(dataloader, torch.utils.data.DataLoader):
             log(
                 Level.Error,
-                "Dataloader : {} is not a valid instance of torch.utils.data.DataLoader".format(dataloader),
+                "Dataloader : {} is not a valid instance of torch.utils.data.DataLoader".format(
+                    dataloader
+                ),
             )
 
         if not cache_file:
@@ -116,13 +118,21 @@ def __new__(cls, *args, **kwargs):
 
         # Using type metaclass to construct calibrator class based on algorithm type
         if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2:
-            return type("DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping
+            )()
         else:
             log(
                 Level.Error,
@@ -164,13 +174,21 @@ def __new__(cls, *args, **kwargs):
         }
         # Using type metaclass to construct calibrator class based on algorithm type
         if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2:
-            return type("DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping
+            )()
         elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION:
-            return type("DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping)()
+            return type(
+                "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping
+            )()
         else:
             log(
                 Level.Error,
diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py
index bd57582e80..154b29dd7b 100644
--- a/py/torch_tensorrt/ts/_compile_spec.py
+++ b/py/torch_tensorrt/ts/_compile_spec.py
@@ -39,15 +39,22 @@ def _supported_input_size_type(input_size: Any) -> bool:
 
 def _parse_input_ranges(input_sizes: List) -> List:
 
-    if any(not isinstance(i, dict) and not _supported_input_size_type(i) for i in input_sizes):
-        raise KeyError("An input size must either be a static size or a range of three sizes (min, opt, max) as Dict")
+    if any(
+        not isinstance(i, dict) and not _supported_input_size_type(i)
+        for i in input_sizes
+    ):
+        raise KeyError(
+            "An input size must either be a static size or a range of three sizes (min, opt, max) as Dict"
+        )
 
     parsed_input_sizes = []
     for i in input_sizes:
         if isinstance(i, dict):
             if all(k in i for k in ["min", "opt", "min"]):
                 parsed_input_sizes.append(
-                    Input(min_shape=i["min"], opt_shape=i["opt"], max_shape=i["max"])._to_internal()
+                    Input(
+                        min_shape=i["min"], opt_shape=i["opt"], max_shape=i["max"]
+                    )._to_internal()
                 )
 
             elif "opt" in i:
@@ -109,7 +116,11 @@ def _parse_device_type(device: Any) -> _enums.DeviceType:
         if device.type == "cuda":
             return _enums.DeviceType.gpu
         else:
-            ValueError("Got a device type other than GPU or DLA (type: " + str(device.type) + ")")
+            ValueError(
+                "Got a device type other than GPU or DLA (type: "
+                + str(device.type)
+                + ")"
+            )
     elif isinstance(device, _enums.DeviceType):
         return device
     elif isinstance(device, str):
@@ -118,7 +129,9 @@ def _parse_device_type(device: Any) -> _enums.DeviceType:
         elif device == "dla" or device == "DLA":
             return _enums.DeviceType.dla
         else:
-            ValueError("Got a device type other than GPU or DLA (type: " + str(device) + ")")
+            ValueError(
+                "Got a device type other than GPU or DLA (type: " + str(device) + ")"
+            )
     else:
         raise TypeError(
             "Device specification must be of type torch.device, string or torch_tensorrt.DeviceType, but got: "
@@ -193,12 +206,22 @@ def _parse_input_signature(input_signature: Any):
             input = _parse_input_signature(item)
             input_list.append(input)
         return input_list
-    elif isinstance(input_signature, Input) or isinstance(input_signature, torch.Tensor):
-        i = Input._from_tensor(input_signature) if isinstance(input_signature, torch.Tensor) else input_signature
+    elif isinstance(input_signature, Input) or isinstance(
+        input_signature, torch.Tensor
+    ):
+        i = (
+            Input._from_tensor(input_signature)
+            if isinstance(input_signature, torch.Tensor)
+            else input_signature
+        )
         clone = _internal_input_to_torch_class_input(i._to_internal())
         return clone
     else:
-        raise KeyError("Input signature contains an unsupported type {}".format(type(input_signature)))
+        raise KeyError(
+            "Input signature contains an unsupported type {}".format(
+                type(input_signature)
+            )
+        )
 
 
 def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
@@ -207,18 +230,29 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
     info = _ts_C.CompileSpec()
 
     if len(compile_spec["inputs"]) > 0:
-        if not all([isinstance(i, torch.Tensor) or isinstance(i, Input) for i in compile_spec["inputs"]]):
+        if not all(
+            [
+                isinstance(i, torch.Tensor) or isinstance(i, Input)
+                for i in compile_spec["inputs"]
+            ]
+        ):
             raise KeyError(
                 "Input specs should be either torch_tensorrt.Input or torch.Tensor, found types: {}".format(
                     [type(i) for i in compile_spec["inputs"]]
                 )
             )
 
-        inputs = [Input._from_tensor(i) if isinstance(i, torch.Tensor) else i for i in compile_spec["inputs"]]
+        inputs = [
+            Input._from_tensor(i) if isinstance(i, torch.Tensor) else i
+            for i in compile_spec["inputs"]
+        ]
         info.inputs = [i._to_internal() for i in inputs]
 
     elif compile_spec["input_signature"] is not None:
-        log(Level.Warning, "Input signature parsing is an experimental feature, behavior and APIs may change")
+        log(
+            Level.Warning,
+            "Input signature parsing is an experimental feature, behavior and APIs may change",
+        )
         signature = _parse_input_signature(compile_spec["input_signature"])
         info.input_signature = _C.InputSignature(signature)  # py_object
 
@@ -227,7 +261,10 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
                 "Grouped inputs currently requires partial compilation to be enabled, this restriction will be relaxed in a future release"
             )
 
-        log(Level.Debug, "Grouped inputs currently requires additional settings to enable the feature")
+        log(
+            Level.Debug,
+            "Grouped inputs currently requires additional settings to enable the feature",
+        )
         log(
             Level.Debug,
             """Adding the following ops to torch_executed_ops:
@@ -239,12 +276,20 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
     - prim::TupleUnpack
 """,
         )
-        compile_spec["torch_fallback"]["forced_fallback_ops"].append("aten::__getitem__")
-        compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::ListConstruct")
+        compile_spec["torch_fallback"]["forced_fallback_ops"].append(
+            "aten::__getitem__"
+        )
+        compile_spec["torch_fallback"]["forced_fallback_ops"].append(
+            "prim::ListConstruct"
+        )
         compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::ListUnpack")
         compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleIndex")
-        compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleConstruct")
-        compile_spec["torch_fallback"]["forced_fallback_ops"].append("prim::TupleUnpack")
+        compile_spec["torch_fallback"]["forced_fallback_ops"].append(
+            "prim::TupleConstruct"
+        )
+        compile_spec["torch_fallback"]["forced_fallback_ops"].append(
+            "prim::TupleUnpack"
+        )
 
     else:
         raise KeyError(
@@ -252,7 +297,9 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
         )
 
     if "enabled_precisions" in compile_spec:
-        info.enabled_precisions = _parse_enabled_precisions(compile_spec["enabled_precisions"])
+        info.enabled_precisions = _parse_enabled_precisions(
+            compile_spec["enabled_precisions"]
+        )
 
     if "calibrator" in compile_spec:
         info.ptq_calibrator = compile_spec["calibrator"]
@@ -392,7 +439,9 @@ def TensorRTCompileSpec(
     backend_spec = torch.classes.tensorrt.CompileSpec()
 
     if input_signature is not None:
-        raise ValueError("Input signature parsing is not currently supported in the TorchScript backend integration")
+        raise ValueError(
+            "Input signature parsing is not currently supported in the TorchScript backend integration"
+        )
 
     for i in parsed_spec.inputs:
         clone = _internal_input_to_torch_class_input(i)
@@ -412,8 +461,12 @@ def TensorRTCompileSpec(
     torch_fallback = torch.classes.tensorrt._TorchFallback()
     torch_fallback._set_enabled(parsed_spec.torch_fallback.enabled)
     torch_fallback._set_min_block_size(parsed_spec.torch_fallback.min_block_size)
-    torch_fallback._set_forced_fallback_operators(parsed_spec.torch_fallback.forced_fallback_operators)
-    torch_fallback._set_forced_fallback_modules(parsed_spec.torch_fallback.forced_fallback_modules)
+    torch_fallback._set_forced_fallback_operators(
+        parsed_spec.torch_fallback.forced_fallback_operators
+    )
+    torch_fallback._set_forced_fallback_modules(
+        parsed_spec.torch_fallback.forced_fallback_modules
+    )
 
     backend_spec._set_device(d)
     backend_spec._set_torch_fallback(torch_fallback)
diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py
index e1b6c18c49..c88651f7ba 100644
--- a/py/torch_tensorrt/ts/_compiler.py
+++ b/py/torch_tensorrt/ts/_compiler.py
@@ -104,7 +104,9 @@ def compile(
             "torch.jit.ScriptFunction currently is not directly supported, wrap the function in a module to compile"
         )
 
-    if require_full_compilation and (len(torch_executed_modules) > 0 or len(torch_executed_ops) > 0):
+    if require_full_compilation and (
+        len(torch_executed_modules) > 0 or len(torch_executed_ops) > 0
+    ):
         raise ValueError(
             f"require_full_compilation is enabled however the list of modules and ops to run in torch is not empty. Found: torch_executed_ops: {torch_executed_ops}, torch_executed_modules: {torch_executed_modules}"
         )
@@ -236,10 +238,14 @@ def convert_method_to_trt_engine(
         "truncate_long_and_double": truncate_long_and_double,
     }
 
-    return _C.convert_graph_to_trt_engine(module._c, method_name, _parse_compile_spec(compile_spec))
+    return _C.convert_graph_to_trt_engine(
+        module._c, method_name, _parse_compile_spec(compile_spec)
+    )
 
 
-def embed_engine_in_new_module(serialized_engine: bytes, device=Device._current_device()) -> torch.jit.ScriptModule:
+def embed_engine_in_new_module(
+    serialized_engine: bytes, device=Device._current_device()
+) -> torch.jit.ScriptModule:
     """Takes a pre-built serialized TensorRT engine and embeds it within a TorchScript module
 
     Takes a pre-built serialied TensorRT engine (as bytes) and embeds it within a TorchScript module.
diff --git a/tests/modules/hub.py b/tests/modules/hub.py
index e0c27f0b28..6b1b87d08d 100644
--- a/tests/modules/hub.py
+++ b/tests/modules/hub.py
@@ -14,7 +14,9 @@
 
 # Detect case of no GPU before deserialization of models on GPU
 if not torch.cuda.is_available():
-    raise Exception("No GPU found. Please check if installed torch version is compatible with CUDA version")
+    raise Exception(
+        "No GPU found. Please check if installed torch version is compatible with CUDA version"
+    )
 
 # Downloads all model files again if manifest file is not present
 MANIFEST_FILE = "model_manifest.json"
@@ -45,14 +47,19 @@
         "path": "both",
     },
     "ssd": {
-        "model": torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_ssd", model_math="fp32"),
+        "model": torch.hub.load(
+            "NVIDIA/DeepLearningExamples:torchhub", "nvidia_ssd", model_math="fp32"
+        ),
         "path": "trace",
     },
     "efficientnet_b0": {
         "model": timm.create_model("efficientnet_b0", pretrained=True),
         "path": "script",
     },
-    "vit": {"model": timm.create_model("vit_base_patch16_224", pretrained=True), "path": "script"},
+    "vit": {
+        "model": timm.create_model("vit_base_patch16_224", pretrained=True),
+        "path": "script",
+    },
     "pooling": {"model": cm.Pool(), "path": "trace"},
     "module_fallback": {"model": cm.ModuleFallbackMain(), "path": "script"},
     "loop_fallback_eval": {"model": cm.LoopFallbackEval(), "path": "script"},
@@ -107,7 +114,11 @@ def download_models(version_matches, manifest):
             traced_filename = n + "_traced.jit.pt"
             # Check if model file exists on disk
             if (
-                (m["path"] == "both" and os.path.exists(scripted_filename) and os.path.exists(traced_filename))
+                (
+                    m["path"] == "both"
+                    and os.path.exists(scripted_filename)
+                    and os.path.exists(traced_filename)
+                )
                 or (m["path"] == "script" and os.path.exists(scripted_filename))
                 or (m["path"] == "trace" and os.path.exists(traced_filename))
             ):
diff --git a/tests/py/api/test_classes.py b/tests/py/api/test_classes.py
index ff1edfa734..ff3c50155b 100644
--- a/tests/py/api/test_classes.py
+++ b/tests/py/api/test_classes.py
@@ -69,17 +69,25 @@ def field_is_correct(field, equal_fn, a1, a2):
         min_ = field_is_correct("min", list_eq, internal.min, target["min"])
         opt_ = field_is_correct("opt", list_eq, internal.opt, target["opt"])
         max_ = field_is_correct("max", list_eq, internal.max, target["max"])
-        is_dynamic_ = field_is_correct("is_dynamic", eq, internal.input_is_dynamic, target["input_is_dynamic"])
+        is_dynamic_ = field_is_correct(
+            "is_dynamic", eq, internal.input_is_dynamic, target["input_is_dynamic"]
+        )
         explicit_set_dtype_ = field_is_correct(
             "explicit_dtype",
             eq,
             internal._explicit_set_dtype,
             target["explicit_set_dtype"],
         )
-        dtype_ = field_is_correct("dtype", eq, int(internal.dtype), int(target["dtype"]))
-        format_ = field_is_correct("format", eq, int(internal.format), int(target["format"]))
+        dtype_ = field_is_correct(
+            "dtype", eq, int(internal.dtype), int(target["dtype"])
+        )
+        format_ = field_is_correct(
+            "format", eq, int(internal.format), int(target["format"])
+        )
 
-        return all([min_, opt_, max_, is_dynamic_, explicit_set_dtype_, dtype_, format_])
+        return all(
+            [min_, opt_, max_, is_dynamic_, explicit_set_dtype_, dtype_, format_]
+        )
 
     def test_infer_from_example_tensor(self):
         shape = [1, 3, 255, 255]
@@ -177,7 +185,9 @@ def test_dynamic_shape(self):
             "explicit_set_dtype": False,
         }
 
-        i = torchtrt.Input(min_shape=min_shape, opt_shape=opt_shape, max_shape=max_shape)
+        i = torchtrt.Input(
+            min_shape=min_shape, opt_shape=opt_shape, max_shape=max_shape
+        )
         self.assertTrue(self._verify_correctness(i, target))
 
         i = torchtrt.Input(
diff --git a/tests/py/api/test_collections.py b/tests/py/api/test_collections.py
index d284fe873b..dfae3f18c9 100644
--- a/tests/py/api/test_collections.py
+++ b/tests/py/api/test_collections.py
@@ -24,16 +24,27 @@ class TestStandardTensorInput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/standard_tensor_input_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/standard_tensor_input_scripted.jit.pt")
+            .eval()
+            .to("cuda")
+        )
 
         compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],
+            "inputs": [
+                torchtrt.Input(self.input.shape),
+                torchtrt.Input(self.input.shape),
+            ],
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
         }
 
         trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-        same = (trt_mod(self.input, self.input) - self.model(self.input, self.input)).abs().max()
+        same = (
+            (trt_mod(self.input, self.input) - self.model(self.input, self.input))
+            .abs()
+            .max()
+        )
         self.assertTrue(same < 2e-2)
 
 
@@ -41,17 +52,27 @@ class TestTupleInput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/tuple_input_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/tuple_input_scripted.jit.pt")
+            .eval()
+            .to("cuda")
+        )
 
         compile_spec = {
-            "input_signature": ((torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),),
+            "input_signature": (
+                (torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),
+            ),
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
             "min_block_size": 1,
         }
 
         trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-        same = (trt_mod((self.input, self.input)) - self.model((self.input, self.input))).abs().max()
+        same = (
+            (trt_mod((self.input, self.input)) - self.model((self.input, self.input)))
+            .abs()
+            .max()
+        )
         self.assertTrue(same < 2e-2)
 
 
@@ -59,17 +80,25 @@ class TestListInput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/list_input_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/list_input_scripted.jit.pt").eval().to("cuda")
+        )
 
         compile_spec = {
-            "input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
+            "input_signature": (
+                [torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],
+            ),
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
             "min_block_size": 1,
         }
 
         trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-        same = (trt_mod([self.input, self.input]) - self.model([self.input, self.input])).abs().max()
+        same = (
+            (trt_mod([self.input, self.input]) - self.model([self.input, self.input]))
+            .abs()
+            .max()
+        )
         self.assertTrue(same < 2e-2)
 
 
@@ -77,10 +106,16 @@ class TestTupleInputOutput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/tuple_input_output_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/tuple_input_output_scripted.jit.pt")
+            .eval()
+            .to("cuda")
+        )
 
         compile_spec = {
-            "input_signature": ((torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),),
+            "input_signature": (
+                (torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)),
+            ),
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
             "min_block_size": 1,
@@ -97,10 +132,16 @@ class TestListInputOutput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/list_input_output_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/list_input_output_scripted.jit.pt")
+            .eval()
+            .to("cuda")
+        )
 
         compile_spec = {
-            "input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
+            "input_signature": (
+                [torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],
+            ),
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
             "min_block_size": 1,
@@ -117,10 +158,16 @@ class TestListInputTupleOutput(unittest.TestCase):
     def test_compile(self):
 
         self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.model = torch.jit.load(MODULE_DIR + "/list_input_tuple_output_scripted.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/list_input_tuple_output_scripted.jit.pt")
+            .eval()
+            .to("cuda")
+        )
 
         compile_spec = {
-            "input_signature": ([torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],),
+            "input_signature": (
+                [torchtrt.Input(self.input.shape), torchtrt.Input(self.input.shape)],
+            ),
             "device": torchtrt.Device("gpu:0"),
             "enabled_precisions": {torch.float},
             "min_block_size": 1,
diff --git a/tests/py/api/test_e2e_behavior.py b/tests/py/api/test_e2e_behavior.py
index 3be13c06d6..d1da3e0465 100644
--- a/tests/py/api/test_e2e_behavior.py
+++ b/tests/py/api/test_e2e_behavior.py
@@ -23,7 +23,11 @@ def test_compile_script_half(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max()
+        same = (
+            (trt_mod(self.input.half()) - self.scripted_model(self.input.half()))
+            .abs()
+            .max()
+        )
         torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same))
         self.assertTrue(same < 3e-2)
 
@@ -43,7 +47,11 @@ def test_compile_script_half_by_default(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max()
+        same = (
+            (trt_mod(self.input.half()) - self.scripted_model(self.input.half()))
+            .abs()
+            .max()
+        )
         torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same))
         self.assertTrue(same < 3e-2)
 
@@ -159,7 +167,9 @@ def test_input_use_default_fp16_without_fp16_enabled(self):
         half_mod = torch.jit.script(self.model)
         half_mod.half()
 
-        trt_mod = torchtrt.ts.compile(half_mod, inputs=[torchtrt.Input(self.input.shape)])
+        trt_mod = torchtrt.ts.compile(
+            half_mod, inputs=[torchtrt.Input(self.input.shape)]
+        )
         trt_mod(self.input.half())
 
     def test_input_respect_user_setting_fp16_weights_fp32_in(self):
diff --git a/tests/py/api/test_ts_backend.py b/tests/py/api/test_ts_backend.py
index 4dd89b5518..d0654a8f75 100644
--- a/tests/py/api/test_ts_backend.py
+++ b/tests/py/api/test_ts_backend.py
@@ -13,7 +13,11 @@ def test_compile_traced(self):
         self.traced_model = torch.jit.trace(self.model, [self.input])
 
         compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape, dtype=torch.float, format=torch.contiguous_format)],
+            "inputs": [
+                torchtrt.Input(
+                    self.input.shape, dtype=torch.float, format=torch.contiguous_format
+                )
+            ],
             "device": {
                 "device_type": torchtrt.DeviceType.GPU,
                 "gpu_id": 0,
@@ -140,8 +144,12 @@ def test_pt_to_trt_to_pt(self):
             },
         }
 
-        trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.ts_model, "forward", **compile_spec)
-        trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine, torchtrt.Device("cuda:0"))
+        trt_engine = torchtrt.ts.convert_method_to_trt_engine(
+            self.ts_model, "forward", **compile_spec
+        )
+        trt_mod = torchtrt.ts.embed_engine_in_new_module(
+            trt_engine, torchtrt.Device("cuda:0")
+        )
         same = (trt_mod(self.input) - self.ts_model(self.input)).abs().max()
         self.assertTrue(same < 2e-3)
 
diff --git a/tests/py/hw/test_api_dla.py b/tests/py/hw/test_api_dla.py
index 78efdea59b..57b149faa7 100644
--- a/tests/py/hw/test_api_dla.py
+++ b/tests/py/hw/test_api_dla.py
@@ -61,7 +61,9 @@ def test_compile_script(self):
 
 def test_suite():
     suite = unittest.TestSuite()
-    suite.addTest(TestCompile.parametrize(TestCompile, model=models.resnet18(pretrained=True)))
+    suite.addTest(
+        TestCompile.parametrize(TestCompile, model=models.resnet18(pretrained=True))
+    )
 
     return suite
 
diff --git a/tests/py/hw/test_multi_gpu.py b/tests/py/hw/test_multi_gpu.py
index d9685a0968..c068cc71b0 100644
--- a/tests/py/hw/test_multi_gpu.py
+++ b/tests/py/hw/test_multi_gpu.py
@@ -9,7 +9,9 @@
 class TestMultiGpuSwitching(ModelTestCase):
     def setUp(self):
         if torch.cuda.device_count() < 2:
-            self.fail("Test is not relevant for this platform since number of available CUDA devices is less than 2")
+            self.fail(
+                "Test is not relevant for this platform since number of available CUDA devices is less than 2"
+            )
 
         torchtrt.set_device(0)
         self.target_gpu = 1
@@ -60,7 +62,9 @@ def test_compile_script(self):
 class TestMultiGpuSerializeDeserializeSwitching(ModelTestCase):
     def setUp(self):
         if torch.cuda.device_count() < 2:
-            self.fail("Test is not relevant for this platform since number of available CUDA devices is less than 2")
+            self.fail(
+                "Test is not relevant for this platform since number of available CUDA devices is less than 2"
+            )
 
         self.target_gpu = 0
         torchtrt.set_device(0)
@@ -110,7 +114,11 @@ def test_compile_script(self):
 
 def test_suite():
     suite = unittest.TestSuite()
-    suite.addTest(TestMultiGpuSwitching.parametrize(TestMultiGpuSwitching, model=models.resnet18(pretrained=True)))
+    suite.addTest(
+        TestMultiGpuSwitching.parametrize(
+            TestMultiGpuSwitching, model=models.resnet18(pretrained=True)
+        )
+    )
     suite.addTest(
         TestMultiGpuSerializeDeserializeSwitching.parametrize(
             TestMultiGpuSwitching, model=models.resnet18(pretrained=True)
diff --git a/tests/py/integrations/test_to_backend_api.py b/tests/py/integrations/test_to_backend_api.py
index f66a0fea92..16d839b1b0 100644
--- a/tests/py/integrations/test_to_backend_api.py
+++ b/tests/py/integrations/test_to_backend_api.py
@@ -31,7 +31,9 @@ def setUp(self):
 
     def test_to_backend_lowering(self):
         trt_mod = torch._C._jit_to_backend("tensorrt", self.scripted_model, self.spec)
-        same = (trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max()
+        same = (
+            (trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max()
+        )
         self.assertTrue(same < 2e-3)
 
 
diff --git a/tests/py/integrations/test_trt_intercompatibility.py b/tests/py/integrations/test_trt_intercompatibility.py
index e0db41adf9..96b47b7ccc 100644
--- a/tests/py/integrations/test_trt_intercompatibility.py
+++ b/tests/py/integrations/test_trt_intercompatibility.py
@@ -22,7 +22,9 @@ def test_pt_to_trt(self):
             },
         }
 
-        trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.ts_model, "forward", **compile_spec)
+        trt_engine = torchtrt.ts.convert_method_to_trt_engine(
+            self.ts_model, "forward", **compile_spec
+        )
 
         TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
         with trt.Runtime(TRT_LOGGER) as rt:
@@ -36,7 +38,9 @@ def test_pt_to_trt(self):
                 ctx.execute_async(
                     batch_size=1,
                     bindings=bindings,
-                    stream_handle=torch.cuda.current_stream(device="cuda:0").cuda_stream,
+                    stream_handle=torch.cuda.current_stream(
+                        device="cuda:0"
+                    ).cuda_stream,
                 )
                 same = (out - self.ts_model(self.input)).abs().max()
                 self.assertTrue(same < 2e-3)
diff --git a/tests/py/ptq/test_ptq_dataloader_calibrator.py b/tests/py/ptq/test_ptq_dataloader_calibrator.py
index 92df6876cb..2ee1fa5b08 100644
--- a/tests/py/ptq/test_ptq_dataloader_calibrator.py
+++ b/tests/py/ptq/test_ptq_dataloader_calibrator.py
@@ -52,7 +52,9 @@ def compute_accuracy(testing_dataloader, model):
 class TestAccuracy(unittest.TestCase):
     def test_compile_script(self):
 
-        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        )
         self.input = torch.randn((1, 3, 32, 32)).to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(
             root="./data",
@@ -61,7 +63,9 @@ def test_compile_script(self):
             transform=transforms.Compose(
                 [
                     transforms.ToTensor(),
-                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                    transforms.Normalize(
+                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                    ),
                 ]
             ),
         )
diff --git a/tests/py/ptq/test_ptq_to_backend.py b/tests/py/ptq/test_ptq_to_backend.py
index 1676137a56..3a0a5bf336 100644
--- a/tests/py/ptq/test_ptq_to_backend.py
+++ b/tests/py/ptq/test_ptq_to_backend.py
@@ -50,7 +50,9 @@ def compute_accuracy(testing_dataloader, model):
 
 class TestAccuracy(unittest.TestCase):
     def test_compile_script(self):
-        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        )
         self.input = torch.randn((1, 3, 32, 32)).to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(
             root="./data",
@@ -59,7 +61,9 @@ def test_compile_script(self):
             transform=transforms.Compose(
                 [
                     transforms.ToTensor(),
-                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                    transforms.Normalize(
+                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                    ),
                 ]
             ),
         )
diff --git a/tests/py/ptq/test_ptq_trt_calibrator.py b/tests/py/ptq/test_ptq_trt_calibrator.py
index 03a1ed89cd..bda117d3a5 100644
--- a/tests/py/ptq/test_ptq_trt_calibrator.py
+++ b/tests/py/ptq/test_ptq_trt_calibrator.py
@@ -69,7 +69,10 @@ def get_batch_size(self):
     # You don't necessarily have to use them, but they can be useful to understand the order of
     # the inputs. The bindings list is expected to have the same ordering as 'names'.
     def get_batch(self, names):
-        if self.current_batch_idx + self.batch_size > self.dataloader.dataset.data.shape[0]:
+        if (
+            self.current_batch_idx + self.batch_size
+            > self.dataloader.dataset.data.shape[0]
+        ):
             return None
 
         batch = self.dataset_iterator.next()
@@ -93,7 +96,9 @@ def write_calibration_cache(self, cache):
 
 class TestAccuracy(unittest.TestCase):
     def test_compile_script(self):
-        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
+        )
         self.input = torch.randn((1, 3, 32, 32)).to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(
             root="./data",
@@ -102,7 +107,9 @@ def test_compile_script(self):
             transform=transforms.Compose(
                 [
                     transforms.ToTensor(),
-                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                    transforms.Normalize(
+                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                    ),
                 ]
             ),
         )
diff --git a/tests/py/qat/test_qat_trt_accuracy.py b/tests/py/qat/test_qat_trt_accuracy.py
index b89edfbf42..ce574c57fe 100644
--- a/tests/py/qat/test_qat_trt_accuracy.py
+++ b/tests/py/qat/test_qat_trt_accuracy.py
@@ -53,7 +53,9 @@ def compute_accuracy(testing_dataloader, model):
 
 class TestAccuracy(unittest.TestCase):
     def test_compile_script(self):
-        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16_qat.jit.pt").eval().to("cuda")
+        self.model = (
+            torch.jit.load(MODULE_DIR + "/trained_vgg16_qat.jit.pt").eval().to("cuda")
+        )
         self.testing_dataset = torchvision.datasets.CIFAR10(
             root="./data",
             train=False,
@@ -61,7 +63,9 @@ def test_compile_script(self):
             transform=transforms.Compose(
                 [
                     transforms.ToTensor(),
-                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+                    transforms.Normalize(
+                        (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
+                    ),
                 ]
             ),
         )
diff --git a/tools/linter/cpplint.py b/tools/linter/cpplint.py
index 2d085917f5..43a6474305 100644
--- a/tools/linter/cpplint.py
+++ b/tools/linter/cpplint.py
@@ -26,7 +26,9 @@ def lint(user, target_files, change_file=True):
     USER = pwd.getpwuid(os.getuid())[0]
     projects = utils.CHECK_PROJECTS(sys.argv[1:])
     if "//..." in projects:
-        projects = [p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")]
+        projects = [
+            p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")
+        ]
         projects = [p for p in projects if p not in utils.BLACKLISTED_BAZEL_TARGETS]
 
     for p in projects:
diff --git a/tools/linter/cpplint_diff.py b/tools/linter/cpplint_diff.py
index 89bc7965fe..307978e43f 100644
--- a/tools/linter/cpplint_diff.py
+++ b/tools/linter/cpplint_diff.py
@@ -10,7 +10,9 @@ def lint(target_files, color=True):
     failure = False
     for f in target_files:
         with open("/tmp/changes.txt", "w") as changes:
-            subprocess.run([clang_format._get_executable("clang-format"), f], stdout=changes)
+            subprocess.run(
+                [clang_format._get_executable("clang-format"), f], stdout=changes
+            )
         args = ["git", "diff", "-u", "--exit-code"]
         if color:
             args += ["--color"]
@@ -30,7 +32,9 @@ def lint(target_files, color=True):
 
     projects = utils.CHECK_PROJECTS(sys.argv[1:])
     if "//..." in projects:
-        projects = [p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")]
+        projects = [
+            p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")
+        ]
         projects = [p for p in projects if p not in utils.BLACKLISTED_BAZEL_TARGETS]
 
     failure = False
diff --git a/tools/linter/pylint.py b/tools/linter/pylint.py
index d32f89ef5f..d5ce8f2e15 100644
--- a/tools/linter/pylint.py
+++ b/tools/linter/pylint.py
@@ -28,7 +28,9 @@ def lint(user, target_files, change_file=True):
     USER = pwd.getpwuid(os.getuid())[0]
     projects = utils.CHECK_PROJECTS(sys.argv[1:])
     if "//..." in projects:
-        projects = [p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")]
+        projects = [
+            p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")
+        ]
         projects = [p for p in projects if p not in utils.BLACKLISTED_BAZEL_TARGETS]
 
     for p in projects:
diff --git a/tools/linter/pylint_diff.py b/tools/linter/pylint_diff.py
index beb585540d..de11bfa0af 100644
--- a/tools/linter/pylint_diff.py
+++ b/tools/linter/pylint_diff.py
@@ -30,7 +30,9 @@ def lint(target_files, color=True):
 
     projects = utils.CHECK_PROJECTS(sys.argv[1:])
     if "//..." in projects:
-        projects = [p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")]
+        projects = [
+            p.replace(BAZEL_ROOT, "/")[:-1] for p in glob.glob(BAZEL_ROOT + "/*/")
+        ]
         projects = [p for p in projects if p not in utils.BLACKLISTED_BAZEL_TARGETS]
 
     failure = False
diff --git a/tools/perf/perf_run.py b/tools/perf/perf_run.py
index 09be07ec51..f0386f4e5a 100644
--- a/tools/perf/perf_run.py
+++ b/tools/perf/perf_run.py
@@ -148,7 +148,9 @@ def run_tensorrt(model, input_tensors, params, precision, is_trt_engine=False):
 
         print("Converting method to TensorRT engine...")
         with torch.no_grad():
-            model = torchtrt.ts.convert_method_to_trt_engine(model, "forward", **compile_settings)
+            model = torchtrt.ts.convert_method_to_trt_engine(
+                model, "forward", **compile_settings
+            )
 
     # Deserialize the TensorRT engine
     with trt.Logger() as logger, trt.Runtime(logger) as runtime:
@@ -178,12 +180,16 @@ def run_tensorrt(model, input_tensors, params, precision, is_trt_engine=False):
     timings = []
     with engine.create_execution_context() as context:
         for i in range(WARMUP_ITER):
-            context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream)
+            context.execute_async(
+                batch_size, bindings, torch.cuda.current_stream().cuda_stream
+            )
             torch.cuda.synchronize()
 
         for i in range(iters):
             start_time = timeit.default_timer()
-            context.execute_async(batch_size, bindings, torch.cuda.current_stream().cuda_stream)
+            context.execute_async(
+                batch_size, bindings, torch.cuda.current_stream().cuda_stream
+            )
             torch.cuda.synchronize()
             end_time = timeit.default_timer()
             meas_time = end_time - start_time
@@ -199,10 +205,16 @@ def run(model, input_tensors, params, precision, is_trt_engine=False):
 
         if precision == "int8":
             if backend == "all" or backend == "torch":
-                print("int8 precision is not supported for torch runtime in this script yet")
+                print(
+                    "int8 precision is not supported for torch runtime in this script yet"
+                )
                 return False
 
-            if backend == "all" or backend == "torch_tensorrt" or params.get("calibration_cache", None) == None:
+            if (
+                backend == "all"
+                or backend == "torch_tensorrt"
+                or params.get("calibration_cache", None) == None
+            ):
                 print("int8 precision expects calibration cache file for inference")
                 return False
 
@@ -289,7 +301,9 @@ def load_model(params):
 
 
 if __name__ == "__main__":
-    arg_parser = argparse.ArgumentParser(description="Run inference on a model with random input values")
+    arg_parser = argparse.ArgumentParser(
+        description="Run inference on a model with random input values"
+    )
     arg_parser.add_argument(
         "--config",
         help="Load YAML based configuration file to run the inference. If this is used other params will be ignored",

From 37336a06bc764a9d791bd2a99145109715a37349 Mon Sep 17 00:00:00 2001
From: Wei Wei <wwei6@fb.com>
Date: Fri, 12 Aug 2022 16:13:39 -0700
Subject: [PATCH 6/6] update

---
 .../fx/test/converters/acc_op/test_convolution.py              | 3 ++-
 py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py    | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
index e08484cd56..1410510d65 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_convolution.py
@@ -144,7 +144,8 @@ def forward(self, x):
             ("tuple_parameters", 1, (1, 1, 1), (1, 1, 1)),
             param("non_zero_padding", 1, padding=1),
             param("dilation", 1, dilation=2),
-            param("groups", 1, groups=3),
+            # TODO TRT 8.4.1 will trigger issue with this test. T127981773
+            # param("groups", 1, groups=3),
         ]
     )
     def test_conv3d(
diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
index 839ff44566..0bfffd210f 100644
--- a/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
+++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_type_as.py
@@ -1,4 +1,5 @@
 import torch
+import unittest
 import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops
 from torch.testing._internal.common_utils import run_tests
 from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec
@@ -103,6 +104,7 @@ def forward(self, input):
             precision=LowerPrecision.FP16,
         )
 
+    @unittest.skip("Does not pass in TRT 8.4.1 T127981773")
     def test_type_tensor_with_dynamic_shape_four_dimensions(self):
         class Type_as(torch.nn.Module):
             def forward(self, input):