-
Notifications
You must be signed in to change notification settings - Fork 359
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Aten scatter converter #2664
Aten scatter converter #2664
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-02-27 21:00:39.705994+00:00
+++ /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-02-27 21:02:32.418639+00:00
@@ -1,10 +1,11 @@
"""
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
+
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-02-27 21:00:39.709994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-02-27 21:02:32.512531+00:00
@@ -30,16 +30,18 @@
gpu_id (int): Device ID for target GPU
dla_core (int): Core ID for target DLA core
allow_gpu_fallback (bool): Whether falling back to GPU if DLA cannot support an op should be allowed
"""
- device_type: Optional[
- trt.DeviceType
- ] = None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ device_type: Optional[trt.DeviceType] = (
+ None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ )
gpu_id: int = -1 #: Device ID for target GPU
dla_core: int = -1 #: Core ID for target DLA core
- allow_gpu_fallback: bool = False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ allow_gpu_fallback: bool = (
+ False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ )
def __init__(self, *args: Any, **kwargs: Any):
"""__init__ Method for torch_tensorrt.Device
Device accepts one of a few construction patterns
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-02-27 21:00:39.709994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-02-27 21:02:32.716334+00:00
@@ -26,16 +26,16 @@
class _ShapeMode(Enum):
STATIC = 0
DYNAMIC = 1
- shape_mode: Optional[
- _ShapeMode
- ] = None #: Is input statically or dynamically shaped
- shape: Optional[
- Tuple[int, ...] | Dict[str, Tuple[int, ...]]
- ] = None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ shape_mode: Optional[_ShapeMode] = (
+ None #: Is input statically or dynamically shaped
+ )
+ shape: Optional[Tuple[int, ...] | Dict[str, Tuple[int, ...]]] = (
+ None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ )
dtype: _enums.dtype = (
_enums.dtype.unknown
) #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
_explicit_set_dtype: bool = False
format: _enums.TensorFormat = (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-02-27 21:00:39.709994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-02-27 21:02:32.828822+00:00
@@ -215,13 +215,13 @@
"precision": precision,
"debug": debug,
"device": device,
"workspace_size": workspace_size,
"min_block_size": min_block_size,
- "torch_executed_ops": torch_executed_ops
- if torch_executed_ops is not None
- else set(),
+ "torch_executed_ops": (
+ torch_executed_ops if torch_executed_ops is not None else set()
+ ),
"pass_through_build_failures": pass_through_build_failures,
"max_aux_streams": max_aux_streams,
"version_compatible": version_compatible,
"optimization_level": optimization_level,
"use_python_runtime": use_python_runtime,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-02-27 21:02:32.957463+00:00
@@ -26,13 +26,13 @@
from packaging import version
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class UnsupportedOperatorException(RuntimeError):
pass
@@ -90,13 +90,13 @@
self.input_specs_iter = 0
self._cur_node_name: Optional[str] = None
self._cur_node: Optional[torch.fx.Node] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
self.compilation_settings = compilation_settings
# Data types for TRT Module output Tensors
self.output_dtypes = output_dtypes
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-02-27 21:02:33.024354+00:00
@@ -322,17 +322,15 @@
else:
raise AssertionError(f"Cannot convert {input_val} to TRT constant")
@overload
-def get_positive_dim(dim: int, dim_size: int) -> int:
- ...
+def get_positive_dim(dim: int, dim_size: int) -> int: ...
@overload
-def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]:
- ...
+def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]: ...
def get_positive_dim(
dim: Union[int, Sequence[int]], dim_size: int
) -> Union[int, Tuple[int, ...]]:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-02-27 21:02:33.419324+00:00
@@ -5,13 +5,13 @@
from torch._decomp import get_decompositions as get_torch_decompositions
from torch._ops import OpOverload, OpOverloadPacket
aten = torch.ops.aten
-_core_aten_decompositions: Dict[
- OpOverload, Callable[[Any], Any]
-] = core_aten_decompositions()
+_core_aten_decompositions: Dict[OpOverload, Callable[[Any], Any]] = (
+ core_aten_decompositions()
+)
torch_enabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._adaptive_avg_pool2d_backward,
aten.addcdiv,
aten.addcdiv_,
aten.addcmul,
@@ -178,13 +178,13 @@
torch_disabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._softmax.default,
}
-ENABLED_TORCH_DECOMPOSITIONS: Dict[
- OpOverload, Callable[[Any], Any]
-] = get_torch_decompositions(torch_enabled_decompositions)
+ENABLED_TORCH_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = (
+ get_torch_decompositions(torch_enabled_decompositions)
+)
TORCH_TRT_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = {}
def check_decomp_set_invariants() -> None:
"""Validates no overlap between enabled and disabled decomposition sets"""
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-02-27 21:02:33.444249+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after lowering linear:\n{gm.graph}")
return gm
-def linear_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def linear_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for linear"""
# Original graph
def orig(
input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-02-27 21:02:33.492240+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}")
return gm
-def view_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
- ]
-):
+def view_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
+]:
"""Constructs the original and replacement functions for view"""
# Original graph
def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor:
return torch.ops.aten.view.default(input, shape)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/impl/select.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/impl/select.py 2024-02-27 21:02:33.493144+00:00
@@ -387,29 +387,29 @@
f"scatter_tensor received input {input} that is not part "
"of the TensorRT region!"
)
input_shape = input.shape
index_shape = index.shape
- if (len(input_shape) != len(index_shape)):
- raise RuntimeError(
- f"The no of dimensions of input and index should be equal"
- )
+ if len(input_shape) != len(index_shape):
+ raise RuntimeError(f"The no of dimensions of input and index should be equal")
ranks = len(input_shape)
dim = get_positive_dim(cast(int, dim), ranks)
dynamic_shape = has_dynamic_shape(input.shape)
if dynamic_shape:
# Check whether slice target dim is dynamic shape dim
assert input.shape[dim] != -1, "Can't scatter on negative shape dimension!"
-
+
input_dims = len(input.shape)
for i in range(0, input_dims):
if index[i] >= input.shape[i]:
raise RuntimeError(
f"cannot have index greater than the dimension length! {input.shape[dim]}"
)
value_tensor = value * torch.ones(index.shape)
- scatter_layer = ctx.net.add_scatter(input, index, value_tensor, trt.tensorrt.ScatterModekELEMENT)
+ scatter_layer = ctx.net.add_scatter(
+ input, index, value_tensor, trt.tensorrt.ScatterModekELEMENT
+ )
scatter_layer.set_axis(dim)
set_layer_name(scatter_layer, target, name + "_scatter_layer", source_ir)
out = scatter_layer.get_output(0)
return out
@@ -430,31 +430,29 @@
"of the TensorRT region!"
)
input_shape = input.shape
index_shape = index.shape
src_shape = src.shape
- if (len(input_shape) != len(index_shape)):
- raise RuntimeError(
- f"The no of dimensions of input and index should be equal"
- )
- if (len(index_shape) != len(src_shape)):
- raise RuntimeError(
- f"The no of dimensions of src and index should be equal"
- )
-
+ if len(input_shape) != len(index_shape):
+ raise RuntimeError(f"The no of dimensions of input and index should be equal")
+ if len(index_shape) != len(src_shape):
+ raise RuntimeError(f"The no of dimensions of src and index should be equal")
+
input_dims = len(input_shape)
dim = get_positive_dim(cast(int, dim), input_dims)
dynamic_shape = has_dynamic_shape(input.shape)
if dynamic_shape:
# Check whether slice target dim is dynamic shape dim
assert input.shape[dim] != -1, "Can't scatter on negative shape dimension!"
-
+
for i in range(0, input_dims):
if index[i] >= input.shape[i]:
raise RuntimeError(
f"cannot have index greater than the dimension length! {input.shape[dim]}"
)
- scatter_layer = ctx.net.add_scatter(input, index, src, trt.tensorrt.ScatterModekELEMENT)
+ scatter_layer = ctx.net.add_scatter(
+ input, index, src, trt.tensorrt.ScatterModekELEMENT
+ )
scatter_layer.set_axis(dim)
set_layer_name(scatter_layer, target, name + "_scatter_layer", source_ir)
out = scatter_layer.get_output(0)
- return out
\ No newline at end of file
+ return out
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-02-27 21:02:33.499837+00:00
@@ -58,16 +58,14 @@
logger.debug(f"Graph after lowering scaled dot product attention:\n{gm.graph}")
return gm
-def scaled_dot_product_attention_replacement() -> (
- Tuple[
- Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def scaled_dot_product_attention_replacement() -> Tuple[
+ Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for efficient attention"""
# Efficient Attention original graph
def efficient(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
outputs = torch.ops.aten._scaled_dot_product_efficient_attention.default(
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-02-27 21:00:39.713994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-02-27 21:02:33.780753+00:00
@@ -99,25 +99,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self) -> None:
if not self.initialized:
@@ -165,13 +169,15 @@
self.__dict__.update(state)
if self.engine:
self.context = self.engine.create_execution_context()
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:Forward"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function("PythonTorchTensorRTModule:Forward")
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self._check_initialized()
# If in safe mode, check at each iteration for for whether a switch is required
if (
torch_tensorrt.runtime.multi_device_safe_mode._PY_RT_MULTI_DEVICE_SAFE_MODE
@@ -198,13 +204,17 @@
torch.cuda.set_device(device_id)
inputs = tuple([tensor.to(device) for tensor in inputs])
logger.warning(f"Moved all input Tensors to cuda:{device_id}")
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessInputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessInputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
assert len(inputs) == len(
self.input_names
), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."
contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
@@ -237,13 +247,17 @@
self.context.set_binding_shape(
idx, tuple(contiguous_inputs[i].shape)
)
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessOutputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessOutputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
# create output tensors
outputs: List[torch.Tensor] = []
for i, idx in enumerate(self.output_binding_indices_in_order):
shape = tuple(self.context.get_binding_shape(idx))
@@ -264,13 +278,17 @@
dtype=self.hidden_output_dtypes[i],
device=torch.cuda.current_device(),
)
bindings[idx] = output.data_ptr()
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:TensorRTRuntime"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:TensorRTRuntime"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self.context.execute_async_v2(
bindings, torch.cuda.current_stream().cuda_stream
)
if len(outputs) == 1:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-02-27 21:02:34.081156+00:00
@@ -315,25 +315,21 @@
name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
kwargs_new = {
"input": args[0],
"kernel_size": args[1],
- "stride": args[2]
- if len(args) > 2
- else (None, None)
- if len(args[1]) == 2
- else (None, None, None),
- "padding": args[3]
- if len(args) > 3
- else (0, 0)
- if len(args[1]) == 2
- else (0, 0, 0),
- "dilation": args[4]
- if len(args) > 4
- else (1, 1)
- if len(args[1]) == 2
- else (1, 1, 1),
+ "stride": (
+ args[2]
+ if len(args) > 2
+ else (None, None) if len(args[1]) == 2 else (None, None, None)
+ ),
+ "padding": (
+ args[3] if len(args) > 3 else (0, 0) if len(args[1]) == 2 else (0, 0, 0)
+ ),
+ "dilation": (
+ args[4] if len(args) > 4 else (1, 1) if len(args[1]) == 2 else (1, 1, 1)
+ ),
"ceil_mode": args[5] if len(args) > 5 else False,
}
return acc_ops_converters.acc_ops_max_poolnd(
network, target, None, kwargs_new, name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-02-27 21:02:34.128486+00:00
@@ -124,25 +124,29 @@
interpreter = TRTInterpreter(
mod,
input_specs=self.lower_setting.input_specs,
explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
explicit_precision=self.lower_setting.explicit_precision,
- logger_level=trt.Logger.VERBOSE
- if self.lower_setting.verbose_log
- else trt.Logger.WARNING,
+ logger_level=(
+ trt.Logger.VERBOSE
+ if self.lower_setting.verbose_log
+ else trt.Logger.WARNING
+ ),
)
interp_result: TRTInterpreterResult = interpreter.run(
max_batch_size=self.lower_setting.max_batch_size,
max_workspace_size=self.lower_setting.max_workspace_size,
lower_precision=self.lower_setting.lower_precision,
strict_type_constraints=self.lower_setting.strict_type_constraints,
algorithm_selector=algo_selector,
timing_cache=cache_data,
- profiling_verbosity=trt.ProfilingVerbosity.DETAILED
- if self.lower_setting.verbose_profile
- else trt.ProfilingVerbosity.LAYER_NAMES_ONLY,
+ profiling_verbosity=(
+ trt.ProfilingVerbosity.DETAILED
+ if self.lower_setting.verbose_profile
+ else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+ ),
tactic_sources=self.lower_setting.tactic_sources,
)
# Update timing cache file if needed
timing_cache = interp_result.serialized_cache
@@ -295,14 +299,12 @@
module.half()
# A custom conversion function can be passed to the lowerer to
# handle inputs with custom types. By default, just handle
# tensors and NoneType.
if fp16_conversion_fn is None:
- conversion_fn = (
- lambda x: x.half()
- if x is not None and x.dtype == torch.float32
- else x
+ conversion_fn = lambda x: (
+ x.half() if x is not None and x.dtype == torch.float32 else x
)
else:
conversion_fn = fp16_conversion_fn
inputs = tuple(conversion_fn(x) for x in inputs)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-02-27 21:02:34.129471+00:00
@@ -19,13 +19,13 @@
from .observer import Observer
from .utils import get_dynamic_dims, LowerPrecision, unified_dtype_converter, Frameworks
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class TRTInterpreterResult(NamedTuple):
engine: Any
input_names: Sequence[str]
@@ -73,13 +73,13 @@
self.input_specs_iter = 0
self.validate_input_specs()
self._cur_node_name: Optional[str] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
def validate_input_specs(self):
for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
if not self.network.has_implicit_batch_dimension:
assert (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-02-27 21:02:34.353937+00:00
@@ -194,13 +194,15 @@
lowering_start_time = datetime.datetime.now()
self.lower_setting.input_specs = generate_input_specs(
submod_inputs,
self.lower_setting,
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
setattr(split_result.split_module, submod_name, lowered_module)
@@ -234,13 +236,15 @@
if not submod_name.startswith(split_result.non_acc_submodule_prefix):
_LOGGER.info(f"ACC submodule graph: {submod.graph}")
lowering_start_time = datetime.datetime.now()
self.lower_setting.additional_inputs = (
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-02-27 21:02:34.560428+00:00
@@ -193,13 +193,11 @@
kwargs2 = {"equal_nan": True}
if rtol:
kwargs2["rtol"] = rtol
if atol:
kwargs2["atol"] = atol
- kwargs2[
- "msg"
- ] = (
+ kwargs2["msg"] = (
lambda msg: f"Pass {pass_} failed correctness check due at output {kk}:\n{msg}"
)
# If tensors are on different devices, make sure to compare
# their copies that are on the same device.
if x.get_device() != y.get_device():
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-02-27 21:00:39.717994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-02-27 21:02:34.635790+00:00
@@ -536,13 +536,13 @@
reshape_batch_size: Optional[fx.Node] = get_reshape_batch_size_as_node(
maybe_reshape
)
if not reshape_batch_size:
continue
- reshape_batch_size_inferred_source: Optional[
- fx.Node
- ] = get_reshape_batch_size_inferred_source(reshape_batch_size)
+ reshape_batch_size_inferred_source: Optional[fx.Node] = (
+ get_reshape_batch_size_inferred_source(reshape_batch_size)
+ )
if not reshape_batch_size_inferred_source:
continue
reshape_input: fx.Node = maybe_reshape.kwargs["input"]
if reshape_input == reshape_batch_size_inferred_source:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-02-27 21:00:39.721994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-02-27 21:02:35.035019+00:00
@@ -21,13 +21,15 @@
inputs = [torch.randn(1, 10)]
self.run_test(
Split(),
inputs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
test_explicit_batch_dim=False,
)
@parameterized.expand(
@@ -68,13 +70,15 @@
]
self.run_test_with_dynamic_shape(
Split(),
input_specs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
)
# Testing with (-1, -1, -1) results into following error:
# AssertionError: Can't chunk on dynamic shape dimension!
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-02-27 21:00:39.725994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-02-27 21:02:35.708714+00:00
@@ -152,13 +152,13 @@
mod.eval()
if len(expected_ops):
self.assert_has_op(mod, expected_ops)
interpreter_result = interpreter.run(
- lower_precision=LowerPrecision.FP16
- if fp16_mode
- else LowerPrecision.FP32
+ lower_precision=(
+ LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+ )
)
trt_mod = TRTModule(
interpreter_result.engine,
interpreter_result.input_names,
interpreter_result.output_names,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-02-27 21:00:39.725994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-02-27 21:02:36.043493+00:00
@@ -67,25 +67,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes: Sequence[torch.dtype] = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self):
if not self.initialized:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-02-27 21:00:39.725994+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-02-27 21:02:36.403250+00:00
@@ -404,13 +404,13 @@
"inputs": inputs if inputs is not None else [],
# "input_signature": input_signature,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, # Enable sparsity for convolution and fully connected layers.
- "enabled_precisions": enabled_precisions
- if enabled_precisions is not None
- else set(), # Enabling FP16 kernels
+ "enabled_precisions": (
+ enabled_precisions if enabled_precisions is not None else set()
+ ), # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/conversion/test_scatter_aten.py 2024-02-27 21:00:39.737994+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/conversion/test_scatter_aten.py 2024-02-27 21:02:36.819871+00:00
@@ -21,11 +21,11 @@
super().__init__()
def forward(self, input, src):
return torch.ops.aten.scatter.value(input, dim, index, value)
- input = [torch.zeros(3, 5, dtype = torch.int32)]
+ input = [torch.zeros(3, 5, dtype=torch.int32)]
self.run_test(
TestModule(),
input,
)
@@ -44,16 +44,13 @@
def __init__(self):
super().__init__()
def forward(self, input, src):
return torch.ops.aten.scatter.src(input, dim, index, src)
-
- src = [torch.arange(1, 11).reshape((2,5))]
- input = torch.zeros(3, 5, dtype = src.dtype)
+
+ src = [torch.arange(1, 11).reshape((2, 5))]
+ input = torch.zeros(3, 5, dtype=src.dtype)
inputs = [input, src]
self.run_test(
TestModule(),
inputs,
)
-
-
-
\ No newline at end of file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-02-27 21:49:12.401933+00:00
+++ /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-02-27 21:51:06.069062+00:00
@@ -1,10 +1,11 @@
"""
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
+
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-02-27 21:49:12.405933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-02-27 21:51:06.177336+00:00
@@ -30,16 +30,18 @@
gpu_id (int): Device ID for target GPU
dla_core (int): Core ID for target DLA core
allow_gpu_fallback (bool): Whether falling back to GPU if DLA cannot support an op should be allowed
"""
- device_type: Optional[
- trt.DeviceType
- ] = None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ device_type: Optional[trt.DeviceType] = (
+ None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ )
gpu_id: int = -1 #: Device ID for target GPU
dla_core: int = -1 #: Core ID for target DLA core
- allow_gpu_fallback: bool = False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ allow_gpu_fallback: bool = (
+ False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ )
def __init__(self, *args: Any, **kwargs: Any):
"""__init__ Method for torch_tensorrt.Device
Device accepts one of a few construction patterns
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-02-27 21:49:12.405933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-02-27 21:51:06.373382+00:00
@@ -26,16 +26,16 @@
class _ShapeMode(Enum):
STATIC = 0
DYNAMIC = 1
- shape_mode: Optional[
- _ShapeMode
- ] = None #: Is input statically or dynamically shaped
- shape: Optional[
- Tuple[int, ...] | Dict[str, Tuple[int, ...]]
- ] = None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ shape_mode: Optional[_ShapeMode] = (
+ None #: Is input statically or dynamically shaped
+ )
+ shape: Optional[Tuple[int, ...] | Dict[str, Tuple[int, ...]]] = (
+ None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ )
dtype: _enums.dtype = (
_enums.dtype.unknown
) #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
_explicit_set_dtype: bool = False
format: _enums.TensorFormat = (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-02-27 21:49:12.409933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-02-27 21:51:06.495781+00:00
@@ -215,13 +215,13 @@
"precision": precision,
"debug": debug,
"device": device,
"workspace_size": workspace_size,
"min_block_size": min_block_size,
- "torch_executed_ops": torch_executed_ops
- if torch_executed_ops is not None
- else set(),
+ "torch_executed_ops": (
+ torch_executed_ops if torch_executed_ops is not None else set()
+ ),
"pass_through_build_failures": pass_through_build_failures,
"max_aux_streams": max_aux_streams,
"version_compatible": version_compatible,
"optimization_level": optimization_level,
"use_python_runtime": use_python_runtime,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-02-27 21:49:12.409933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-02-27 21:51:06.618377+00:00
@@ -26,13 +26,13 @@
from packaging import version
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class UnsupportedOperatorException(RuntimeError):
pass
@@ -90,13 +90,13 @@
self.input_specs_iter = 0
self._cur_node_name: Optional[str] = None
self._cur_node: Optional[torch.fx.Node] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
self.compilation_settings = compilation_settings
# Data types for TRT Module output Tensors
self.output_dtypes = output_dtypes
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-02-27 21:49:12.409933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-02-27 21:51:06.708526+00:00
@@ -322,17 +322,15 @@
else:
raise AssertionError(f"Cannot convert {input_val} to TRT constant")
@overload
-def get_positive_dim(dim: int, dim_size: int) -> int:
- ...
+def get_positive_dim(dim: int, dim_size: int) -> int: ...
@overload
-def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]:
- ...
+def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]: ...
def get_positive_dim(
dim: Union[int, Sequence[int]], dim_size: int
) -> Union[int, Tuple[int, ...]]:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-02-27 21:51:07.085443+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after lowering linear:\n{gm.graph}")
return gm
-def linear_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def linear_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for linear"""
# Original graph
def orig(
input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-02-27 21:51:07.086691+00:00
@@ -5,13 +5,13 @@
from torch._decomp import get_decompositions as get_torch_decompositions
from torch._ops import OpOverload, OpOverloadPacket
aten = torch.ops.aten
-_core_aten_decompositions: Dict[
- OpOverload, Callable[[Any], Any]
-] = core_aten_decompositions()
+_core_aten_decompositions: Dict[OpOverload, Callable[[Any], Any]] = (
+ core_aten_decompositions()
+)
torch_enabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._adaptive_avg_pool2d_backward,
aten.addcdiv,
aten.addcdiv_,
aten.addcmul,
@@ -178,13 +178,13 @@
torch_disabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._softmax.default,
}
-ENABLED_TORCH_DECOMPOSITIONS: Dict[
- OpOverload, Callable[[Any], Any]
-] = get_torch_decompositions(torch_enabled_decompositions)
+ENABLED_TORCH_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = (
+ get_torch_decompositions(torch_enabled_decompositions)
+)
TORCH_TRT_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = {}
def check_decomp_set_invariants() -> None:
"""Validates no overlap between enabled and disabled decomposition sets"""
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-02-27 21:51:07.118147+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}")
return gm
-def view_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
- ]
-):
+def view_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
+]:
"""Constructs the original and replacement functions for view"""
# Original graph
def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor:
return torch.ops.aten.view.default(input, shape)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-02-27 21:51:07.147012+00:00
@@ -58,16 +58,14 @@
logger.debug(f"Graph after lowering scaled dot product attention:\n{gm.graph}")
return gm
-def scaled_dot_product_attention_replacement() -> (
- Tuple[
- Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def scaled_dot_product_attention_replacement() -> Tuple[
+ Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for efficient attention"""
# Efficient Attention original graph
def efficient(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
outputs = torch.ops.aten._scaled_dot_product_efficient_attention.default(
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-02-27 21:51:07.444589+00:00
@@ -99,25 +99,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self) -> None:
if not self.initialized:
@@ -165,13 +169,15 @@
self.__dict__.update(state)
if self.engine:
self.context = self.engine.create_execution_context()
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:Forward"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function("PythonTorchTensorRTModule:Forward")
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self._check_initialized()
# If in safe mode, check at each iteration for whether a switch is required
if (
torch_tensorrt.runtime.multi_device_safe_mode._PY_RT_MULTI_DEVICE_SAFE_MODE
@@ -198,13 +204,17 @@
torch.cuda.set_device(device_id)
inputs = tuple([tensor.to(device) for tensor in inputs])
logger.warning(f"Moved all input Tensors to cuda:{device_id}")
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessInputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessInputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
assert len(inputs) == len(
self.input_names
), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."
contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
@@ -237,13 +247,17 @@
self.context.set_binding_shape(
idx, tuple(contiguous_inputs[i].shape)
)
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessOutputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessOutputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
# create output tensors
outputs: List[torch.Tensor] = []
for i, idx in enumerate(self.output_binding_indices_in_order):
shape = tuple(self.context.get_binding_shape(idx))
@@ -264,13 +278,17 @@
dtype=self.hidden_output_dtypes[i],
device=torch.cuda.current_device(),
)
bindings[idx] = output.data_ptr()
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:TensorRTRuntime"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:TensorRTRuntime"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self.context.execute_async_v2(
bindings, torch.cuda.current_stream().cuda_stream
)
if len(outputs) == 1:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-02-27 21:51:07.697063+00:00
@@ -315,25 +315,21 @@
name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
kwargs_new = {
"input": args[0],
"kernel_size": args[1],
- "stride": args[2]
- if len(args) > 2
- else (None, None)
- if len(args[1]) == 2
- else (None, None, None),
- "padding": args[3]
- if len(args) > 3
- else (0, 0)
- if len(args[1]) == 2
- else (0, 0, 0),
- "dilation": args[4]
- if len(args) > 4
- else (1, 1)
- if len(args[1]) == 2
- else (1, 1, 1),
+ "stride": (
+ args[2]
+ if len(args) > 2
+ else (None, None) if len(args[1]) == 2 else (None, None, None)
+ ),
+ "padding": (
+ args[3] if len(args) > 3 else (0, 0) if len(args[1]) == 2 else (0, 0, 0)
+ ),
+ "dilation": (
+ args[4] if len(args) > 4 else (1, 1) if len(args[1]) == 2 else (1, 1, 1)
+ ),
"ceil_mode": args[5] if len(args) > 5 else False,
}
return acc_ops_converters.acc_ops_max_poolnd(
network, target, None, kwargs_new, name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-02-27 21:51:07.821808+00:00
@@ -124,25 +124,29 @@
interpreter = TRTInterpreter(
mod,
input_specs=self.lower_setting.input_specs,
explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
explicit_precision=self.lower_setting.explicit_precision,
- logger_level=trt.Logger.VERBOSE
- if self.lower_setting.verbose_log
- else trt.Logger.WARNING,
+ logger_level=(
+ trt.Logger.VERBOSE
+ if self.lower_setting.verbose_log
+ else trt.Logger.WARNING
+ ),
)
interp_result: TRTInterpreterResult = interpreter.run(
max_batch_size=self.lower_setting.max_batch_size,
max_workspace_size=self.lower_setting.max_workspace_size,
lower_precision=self.lower_setting.lower_precision,
strict_type_constraints=self.lower_setting.strict_type_constraints,
algorithm_selector=algo_selector,
timing_cache=cache_data,
- profiling_verbosity=trt.ProfilingVerbosity.DETAILED
- if self.lower_setting.verbose_profile
- else trt.ProfilingVerbosity.LAYER_NAMES_ONLY,
+ profiling_verbosity=(
+ trt.ProfilingVerbosity.DETAILED
+ if self.lower_setting.verbose_profile
+ else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+ ),
tactic_sources=self.lower_setting.tactic_sources,
)
# Update timing cache file if needed
timing_cache = interp_result.serialized_cache
@@ -295,14 +299,12 @@
module.half()
# A custom conversion function can be passed to the lowerer to
# handle inputs with custom types. By default, just handle
# tensors and NoneType.
if fp16_conversion_fn is None:
- conversion_fn = (
- lambda x: x.half()
- if x is not None and x.dtype == torch.float32
- else x
+ conversion_fn = lambda x: (
+ x.half() if x is not None and x.dtype == torch.float32 else x
)
else:
conversion_fn = fp16_conversion_fn
inputs = tuple(conversion_fn(x) for x in inputs)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-02-27 21:51:07.822625+00:00
@@ -19,13 +19,13 @@
from .observer import Observer
from .utils import get_dynamic_dims, LowerPrecision, unified_dtype_converter, Frameworks
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class TRTInterpreterResult(NamedTuple):
engine: Any
input_names: Sequence[str]
@@ -73,13 +73,13 @@
self.input_specs_iter = 0
self.validate_input_specs()
self._cur_node_name: Optional[str] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
def validate_input_specs(self):
for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
if not self.network.has_implicit_batch_dimension:
assert (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-02-27 21:51:08.048521+00:00
@@ -194,13 +194,15 @@
lowering_start_time = datetime.datetime.now()
self.lower_setting.input_specs = generate_input_specs(
submod_inputs,
self.lower_setting,
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
setattr(split_result.split_module, submod_name, lowered_module)
@@ -234,13 +236,15 @@
if not submod_name.startswith(split_result.non_acc_submodule_prefix):
_LOGGER.info(f"ACC submodule graph: {submod.graph}")
lowering_start_time = datetime.datetime.now()
self.lower_setting.additional_inputs = (
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-02-27 21:51:08.267158+00:00
@@ -193,13 +193,11 @@
kwargs2 = {"equal_nan": True}
if rtol:
kwargs2["rtol"] = rtol
if atol:
kwargs2["atol"] = atol
- kwargs2[
- "msg"
- ] = (
+ kwargs2["msg"] = (
lambda msg: f"Pass {pass_} failed correctness check due at output {kk}:\n{msg}"
)
# If tensors are on different devices, make sure to compare
# their copies that are on the same device.
if x.get_device() != y.get_device():
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-02-27 21:49:12.413933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-02-27 21:51:08.273854+00:00
@@ -536,13 +536,13 @@
reshape_batch_size: Optional[fx.Node] = get_reshape_batch_size_as_node(
maybe_reshape
)
if not reshape_batch_size:
continue
- reshape_batch_size_inferred_source: Optional[
- fx.Node
- ] = get_reshape_batch_size_inferred_source(reshape_batch_size)
+ reshape_batch_size_inferred_source: Optional[fx.Node] = (
+ get_reshape_batch_size_inferred_source(reshape_batch_size)
+ )
if not reshape_batch_size_inferred_source:
continue
reshape_input: fx.Node = maybe_reshape.kwargs["input"]
if reshape_input == reshape_batch_size_inferred_source:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-02-27 21:49:12.417933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-02-27 21:51:08.726405+00:00
@@ -21,13 +21,15 @@
inputs = [torch.randn(1, 10)]
self.run_test(
Split(),
inputs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
test_explicit_batch_dim=False,
)
@parameterized.expand(
@@ -68,13 +70,15 @@
]
self.run_test_with_dynamic_shape(
Split(),
input_specs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
)
# Testing with (-1, -1, -1) results into following error:
# AssertionError: Can't chunk on dynamic shape dimension!
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-02-27 21:49:12.421933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-02-27 21:51:09.459775+00:00
@@ -152,13 +152,13 @@
mod.eval()
if len(expected_ops):
self.assert_has_op(mod, expected_ops)
interpreter_result = interpreter.run(
- lower_precision=LowerPrecision.FP16
- if fp16_mode
- else LowerPrecision.FP32
+ lower_precision=(
+ LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+ )
)
trt_mod = TRTModule(
interpreter_result.engine,
interpreter_result.input_names,
interpreter_result.output_names,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-02-27 21:49:12.421933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-02-27 21:51:09.775898+00:00
@@ -67,25 +67,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes: Sequence[torch.dtype] = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self):
if not self.initialized:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-02-27 21:49:12.421933+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-02-27 21:51:10.134843+00:00
@@ -404,13 +404,13 @@
"inputs": inputs if inputs is not None else [],
# "input_signature": input_signature,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, # Enable sparsity for convolution and fully connected layers.
- "enabled_precisions": enabled_precisions
- if enabled_precisions is not None
- else set(), # Enabling FP16 kernels
+ "enabled_precisions": (
+ enabled_precisions if enabled_precisions is not None else set()
+ ), # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 09:51:06.918534+00:00
+++ /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 09:53:00.667566+00:00
@@ -1,10 +1,11 @@
"""
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
+
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 09:51:06.922534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 09:53:00.770537+00:00
@@ -30,16 +30,18 @@
gpu_id (int): Device ID for target GPU
dla_core (int): Core ID for target DLA core
allow_gpu_fallback (bool): Whether falling back to GPU if DLA cannot support an op should be allowed
"""
- device_type: Optional[
- trt.DeviceType
- ] = None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ device_type: Optional[trt.DeviceType] = (
+ None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ )
gpu_id: int = -1 #: Device ID for target GPU
dla_core: int = -1 #: Core ID for target DLA core
- allow_gpu_fallback: bool = False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ allow_gpu_fallback: bool = (
+ False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ )
def __init__(self, *args: Any, **kwargs: Any):
"""__init__ Method for torch_tensorrt.Device
Device accepts one of a few construction patterns
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 09:51:06.922534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 09:53:00.990126+00:00
@@ -26,16 +26,16 @@
class _ShapeMode(Enum):
STATIC = 0
DYNAMIC = 1
- shape_mode: Optional[
- _ShapeMode
- ] = None #: Is input statically or dynamically shaped
- shape: Optional[
- Tuple[int, ...] | Dict[str, Tuple[int, ...]]
- ] = None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ shape_mode: Optional[_ShapeMode] = (
+ None #: Is input statically or dynamically shaped
+ )
+ shape: Optional[Tuple[int, ...] | Dict[str, Tuple[int, ...]]] = (
+ None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ )
dtype: _enums.dtype = (
_enums.dtype.unknown
) #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
_explicit_set_dtype: bool = False
format: _enums.TensorFormat = (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 09:51:06.926534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 09:53:01.135055+00:00
@@ -215,13 +215,13 @@
"precision": precision,
"debug": debug,
"device": device,
"workspace_size": workspace_size,
"min_block_size": min_block_size,
- "torch_executed_ops": torch_executed_ops
- if torch_executed_ops is not None
- else set(),
+ "torch_executed_ops": (
+ torch_executed_ops if torch_executed_ops is not None else set()
+ ),
"pass_through_build_failures": pass_through_build_failures,
"max_aux_streams": max_aux_streams,
"version_compatible": version_compatible,
"optimization_level": optimization_level,
"use_python_runtime": use_python_runtime,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 09:51:06.926534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 09:53:01.233148+00:00
@@ -26,13 +26,13 @@
from packaging import version
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class UnsupportedOperatorException(RuntimeError):
pass
@@ -90,13 +90,13 @@
self.input_specs_iter = 0
self._cur_node_name: Optional[str] = None
self._cur_node: Optional[torch.fx.Node] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
self.compilation_settings = compilation_settings
# Data types for TRT Module output Tensors
self.output_dtypes = output_dtypes
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 09:51:06.926534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 09:53:01.322305+00:00
@@ -322,17 +322,15 @@
else:
raise AssertionError(f"Cannot convert {input_val} to TRT constant")
@overload
-def get_positive_dim(dim: int, dim_size: int) -> int:
- ...
+def get_positive_dim(dim: int, dim_size: int) -> int: ...
@overload
-def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]:
- ...
+def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]: ...
def get_positive_dim(
dim: Union[int, Sequence[int]], dim_size: int
) -> Union[int, Tuple[int, ...]]:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 09:53:01.717472+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after lowering linear:\n{gm.graph}")
return gm
-def linear_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def linear_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for linear"""
# Original graph
def orig(
input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 09:51:06.926534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 09:53:01.729879+00:00
@@ -5,13 +5,13 @@
from torch._decomp import get_decompositions as get_torch_decompositions
from torch._ops import OpOverload, OpOverloadPacket
aten = torch.ops.aten
-_core_aten_decompositions: Dict[
- OpOverload, Callable[[Any], Any]
-] = core_aten_decompositions()
+_core_aten_decompositions: Dict[OpOverload, Callable[[Any], Any]] = (
+ core_aten_decompositions()
+)
torch_enabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._adaptive_avg_pool2d_backward,
aten.addcdiv,
aten.addcdiv_,
aten.addcmul,
@@ -178,13 +178,13 @@
torch_disabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._softmax.default,
}
-ENABLED_TORCH_DECOMPOSITIONS: Dict[
- OpOverload, Callable[[Any], Any]
-] = get_torch_decompositions(torch_enabled_decompositions)
+ENABLED_TORCH_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = (
+ get_torch_decompositions(torch_enabled_decompositions)
+)
TORCH_TRT_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = {}
def check_decomp_set_invariants() -> None:
"""Validates no overlap between enabled and disabled decomposition sets"""
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 09:53:01.760017+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}")
return gm
-def view_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
- ]
-):
+def view_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
+]:
"""Constructs the original and replacement functions for view"""
# Original graph
def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor:
return torch.ops.aten.view.default(input, shape)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 09:53:01.766275+00:00
@@ -58,16 +58,14 @@
logger.debug(f"Graph after lowering scaled dot product attention:\n{gm.graph}")
return gm
-def scaled_dot_product_attention_replacement() -> (
- Tuple[
- Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def scaled_dot_product_attention_replacement() -> Tuple[
+ Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for efficient attention"""
# Efficient Attention original graph
def efficient(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
outputs = torch.ops.aten._scaled_dot_product_efficient_attention.default(
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 09:53:02.058536+00:00
@@ -99,25 +99,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self) -> None:
if not self.initialized:
@@ -165,13 +169,15 @@
self.__dict__.update(state)
if self.engine:
self.context = self.engine.create_execution_context()
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:Forward"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function("PythonTorchTensorRTModule:Forward")
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self._check_initialized()
# If in safe mode, check at each iteration for for whether a switch is required
if (
torch_tensorrt.runtime.multi_device_safe_mode._PY_RT_MULTI_DEVICE_SAFE_MODE
@@ -198,13 +204,17 @@
torch.cuda.set_device(device_id)
inputs = tuple([tensor.to(device) for tensor in inputs])
logger.warning(f"Moved all input Tensors to cuda:{device_id}")
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessInputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessInputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
assert len(inputs) == len(
self.input_names
), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."
contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
@@ -237,13 +247,17 @@
self.context.set_binding_shape(
idx, tuple(contiguous_inputs[i].shape)
)
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessOutputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessOutputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
# create output tensors
outputs: List[torch.Tensor] = []
for i, idx in enumerate(self.output_binding_indices_in_order):
shape = tuple(self.context.get_binding_shape(idx))
@@ -264,13 +278,17 @@
dtype=self.hidden_output_dtypes[i],
device=torch.cuda.current_device(),
)
bindings[idx] = output.data_ptr()
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:TensorRTRuntime"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:TensorRTRuntime"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self.context.execute_async_v2(
bindings, torch.cuda.current_stream().cuda_stream
)
if len(outputs) == 1:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 09:53:02.363697+00:00
@@ -315,25 +315,21 @@
name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
kwargs_new = {
"input": args[0],
"kernel_size": args[1],
- "stride": args[2]
- if len(args) > 2
- else (None, None)
- if len(args[1]) == 2
- else (None, None, None),
- "padding": args[3]
- if len(args) > 3
- else (0, 0)
- if len(args[1]) == 2
- else (0, 0, 0),
- "dilation": args[4]
- if len(args) > 4
- else (1, 1)
- if len(args[1]) == 2
- else (1, 1, 1),
+ "stride": (
+ args[2]
+ if len(args) > 2
+ else (None, None) if len(args[1]) == 2 else (None, None, None)
+ ),
+ "padding": (
+ args[3] if len(args) > 3 else (0, 0) if len(args[1]) == 2 else (0, 0, 0)
+ ),
+ "dilation": (
+ args[4] if len(args) > 4 else (1, 1) if len(args[1]) == 2 else (1, 1, 1)
+ ),
"ceil_mode": args[5] if len(args) > 5 else False,
}
return acc_ops_converters.acc_ops_max_poolnd(
network, target, None, kwargs_new, name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 09:53:02.432738+00:00
@@ -124,25 +124,29 @@
interpreter = TRTInterpreter(
mod,
input_specs=self.lower_setting.input_specs,
explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
explicit_precision=self.lower_setting.explicit_precision,
- logger_level=trt.Logger.VERBOSE
- if self.lower_setting.verbose_log
- else trt.Logger.WARNING,
+ logger_level=(
+ trt.Logger.VERBOSE
+ if self.lower_setting.verbose_log
+ else trt.Logger.WARNING
+ ),
)
interp_result: TRTInterpreterResult = interpreter.run(
max_batch_size=self.lower_setting.max_batch_size,
max_workspace_size=self.lower_setting.max_workspace_size,
lower_precision=self.lower_setting.lower_precision,
strict_type_constraints=self.lower_setting.strict_type_constraints,
algorithm_selector=algo_selector,
timing_cache=cache_data,
- profiling_verbosity=trt.ProfilingVerbosity.DETAILED
- if self.lower_setting.verbose_profile
- else trt.ProfilingVerbosity.LAYER_NAMES_ONLY,
+ profiling_verbosity=(
+ trt.ProfilingVerbosity.DETAILED
+ if self.lower_setting.verbose_profile
+ else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+ ),
tactic_sources=self.lower_setting.tactic_sources,
)
# Update timing cache file if needed
timing_cache = interp_result.serialized_cache
@@ -295,14 +299,12 @@
module.half()
# A custom conversion function can be passed to the lowerer to
# handle inputs with custom types. By default, just handle
# tensors and NoneType.
if fp16_conversion_fn is None:
- conversion_fn = (
- lambda x: x.half()
- if x is not None and x.dtype == torch.float32
- else x
+ conversion_fn = lambda x: (
+ x.half() if x is not None and x.dtype == torch.float32 else x
)
else:
conversion_fn = fp16_conversion_fn
inputs = tuple(conversion_fn(x) for x in inputs)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 09:53:02.444417+00:00
@@ -19,13 +19,13 @@
from .observer import Observer
from .utils import get_dynamic_dims, LowerPrecision, unified_dtype_converter, Frameworks
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class TRTInterpreterResult(NamedTuple):
engine: Any
input_names: Sequence[str]
@@ -73,13 +73,13 @@
self.input_specs_iter = 0
self.validate_input_specs()
self._cur_node_name: Optional[str] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
def validate_input_specs(self):
for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
if not self.network.has_implicit_batch_dimension:
assert (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 09:53:02.676478+00:00
@@ -194,13 +194,15 @@
lowering_start_time = datetime.datetime.now()
self.lower_setting.input_specs = generate_input_specs(
submod_inputs,
self.lower_setting,
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
setattr(split_result.split_module, submod_name, lowered_module)
@@ -234,13 +236,15 @@
if not submod_name.startswith(split_result.non_acc_submodule_prefix):
_LOGGER.info(f"ACC submodule graph: {submod.graph}")
lowering_start_time = datetime.datetime.now()
self.lower_setting.additional_inputs = (
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 09:53:02.897211+00:00
@@ -193,13 +193,11 @@
kwargs2 = {"equal_nan": True}
if rtol:
kwargs2["rtol"] = rtol
if atol:
kwargs2["atol"] = atol
- kwargs2[
- "msg"
- ] = (
+ kwargs2["msg"] = (
lambda msg: f"Pass {pass_} failed correctness check due at output {kk}:\n{msg}"
)
# If tensors are on different devices, make sure to compare
# their copies that are on the same device.
if x.get_device() != y.get_device():
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 09:51:06.930534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 09:53:02.937735+00:00
@@ -536,13 +536,13 @@
reshape_batch_size: Optional[fx.Node] = get_reshape_batch_size_as_node(
maybe_reshape
)
if not reshape_batch_size:
continue
- reshape_batch_size_inferred_source: Optional[
- fx.Node
- ] = get_reshape_batch_size_inferred_source(reshape_batch_size)
+ reshape_batch_size_inferred_source: Optional[fx.Node] = (
+ get_reshape_batch_size_inferred_source(reshape_batch_size)
+ )
if not reshape_batch_size_inferred_source:
continue
reshape_input: fx.Node = maybe_reshape.kwargs["input"]
if reshape_input == reshape_batch_size_inferred_source:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 09:51:06.934534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 09:53:03.381476+00:00
@@ -21,13 +21,15 @@
inputs = [torch.randn(1, 10)]
self.run_test(
Split(),
inputs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
test_explicit_batch_dim=False,
)
@parameterized.expand(
@@ -68,13 +70,15 @@
]
self.run_test_with_dynamic_shape(
Split(),
input_specs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
)
# Testing with (-1, -1, -1) results into following error:
# AssertionError: Can't chunk on dynamic shape dimension!
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 09:51:06.938534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 09:53:04.085588+00:00
@@ -152,13 +152,13 @@
mod.eval()
if len(expected_ops):
self.assert_has_op(mod, expected_ops)
interpreter_result = interpreter.run(
- lower_precision=LowerPrecision.FP16
- if fp16_mode
- else LowerPrecision.FP32
+ lower_precision=(
+ LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+ )
)
trt_mod = TRTModule(
interpreter_result.engine,
interpreter_result.input_names,
interpreter_result.output_names,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 09:51:06.938534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 09:53:04.462633+00:00
@@ -67,25 +67,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes: Sequence[torch.dtype] = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self):
if not self.initialized:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 09:51:06.938534+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 09:53:04.779335+00:00
@@ -404,13 +404,13 @@
"inputs": inputs if inputs is not None else [],
# "input_signature": input_signature,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, # Enable sparsity for convolution and fully connected layers.
- "enabled_precisions": enabled_precisions
- if enabled_precisions is not None
- else set(), # Enabling FP16 kernels
+ "enabled_precisions": (
+ enabled_precisions if enabled_precisions is not None else set()
+ ), # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
cc07bb5
to
a42dcbf
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 09:54:46.457073+00:00
+++ /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 09:56:34.535411+00:00
@@ -1,10 +1,11 @@
"""
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
+
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 09:54:46.465074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 09:56:34.629565+00:00
@@ -30,16 +30,18 @@
gpu_id (int): Device ID for target GPU
dla_core (int): Core ID for target DLA core
allow_gpu_fallback (bool): Whether falling back to GPU if DLA cannot support an op should be allowed
"""
- device_type: Optional[
- trt.DeviceType
- ] = None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ device_type: Optional[trt.DeviceType] = (
+ None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ )
gpu_id: int = -1 #: Device ID for target GPU
dla_core: int = -1 #: Core ID for target DLA core
- allow_gpu_fallback: bool = False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ allow_gpu_fallback: bool = (
+ False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ )
def __init__(self, *args: Any, **kwargs: Any):
"""__init__ Method for torch_tensorrt.Device
Device accepts one of a few construction patterns
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 09:54:46.465074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 09:56:34.841641+00:00
@@ -26,16 +26,16 @@
class _ShapeMode(Enum):
STATIC = 0
DYNAMIC = 1
- shape_mode: Optional[
- _ShapeMode
- ] = None #: Is input statically or dynamically shaped
- shape: Optional[
- Tuple[int, ...] | Dict[str, Tuple[int, ...]]
- ] = None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ shape_mode: Optional[_ShapeMode] = (
+ None #: Is input statically or dynamically shaped
+ )
+ shape: Optional[Tuple[int, ...] | Dict[str, Tuple[int, ...]]] = (
+ None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ )
dtype: _enums.dtype = (
_enums.dtype.unknown
) #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
_explicit_set_dtype: bool = False
format: _enums.TensorFormat = (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 09:54:46.465074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 09:56:34.953851+00:00
@@ -215,13 +215,13 @@
"precision": precision,
"debug": debug,
"device": device,
"workspace_size": workspace_size,
"min_block_size": min_block_size,
- "torch_executed_ops": torch_executed_ops
- if torch_executed_ops is not None
- else set(),
+ "torch_executed_ops": (
+ torch_executed_ops if torch_executed_ops is not None else set()
+ ),
"pass_through_build_failures": pass_through_build_failures,
"max_aux_streams": max_aux_streams,
"version_compatible": version_compatible,
"optimization_level": optimization_level,
"use_python_runtime": use_python_runtime,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 09:54:46.465074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 09:56:35.070485+00:00
@@ -26,13 +26,13 @@
from packaging import version
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class UnsupportedOperatorException(RuntimeError):
pass
@@ -90,13 +90,13 @@
self.input_specs_iter = 0
self._cur_node_name: Optional[str] = None
self._cur_node: Optional[torch.fx.Node] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
self.compilation_settings = compilation_settings
# Data types for TRT Module output Tensors
self.output_dtypes = output_dtypes
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 09:54:46.465074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 09:56:35.164098+00:00
@@ -322,17 +322,15 @@
else:
raise AssertionError(f"Cannot convert {input_val} to TRT constant")
@overload
-def get_positive_dim(dim: int, dim_size: int) -> int:
- ...
+def get_positive_dim(dim: int, dim_size: int) -> int: ...
@overload
-def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]:
- ...
+def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]: ...
def get_positive_dim(
dim: Union[int, Sequence[int]], dim_size: int
) -> Union[int, Tuple[int, ...]]:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 09:56:35.536135+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after lowering linear:\n{gm.graph}")
return gm
-def linear_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def linear_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for linear"""
# Original graph
def orig(
input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 09:56:35.535591+00:00
@@ -5,13 +5,13 @@
from torch._decomp import get_decompositions as get_torch_decompositions
from torch._ops import OpOverload, OpOverloadPacket
aten = torch.ops.aten
-_core_aten_decompositions: Dict[
- OpOverload, Callable[[Any], Any]
-] = core_aten_decompositions()
+_core_aten_decompositions: Dict[OpOverload, Callable[[Any], Any]] = (
+ core_aten_decompositions()
+)
torch_enabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._adaptive_avg_pool2d_backward,
aten.addcdiv,
aten.addcdiv_,
aten.addcmul,
@@ -178,13 +178,13 @@
torch_disabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._softmax.default,
}
-ENABLED_TORCH_DECOMPOSITIONS: Dict[
- OpOverload, Callable[[Any], Any]
-] = get_torch_decompositions(torch_enabled_decompositions)
+ENABLED_TORCH_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = (
+ get_torch_decompositions(torch_enabled_decompositions)
+)
TORCH_TRT_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = {}
def check_decomp_set_invariants() -> None:
"""Validates no overlap between enabled and disabled decomposition sets"""
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 09:56:35.573978+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}")
return gm
-def view_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
- ]
-):
+def view_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
+]:
"""Constructs the original and replacement functions for view"""
# Original graph
def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor:
return torch.ops.aten.view.default(input, shape)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 09:56:35.580265+00:00
@@ -58,16 +58,14 @@
logger.debug(f"Graph after lowering scaled dot product attention:\n{gm.graph}")
return gm
-def scaled_dot_product_attention_replacement() -> (
- Tuple[
- Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def scaled_dot_product_attention_replacement() -> Tuple[
+ Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for efficient attention"""
# Efficient Attention original graph
def efficient(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
outputs = torch.ops.aten._scaled_dot_product_efficient_attention.default(
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 09:56:35.835239+00:00
@@ -99,25 +99,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self) -> None:
if not self.initialized:
@@ -165,13 +169,15 @@
self.__dict__.update(state)
if self.engine:
self.context = self.engine.create_execution_context()
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:Forward"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function("PythonTorchTensorRTModule:Forward")
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self._check_initialized()
# If in safe mode, check at each iteration for for whether a switch is required
if (
torch_tensorrt.runtime.multi_device_safe_mode._PY_RT_MULTI_DEVICE_SAFE_MODE
@@ -198,13 +204,17 @@
torch.cuda.set_device(device_id)
inputs = tuple([tensor.to(device) for tensor in inputs])
logger.warning(f"Moved all input Tensors to cuda:{device_id}")
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessInputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessInputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
assert len(inputs) == len(
self.input_names
), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."
contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
@@ -237,13 +247,17 @@
self.context.set_binding_shape(
idx, tuple(contiguous_inputs[i].shape)
)
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessOutputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessOutputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
# create output tensors
outputs: List[torch.Tensor] = []
for i, idx in enumerate(self.output_binding_indices_in_order):
shape = tuple(self.context.get_binding_shape(idx))
@@ -264,13 +278,17 @@
dtype=self.hidden_output_dtypes[i],
device=torch.cuda.current_device(),
)
bindings[idx] = output.data_ptr()
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:TensorRTRuntime"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:TensorRTRuntime"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self.context.execute_async_v2(
bindings, torch.cuda.current_stream().cuda_stream
)
if len(outputs) == 1:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 09:56:36.156030+00:00
@@ -315,25 +315,21 @@
name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
kwargs_new = {
"input": args[0],
"kernel_size": args[1],
- "stride": args[2]
- if len(args) > 2
- else (None, None)
- if len(args[1]) == 2
- else (None, None, None),
- "padding": args[3]
- if len(args) > 3
- else (0, 0)
- if len(args[1]) == 2
- else (0, 0, 0),
- "dilation": args[4]
- if len(args) > 4
- else (1, 1)
- if len(args[1]) == 2
- else (1, 1, 1),
+ "stride": (
+ args[2]
+ if len(args) > 2
+ else (None, None) if len(args[1]) == 2 else (None, None, None)
+ ),
+ "padding": (
+ args[3] if len(args) > 3 else (0, 0) if len(args[1]) == 2 else (0, 0, 0)
+ ),
+ "dilation": (
+ args[4] if len(args) > 4 else (1, 1) if len(args[1]) == 2 else (1, 1, 1)
+ ),
"ceil_mode": args[5] if len(args) > 5 else False,
}
return acc_ops_converters.acc_ops_max_poolnd(
network, target, None, kwargs_new, name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 09:56:36.216405+00:00
@@ -124,25 +124,29 @@
interpreter = TRTInterpreter(
mod,
input_specs=self.lower_setting.input_specs,
explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
explicit_precision=self.lower_setting.explicit_precision,
- logger_level=trt.Logger.VERBOSE
- if self.lower_setting.verbose_log
- else trt.Logger.WARNING,
+ logger_level=(
+ trt.Logger.VERBOSE
+ if self.lower_setting.verbose_log
+ else trt.Logger.WARNING
+ ),
)
interp_result: TRTInterpreterResult = interpreter.run(
max_batch_size=self.lower_setting.max_batch_size,
max_workspace_size=self.lower_setting.max_workspace_size,
lower_precision=self.lower_setting.lower_precision,
strict_type_constraints=self.lower_setting.strict_type_constraints,
algorithm_selector=algo_selector,
timing_cache=cache_data,
- profiling_verbosity=trt.ProfilingVerbosity.DETAILED
- if self.lower_setting.verbose_profile
- else trt.ProfilingVerbosity.LAYER_NAMES_ONLY,
+ profiling_verbosity=(
+ trt.ProfilingVerbosity.DETAILED
+ if self.lower_setting.verbose_profile
+ else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+ ),
tactic_sources=self.lower_setting.tactic_sources,
)
# Update timing cache file if needed
timing_cache = interp_result.serialized_cache
@@ -295,14 +299,12 @@
module.half()
# A custom conversion function can be passed to the lowerer to
# handle inputs with custom types. By default, just handle
# tensors and NoneType.
if fp16_conversion_fn is None:
- conversion_fn = (
- lambda x: x.half()
- if x is not None and x.dtype == torch.float32
- else x
+ conversion_fn = lambda x: (
+ x.half() if x is not None and x.dtype == torch.float32 else x
)
else:
conversion_fn = fp16_conversion_fn
inputs = tuple(conversion_fn(x) for x in inputs)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 09:56:36.225761+00:00
@@ -19,13 +19,13 @@
from .observer import Observer
from .utils import get_dynamic_dims, LowerPrecision, unified_dtype_converter, Frameworks
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class TRTInterpreterResult(NamedTuple):
engine: Any
input_names: Sequence[str]
@@ -73,13 +73,13 @@
self.input_specs_iter = 0
self.validate_input_specs()
self._cur_node_name: Optional[str] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
def validate_input_specs(self):
for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
if not self.network.has_implicit_batch_dimension:
assert (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 09:54:46.473073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 09:56:36.476114+00:00
@@ -194,13 +194,15 @@
lowering_start_time = datetime.datetime.now()
self.lower_setting.input_specs = generate_input_specs(
submod_inputs,
self.lower_setting,
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
setattr(split_result.split_module, submod_name, lowered_module)
@@ -234,13 +236,15 @@
if not submod_name.startswith(split_result.non_acc_submodule_prefix):
_LOGGER.info(f"ACC submodule graph: {submod.graph}")
lowering_start_time = datetime.datetime.now()
self.lower_setting.additional_inputs = (
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 09:54:46.473073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 09:56:36.650348+00:00
@@ -193,13 +193,11 @@
kwargs2 = {"equal_nan": True}
if rtol:
kwargs2["rtol"] = rtol
if atol:
kwargs2["atol"] = atol
- kwargs2[
- "msg"
- ] = (
+ kwargs2["msg"] = (
lambda msg: f"Pass {pass_} failed correctness check due at output {kk}:\n{msg}"
)
# If tensors are on different devices, make sure to compare
# their copies that are on the same device.
if x.get_device() != y.get_device():
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 09:54:46.469074+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 09:56:36.702484+00:00
@@ -536,13 +536,13 @@
reshape_batch_size: Optional[fx.Node] = get_reshape_batch_size_as_node(
maybe_reshape
)
if not reshape_batch_size:
continue
- reshape_batch_size_inferred_source: Optional[
- fx.Node
- ] = get_reshape_batch_size_inferred_source(reshape_batch_size)
+ reshape_batch_size_inferred_source: Optional[fx.Node] = (
+ get_reshape_batch_size_inferred_source(reshape_batch_size)
+ )
if not reshape_batch_size_inferred_source:
continue
reshape_input: fx.Node = maybe_reshape.kwargs["input"]
if reshape_input == reshape_batch_size_inferred_source:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 09:54:46.473073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 09:56:37.135472+00:00
@@ -21,13 +21,15 @@
inputs = [torch.randn(1, 10)]
self.run_test(
Split(),
inputs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
test_explicit_batch_dim=False,
)
@parameterized.expand(
@@ -68,13 +70,15 @@
]
self.run_test_with_dynamic_shape(
Split(),
input_specs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
)
# Testing with (-1, -1, -1) results into following error:
# AssertionError: Can't chunk on dynamic shape dimension!
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 09:54:46.477073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 09:56:37.812873+00:00
@@ -152,13 +152,13 @@
mod.eval()
if len(expected_ops):
self.assert_has_op(mod, expected_ops)
interpreter_result = interpreter.run(
- lower_precision=LowerPrecision.FP16
- if fp16_mode
- else LowerPrecision.FP32
+ lower_precision=(
+ LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+ )
)
trt_mod = TRTModule(
interpreter_result.engine,
interpreter_result.input_names,
interpreter_result.output_names,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 09:54:46.477073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 09:56:38.165514+00:00
@@ -67,25 +67,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes: Sequence[torch.dtype] = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self):
if not self.initialized:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 09:54:46.481073+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 09:56:38.464658+00:00
@@ -404,13 +404,13 @@
"inputs": inputs if inputs is not None else [],
# "input_signature": input_signature,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, # Enable sparsity for convolution and fully connected layers.
- "enabled_precisions": enabled_precisions
- if enabled_precisions is not None
- else set(), # Enabling FP16 kernels
+ "enabled_precisions": (
+ enabled_precisions if enabled_precisions is not None else set()
+ ), # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 10:00:58.135756+00:00
+++ /home/runner/work/TensorRT/TensorRT/examples/int8/training/vgg16/vgg16.py 2024-03-06 10:02:48.703582+00:00
@@ -1,10 +1,11 @@
"""
# Reference
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
+
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 10:00:58.139756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Device.py 2024-03-06 10:02:48.808146+00:00
@@ -30,16 +30,18 @@
gpu_id (int): Device ID for target GPU
dla_core (int): Core ID for target DLA core
allow_gpu_fallback (bool): Whether falling back to GPU if DLA cannot support an op should be allowed
"""
- device_type: Optional[
- trt.DeviceType
- ] = None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ device_type: Optional[trt.DeviceType] = (
+ None #: Target device type (GPU or DLA). Set implicitly based on if dla_core is specified.
+ )
gpu_id: int = -1 #: Device ID for target GPU
dla_core: int = -1 #: Core ID for target DLA core
- allow_gpu_fallback: bool = False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ allow_gpu_fallback: bool = (
+ False #: Whether falling back to GPU if DLA cannot support an op should be allowed
+ )
def __init__(self, *args: Any, **kwargs: Any):
"""__init__ Method for torch_tensorrt.Device
Device accepts one of a few construction patterns
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 10:00:58.139756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/_Input.py 2024-03-06 10:02:49.002821+00:00
@@ -26,16 +26,16 @@
class _ShapeMode(Enum):
STATIC = 0
DYNAMIC = 1
- shape_mode: Optional[
- _ShapeMode
- ] = None #: Is input statically or dynamically shaped
- shape: Optional[
- Tuple[int, ...] | Dict[str, Tuple[int, ...]]
- ] = None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ shape_mode: Optional[_ShapeMode] = (
+ None #: Is input statically or dynamically shaped
+ )
+ shape: Optional[Tuple[int, ...] | Dict[str, Tuple[int, ...]]] = (
+ None #: Either a single Tuple or a dict of tuples defining the input shape. Static shaped inputs will have a single tuple. Dynamic inputs will have a dict of the form ``{ "min_shape": Tuple, "opt_shape": Tuple, "max_shape": Tuple }``
+ )
dtype: _enums.dtype = (
_enums.dtype.unknown
) #: The expected data type of the input tensor (default: torch_tensorrt.dtype.float32)
_explicit_set_dtype: bool = False
format: _enums.TensorFormat = (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 10:00:58.139756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/_compiler.py 2024-03-06 10:02:49.124838+00:00
@@ -215,13 +215,13 @@
"precision": precision,
"debug": debug,
"device": device,
"workspace_size": workspace_size,
"min_block_size": min_block_size,
- "torch_executed_ops": torch_executed_ops
- if torch_executed_ops is not None
- else set(),
+ "torch_executed_ops": (
+ torch_executed_ops if torch_executed_ops is not None else set()
+ ),
"pass_through_build_failures": pass_through_build_failures,
"max_aux_streams": max_aux_streams,
"version_compatible": version_compatible,
"optimization_level": optimization_level,
"use_python_runtime": use_python_runtime,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 2024-03-06 10:02:49.259904+00:00
@@ -26,13 +26,13 @@
from packaging import version
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class UnsupportedOperatorException(RuntimeError):
pass
@@ -90,13 +90,13 @@
self.input_specs_iter = 0
self._cur_node_name: Optional[str] = None
self._cur_node: Optional[torch.fx.Node] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
self.compilation_settings = compilation_settings
# Data types for TRT Module output Tensors
self.output_dtypes = output_dtypes
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/conversion/converter_utils.py 2024-03-06 10:02:49.328998+00:00
@@ -322,17 +322,15 @@
else:
raise AssertionError(f"Cannot convert {input_val} to TRT constant")
@overload
-def get_positive_dim(dim: int, dim_size: int) -> int:
- ...
+def get_positive_dim(dim: int, dim_size: int) -> int: ...
@overload
-def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]:
- ...
+def get_positive_dim(dim: Sequence[int], dim_size: int) -> Tuple[int, ...]: ...
def get_positive_dim(
dim: Union[int, Sequence[int]], dim_size: int
) -> Union[int, Tuple[int, ...]]:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py 2024-03-06 10:02:49.703324+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after lowering linear:\n{gm.graph}")
return gm
-def linear_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def linear_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for linear"""
# Original graph
def orig(
input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/_decomposition_groups.py 2024-03-06 10:02:49.708249+00:00
@@ -5,13 +5,13 @@
from torch._decomp import get_decompositions as get_torch_decompositions
from torch._ops import OpOverload, OpOverloadPacket
aten = torch.ops.aten
-_core_aten_decompositions: Dict[
- OpOverload, Callable[[Any], Any]
-] = core_aten_decompositions()
+_core_aten_decompositions: Dict[OpOverload, Callable[[Any], Any]] = (
+ core_aten_decompositions()
+)
torch_enabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._adaptive_avg_pool2d_backward,
aten.addcdiv,
aten.addcdiv_,
aten.addcmul,
@@ -178,13 +178,13 @@
torch_disabled_decompositions: Set[Union[OpOverload, OpOverloadPacket]] = {
aten._softmax.default,
}
-ENABLED_TORCH_DECOMPOSITIONS: Dict[
- OpOverload, Callable[[Any], Any]
-] = get_torch_decompositions(torch_enabled_decompositions)
+ENABLED_TORCH_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = (
+ get_torch_decompositions(torch_enabled_decompositions)
+)
TORCH_TRT_DECOMPOSITIONS: Dict[OpOverload, Callable[[Any], Any]] = {}
def check_decomp_set_invariants() -> None:
"""Validates no overlap between enabled and disabled decomposition sets"""
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py 2024-03-06 10:02:49.740461+00:00
@@ -20,16 +20,14 @@
logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}")
return gm
-def view_replacement() -> (
- Tuple[
- torch.fx.GraphModule,
- Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
- ]
-):
+def view_replacement() -> Tuple[
+ torch.fx.GraphModule,
+ Callable[[torch.Tensor, List[torch.SymInt]], torch.Tensor],
+]:
"""Constructs the original and replacement functions for view"""
# Original graph
def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor:
return torch.ops.aten.view.default(input, shape)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 10:00:58.143756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/lowering/passes/lower_scaled_dot_product_attention.py 2024-03-06 10:02:49.764258+00:00
@@ -58,16 +58,14 @@
logger.debug(f"Graph after lowering scaled dot product attention:\n{gm.graph}")
return gm
-def scaled_dot_product_attention_replacement() -> (
- Tuple[
- Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
- Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
- ]
-):
+def scaled_dot_product_attention_replacement() -> Tuple[
+ Sequence[Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]],
+ Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor],
+]:
"""Constructs the original and replacement functions for efficient attention"""
# Efficient Attention original graph
def efficient(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
outputs = torch.ops.aten._scaled_dot_product_efficient_attention.default(
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py 2024-03-06 10:02:50.004781+00:00
@@ -99,25 +99,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self) -> None:
if not self.initialized:
@@ -165,13 +169,15 @@
self.__dict__.update(state)
if self.engine:
self.context = self.engine.create_execution_context()
def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]:
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:Forward"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function("PythonTorchTensorRTModule:Forward")
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self._check_initialized()
# If in safe mode, check at each iteration for for whether a switch is required
if (
torch_tensorrt.runtime.multi_device_safe_mode._PY_RT_MULTI_DEVICE_SAFE_MODE
@@ -198,13 +204,17 @@
torch.cuda.set_device(device_id)
inputs = tuple([tensor.to(device) for tensor in inputs])
logger.warning(f"Moved all input Tensors to cuda:{device_id}")
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessInputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessInputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
assert len(inputs) == len(
self.input_names
), f"Wrong number of inputs, expect {len(self.input_names)} get {len(inputs)}."
contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs]
@@ -237,13 +247,17 @@
self.context.set_binding_shape(
idx, tuple(contiguous_inputs[i].shape)
)
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:ProcessOutputs"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:ProcessOutputs"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
# create output tensors
outputs: List[torch.Tensor] = []
for i, idx in enumerate(self.output_binding_indices_in_order):
shape = tuple(self.context.get_binding_shape(idx))
@@ -264,13 +278,17 @@
dtype=self.hidden_output_dtypes[i],
device=torch.cuda.current_device(),
)
bindings[idx] = output.data_ptr()
- with torch.autograd.profiler.record_function(
- "PythonTorchTensorRTModule:TensorRTRuntime"
- ) if self.profiling_enabled else nullcontext():
+ with (
+ torch.autograd.profiler.record_function(
+ "PythonTorchTensorRTModule:TensorRTRuntime"
+ )
+ if self.profiling_enabled
+ else nullcontext()
+ ):
self.context.execute_async_v2(
bindings, torch.cuda.current_stream().cuda_stream
)
if len(outputs) == 1:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/converters/aten_ops_converters.py 2024-03-06 10:02:50.330681+00:00
@@ -315,25 +315,21 @@
name: str,
) -> Union[TRTTensor, Sequence[TRTTensor]]:
kwargs_new = {
"input": args[0],
"kernel_size": args[1],
- "stride": args[2]
- if len(args) > 2
- else (None, None)
- if len(args[1]) == 2
- else (None, None, None),
- "padding": args[3]
- if len(args) > 3
- else (0, 0)
- if len(args[1]) == 2
- else (0, 0, 0),
- "dilation": args[4]
- if len(args) > 4
- else (1, 1)
- if len(args[1]) == 2
- else (1, 1, 1),
+ "stride": (
+ args[2]
+ if len(args) > 2
+ else (None, None) if len(args[1]) == 2 else (None, None, None)
+ ),
+ "padding": (
+ args[3] if len(args) > 3 else (0, 0) if len(args[1]) == 2 else (0, 0, 0)
+ ),
+ "dilation": (
+ args[4] if len(args) > 4 else (1, 1) if len(args[1]) == 2 else (1, 1, 1)
+ ),
"ceil_mode": args[5] if len(args) > 5 else False,
}
return acc_ops_converters.acc_ops_max_poolnd(
network, target, None, kwargs_new, name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/fx2trt.py 2024-03-06 10:02:50.365042+00:00
@@ -19,13 +19,13 @@
from .observer import Observer
from .utils import get_dynamic_dims, LowerPrecision, unified_dtype_converter, Frameworks
_LOGGER: logging.Logger = logging.getLogger(__name__)
-TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[
- Callable[[torch.fx.GraphModule], None]
-] = Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = (
+ Observer("TRT_INTERPRETER_CALL_PRE_OBSERVER")
+)
class TRTInterpreterResult(NamedTuple):
engine: Any
input_names: Sequence[str]
@@ -73,13 +73,13 @@
self.input_specs_iter = 0
self.validate_input_specs()
self._cur_node_name: Optional[str] = None
self._input_names: List[str] = []
self._output_names: List[str] = []
- self._itensor_to_tensor_meta: Dict[
- trt.tensorrt.ITensor, TensorMetadata
- ] = dict()
+ self._itensor_to_tensor_meta: Dict[trt.tensorrt.ITensor, TensorMetadata] = (
+ dict()
+ )
def validate_input_specs(self):
for shape, _, _, shape_ranges, has_batch_dim in self.input_specs:
if not self.network.has_implicit_batch_dimension:
assert (
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/lower.py 2024-03-06 10:02:50.369382+00:00
@@ -124,25 +124,29 @@
interpreter = TRTInterpreter(
mod,
input_specs=self.lower_setting.input_specs,
explicit_batch_dimension=self.lower_setting.explicit_batch_dimension,
explicit_precision=self.lower_setting.explicit_precision,
- logger_level=trt.Logger.VERBOSE
- if self.lower_setting.verbose_log
- else trt.Logger.WARNING,
+ logger_level=(
+ trt.Logger.VERBOSE
+ if self.lower_setting.verbose_log
+ else trt.Logger.WARNING
+ ),
)
interp_result: TRTInterpreterResult = interpreter.run(
max_batch_size=self.lower_setting.max_batch_size,
max_workspace_size=self.lower_setting.max_workspace_size,
lower_precision=self.lower_setting.lower_precision,
strict_type_constraints=self.lower_setting.strict_type_constraints,
algorithm_selector=algo_selector,
timing_cache=cache_data,
- profiling_verbosity=trt.ProfilingVerbosity.DETAILED
- if self.lower_setting.verbose_profile
- else trt.ProfilingVerbosity.LAYER_NAMES_ONLY,
+ profiling_verbosity=(
+ trt.ProfilingVerbosity.DETAILED
+ if self.lower_setting.verbose_profile
+ else trt.ProfilingVerbosity.LAYER_NAMES_ONLY
+ ),
tactic_sources=self.lower_setting.tactic_sources,
)
# Update timing cache file if needed
timing_cache = interp_result.serialized_cache
@@ -295,14 +299,12 @@
module.half()
# A custom conversion function can be passed to the lowerer to
# handle inputs with custom types. By default, just handle
# tensors and NoneType.
if fp16_conversion_fn is None:
- conversion_fn = (
- lambda x: x.half()
- if x is not None and x.dtype == torch.float32
- else x
+ conversion_fn = lambda x: (
+ x.half() if x is not None and x.dtype == torch.float32 else x
)
else:
conversion_fn = fp16_conversion_fn
inputs = tuple(conversion_fn(x) for x in inputs)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_pass_manager_builder.py 2024-03-06 10:02:50.599448+00:00
@@ -194,13 +194,15 @@
lowering_start_time = datetime.datetime.now()
self.lower_setting.input_specs = generate_input_specs(
submod_inputs,
self.lower_setting,
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
setattr(split_result.split_module, submod_name, lowered_module)
@@ -234,13 +236,15 @@
if not submod_name.startswith(split_result.non_acc_submodule_prefix):
_LOGGER.info(f"ACC submodule graph: {submod.graph}")
lowering_start_time = datetime.datetime.now()
self.lower_setting.additional_inputs = (
- additional_submodule_inputs[submod_name]
- if additional_submodule_inputs
- else None,
+ (
+ additional_submodule_inputs[submod_name]
+ if additional_submodule_inputs
+ else None
+ ),
)
lowered_module = self._lower_func(
submod, submod_inputs, self.lower_setting, submod_name
)
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/pass_utils.py 2024-03-06 10:02:50.817813+00:00
@@ -193,13 +193,11 @@
kwargs2 = {"equal_nan": True}
if rtol:
kwargs2["rtol"] = rtol
if atol:
kwargs2["atol"] = atol
- kwargs2[
- "msg"
- ] = (
+ kwargs2["msg"] = (
lambda msg: f"Pass {pass_} failed correctness check due at output {kk}:\n{msg}"
)
# If tensors are on different devices, make sure to compare
# their copies that are on the same device.
if x.get_device() != y.get_device():
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 10:00:58.147756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/passes/lower_basic_pass.py 2024-03-06 10:02:50.870753+00:00
@@ -536,13 +536,13 @@
reshape_batch_size: Optional[fx.Node] = get_reshape_batch_size_as_node(
maybe_reshape
)
if not reshape_batch_size:
continue
- reshape_batch_size_inferred_source: Optional[
- fx.Node
- ] = get_reshape_batch_size_inferred_source(reshape_batch_size)
+ reshape_batch_size_inferred_source: Optional[fx.Node] = (
+ get_reshape_batch_size_inferred_source(reshape_batch_size)
+ )
if not reshape_batch_size_inferred_source:
continue
reshape_input: fx.Node = maybe_reshape.kwargs["input"]
if reshape_input == reshape_batch_size_inferred_source:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 10:00:58.151756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/test/converters/acc_op/test_split.py 2024-03-06 10:02:51.279303+00:00
@@ -21,13 +21,15 @@
inputs = [torch.randn(1, 10)]
self.run_test(
Split(),
inputs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
test_explicit_batch_dim=False,
)
@parameterized.expand(
@@ -68,13 +70,15 @@
]
self.run_test_with_dynamic_shape(
Split(),
input_specs,
expected_ops={
- acc_ops.split
- if isinstance(split_size_or_sections, int)
- else acc_ops.slice_tensor
+ (
+ acc_ops.split
+ if isinstance(split_size_or_sections, int)
+ else acc_ops.slice_tensor
+ )
},
)
# Testing with (-1, -1, -1) results into following error:
# AssertionError: Can't chunk on dynamic shape dimension!
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 10:00:58.155756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/tools/common_fx2trt.py 2024-03-06 10:02:51.961775+00:00
@@ -152,13 +152,13 @@
mod.eval()
if len(expected_ops):
self.assert_has_op(mod, expected_ops)
interpreter_result = interpreter.run(
- lower_precision=LowerPrecision.FP16
- if fp16_mode
- else LowerPrecision.FP32
+ lower_precision=(
+ LowerPrecision.FP16 if fp16_mode else LowerPrecision.FP32
+ )
)
trt_mod = TRTModule(
interpreter_result.engine,
interpreter_result.input_names,
interpreter_result.output_names,
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 10:00:58.155756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/fx/trt_module.py 2024-03-06 10:02:52.302469+00:00
@@ -67,25 +67,29 @@
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.output_binding_indices_in_order
]
self.output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.output_binding_indices_in_order
]
self.hidden_output_dtypes: Sequence[torch.dtype] = [
unified_dtype_converter(
self.engine.get_binding_dtype(idx), Frameworks.TORCH
)
for idx in self.hidden_output_binding_indices_in_order
]
self.hidden_output_shapes = [
- tuple(self.engine.get_binding_shape(idx))
- if self.engine.has_implicit_batch_dimension
- else tuple()
+ (
+ tuple(self.engine.get_binding_shape(idx))
+ if self.engine.has_implicit_batch_dimension
+ else tuple()
+ )
for idx in self.hidden_output_binding_indices_in_order
]
def _check_initialized(self):
if not self.initialized:
--- /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 10:00:58.155756+00:00
+++ /home/runner/work/TensorRT/TensorRT/py/torch_tensorrt/ts/_compile_spec.py 2024-03-06 10:02:52.569300+00:00
@@ -404,13 +404,13 @@
"inputs": inputs if inputs is not None else [],
# "input_signature": input_signature,
"device": device,
"disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
"sparse_weights": sparse_weights, # Enable sparsity for convolution and fully connected layers.
- "enabled_precisions": enabled_precisions
- if enabled_precisions is not None
- else set(), # Enabling FP16 kernels
+ "enabled_precisions": (
+ enabled_precisions if enabled_precisions is not None else set()
+ ), # Enabling FP16 kernels
"refit": refit, # enable refit
"debug": debug, # enable debuggable engine
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
a42dcbf
to
279f00b
Compare
279f00b
to
975ce31
Compare
if not isinstance(input, TRTTensor): | ||
raise RuntimeError( | ||
f"scatter_tensor received input {input} that is not part " | ||
"of the TensorRT region!" | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be removed in favor of the `enforce_tensor_types` decorator on the converter.
if not (isinstance(index, TRTTensor)): | ||
index = get_trt_tensor(ctx, index, f"_index_tensor") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This input can also be added to `enforce_tensor_types`, which avoids this call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will be required, since `index` can be a constant too.
if len(input_shape) != len(index_shape): | ||
raise RuntimeError(f"The no of dimensions of input and index should be equal") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this would throw an error in Torch, it can be omitted here; Torch would have caught it already
dynamic_shape = has_dynamic_shape(input.shape) | ||
if dynamic_shape: | ||
# Check whether slice target dim is dynamic shape dim | ||
assert input.shape[dim] != -1, "Can't scatter on negative shape dimension!" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be checked in a converter validator, since it would otherwise cause a model failure
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be a converter validator? My understanding is that a validator guards a feature of the converter that may be supported later, but that is not the case here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, even if there is no way to support the feature later, the check should still be in the validator so that we can fall back to Torch if this behavior is not supported
input_dims = len(input_shape) | ||
for i in range(0, input_dims): | ||
if i != dim and (index_shape[i] >= input.shape[i]): | ||
raise RuntimeError( | ||
f"cannot have index size greater than the input size along dimension {dim}" | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this cause an error in Torch? If so, it can be omitted, otherwise, it should go in the validator
@dynamo_tensorrt_converter(torch.ops.aten.scatter.value) | ||
def aten_ops_scatter_value( | ||
ctx: ConversionContext, | ||
target: Target, | ||
args: Tuple[Argument, ...], | ||
kwargs: Dict[str, Argument], | ||
name: str, | ||
) -> Union[TRTTensor, Sequence[TRTTensor]]: | ||
return impl.select.scatter_value( | ||
ctx, target, SourceIR.ATEN, name, args[0], args[1], args[2], args[3] | ||
) | ||
|
||
|
||
@dynamo_tensorrt_converter(torch.ops.aten.scatter.src) | ||
def aten_ops_scatter_src( | ||
ctx: ConversionContext, | ||
target: Target, | ||
args: Tuple[Argument, ...], | ||
kwargs: Dict[str, Argument], | ||
name: str, | ||
) -> Union[TRTTensor, Sequence[TRTTensor]]: | ||
return impl.select.scatter_src( | ||
ctx, target, SourceIR.ATEN, name, args[0], args[1], args[2], args[3] | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems the main difference between `impl.select.scatter_src` and `impl.select.scatter_value` is the casting of the `value` tensor. Could these functions (converters and implementation) be merged into a single function that accepts either a `float` or a `TRTTensor` and differentiates accordingly?
a745362
to
aea9d94
Compare
aea9d94
to
c4de771
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some changes that do not conform to Python style guidelines:
--- /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/conversion/harness.py 2024-05-02 17:33:02.444196+00:00
+++ /home/runner/work/TensorRT/TensorRT/tests/py/dynamo/conversion/harness.py 2024-05-02 17:34:57.877044+00:00
@@ -257,11 +257,11 @@
list(trt_inputs[:num_input])
+ [
input.to(dtype_32bit),
]
+ list(trt_inputs[num_input + 1 :])
- )
+ )
trt_input_specs = [Input.from_tensor(i) for i in trt_inputs]
input_specs = [Input.from_tensor(i) for i in inputs]
output_dtypes = None
The
|
52e552f
to
3844d35
Compare
@@ -289,7 +314,7 @@ def run_test_compare_tensor_attributes_only( | |||
# We replicate this behavior here | |||
compilation_settings = CompilationSettings( | |||
enabled_precisions={dtype._from(precision)}, | |||
truncate_long_and_double=True, | |||
truncate_and_double=True, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
truncate_double
0f33bbe
to
9ca3f62
Compare
9ca3f62
to
19049bc
Compare
…in supported in TRT10
…es conversion to int32 in TRT10 for tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall looks good - added a few suggestions
if isinstance(index, torch.Tensor): | ||
if index.dtype == torch.int64: | ||
index = index.to(torch.int32) | ||
elif isinstance(index, np.ndarray): | ||
if index.dtype == np.int64: | ||
index = index.astype(np.int32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since TRT supports `int64` now, is this still necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If not, the `enforce_tensor_types` decorator could be used to autocast any input to an `ITensor`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the scatter layer in TRT still needs int32 indices, since it complains with:
ERROR:torch_tensorrt [TensorRT Conversion Context]:4: [SCATTER]-[aten_ops.scatter.value]-[scatter_value_scatter_layer]: Indices tensor must be Int32.
This remains a case where Torch needs int64 while the scatter layer in TRT needs int32. Earlier this was handled in `get_trt_tensor` due to a TensorRT requirement, but not anymore.
As for `enforce_tensor_types`, it handles the cases where the input is a TRTTensor, torch.Tensor, or np.ndarray, and casts it otherwise if promote is true. In this case we also want to cast int64 to int32. OK, so I will replace this with `enforce_tensor_types` and then use `cast_trt_tensor`.
dynamic_shape = has_dynamic_shape(input.shape) | ||
if dynamic_shape: | ||
# Check whether slice target dim is dynamic shape dim | ||
assert input.shape[dim] != -1, "Can't scatter on negative shape dimension!" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be removed, and should be addressed by #2796
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So after this PR is merged the above would need to be addressed accordingly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I believe the above would have to be addressed in a future PR which adds dynamic shape support for this converter. With #2796, this converter would be registered as static-only by default
# scatter.value | ||
if isinstance(src, int) or isinstance(src, float): | ||
src_tensor = get_trt_tensor( | ||
ctx, src * torch.ones(index_shape_list), name + "_value_tensor" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is preferred to use `np.ones` here to avoid FakeTensor issues.
@@ -1,5 +1,4 @@ | |||
# type: ignore | |||
|
|||
import copy |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this import used?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I will remove it.
@@ -21,6 +21,7 @@ | |||
set_layer_name, | |||
) | |||
from torch_tensorrt.fx.types import Shape, TRTTensor | |||
from torch_tensorrt.fx.utils import Frameworks, unified_dtype_converter |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will remove this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a small comment, otherwise looks good pending CI
if index.dtype == trt.int64: | ||
index = cast_trt_tensor(ctx, index, trt.int32, name + "_cast_index_tensor") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the index must be `trt.int32` and no other types (`trt.float32`, `trt.float16`, etc.) are acceptable, then it is fine to remove the `if` statement, as the `cast_trt_tensor` function will not insert a cast if the type is already `int32`, as here:
if input_val.dtype != trt_dtype: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be required for `trt.int64` cases, which is what our test cases exercise, since Torch requires int64 inputs.
2d2d214
to
487906d
Compare
Dependency of PR- #2519