diff --git a/py/torch_tensorrt/dynamo/backend/__init__.py b/py/torch_tensorrt/dynamo/backend/__init__.py index a096015a60..7b7e42f619 100644 --- a/py/torch_tensorrt/dynamo/backend/__init__.py +++ b/py/torch_tensorrt/dynamo/backend/__init__.py @@ -4,7 +4,7 @@ import torch_tensorrt from functools import partial -from typing import Any, Sequence +from typing import Any, Optional, Sequence from torch_tensorrt import EngineCapability, Device from torch_tensorrt.fx.utils import LowerPrecision @@ -17,6 +17,9 @@ WORKSPACE_SIZE, MIN_BLOCK_SIZE, PASS_THROUGH_BUILD_FAILURES, + MAX_AUX_STREAMS, + VERSION_COMPATIBLE, + OPTIMIZATION_LEVEL, USE_EXPERIMENTAL_RT, ) @@ -46,6 +49,9 @@ def compile( min_block_size=MIN_BLOCK_SIZE, torch_executed_ops=[], torch_executed_modules=[], + max_aux_streams=MAX_AUX_STREAMS, + version_compatible=VERSION_COMPATIBLE, + optimization_level=OPTIMIZATION_LEVEL, use_experimental_rt=USE_EXPERIMENTAL_RT, **kwargs, ): @@ -95,6 +101,9 @@ def compile( workspace_size=workspace_size, min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, + max_aux_streams=max_aux_streams, + version_compatible=version_compatible, + optimization_level=optimization_level, use_experimental_rt=use_experimental_rt, **kwargs, ) @@ -119,6 +128,9 @@ def create_backend( min_block_size: int = MIN_BLOCK_SIZE, torch_executed_ops: Sequence[str] = set(), pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES, + max_aux_streams: Optional[int] = MAX_AUX_STREAMS, + version_compatible: bool = VERSION_COMPATIBLE, + optimization_level: Optional[int] = OPTIMIZATION_LEVEL, use_experimental_rt: bool = USE_EXPERIMENTAL_RT, **kwargs, ): @@ -131,6 +143,10 @@ def create_backend( min_block_size: Minimum number of operators per TRT-Engine Block torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False) + max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine + version_compatible: Provide version forward-compatibility for engine plan files + optimization_level: Builder optimization 0-5, higher levels imply longer build time, + searching for more optimization options. TRT defaults to 3 use_experimental_rt: Whether to use the new experimental TRTModuleNext for TRT engines Returns: Backend for torch.compile @@ -145,6 +161,9 @@ def create_backend( min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, pass_through_build_failures=pass_through_build_failures, + max_aux_streams=max_aux_streams, + version_compatible=version_compatible, + optimization_level=optimization_level, use_experimental_rt=use_experimental_rt, ) diff --git a/py/torch_tensorrt/dynamo/backend/_defaults.py b/py/torch_tensorrt/dynamo/backend/_defaults.py index 51e43f8ab2..286c60c2fa 100644 --- a/py/torch_tensorrt/dynamo/backend/_defaults.py +++ b/py/torch_tensorrt/dynamo/backend/_defaults.py @@ -6,4 +6,7 @@ WORKSPACE_SIZE = 0 MIN_BLOCK_SIZE = 5 PASS_THROUGH_BUILD_FAILURES = False +MAX_AUX_STREAMS = None +VERSION_COMPATIBLE = False +OPTIMIZATION_LEVEL = None USE_EXPERIMENTAL_RT = False diff --git a/py/torch_tensorrt/dynamo/backend/_settings.py b/py/torch_tensorrt/dynamo/backend/_settings.py index 7ba58bf9f2..7ec4cc596e 100644 --- a/py/torch_tensorrt/dynamo/backend/_settings.py +++ b/py/torch_tensorrt/dynamo/backend/_settings.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Sequence +from typing import Optional, Sequence from torch_tensorrt.fx.utils import LowerPrecision from torch_tensorrt.dynamo.backend._defaults import ( @@ -8,6 +8,9 @@ WORKSPACE_SIZE, MIN_BLOCK_SIZE, PASS_THROUGH_BUILD_FAILURES, + MAX_AUX_STREAMS, + VERSION_COMPATIBLE, + OPTIMIZATION_LEVEL, USE_EXPERIMENTAL_RT, ) @@ -20,4 +23,7 @@ class CompilationSettings: min_block_size: int = MIN_BLOCK_SIZE torch_executed_ops: Sequence[str] = field(default_factory=set) pass_through_build_failures: bool = PASS_THROUGH_BUILD_FAILURES + max_aux_streams: Optional[int] = MAX_AUX_STREAMS + version_compatible: bool = VERSION_COMPATIBLE + optimization_level: Optional[int] = OPTIMIZATION_LEVEL use_experimental_rt: bool = USE_EXPERIMENTAL_RT diff --git a/py/torch_tensorrt/dynamo/backend/conversion.py b/py/torch_tensorrt/dynamo/backend/conversion.py index b7927ca95f..46388255bf 100644 --- a/py/torch_tensorrt/dynamo/backend/conversion.py +++ b/py/torch_tensorrt/dynamo/backend/conversion.py @@ -42,6 +42,9 @@ def convert_module( if settings.debug else trt.ProfilingVerbosity.LAYER_NAMES_ONLY ), + max_aux_streams=settings.max_aux_streams, + version_compatible=settings.version_compatible, + optimization_level=settings.optimization_level, ) if settings.use_experimental_rt: