feat: Wrap dynamic size handling in a compilation flag #1851

Merged · 2 commits · May 15, 2023
Changes from 1 commit
1 change: 1 addition & 0 deletions core/conversion/conversionctx/ConversionCtx.h
@@ -23,6 +23,7 @@ struct BuilderSettings {
bool refit = false;
bool debug = false;
bool truncate_long_and_double = false;
bool allow_shape_tensors = false;
ir::Device device;
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
nvinfer1::IInt8Calibrator* calibrator = nullptr;
12 changes: 10 additions & 2 deletions core/conversion/evaluators/aten.cpp
@@ -270,7 +270,11 @@ auto aten_registrations TORCHTRT_UNUSED =
if (tensor_var.isITensor()) {
auto tensor = tensor_var.ITensor();
if (ctx->input_is_dynamic) {
- return dynamic_size_layer(ctx, n, args);
+ if (ctx->settings.allow_shape_tensors) {
Collaborator review comment: We should be checking whether the input is dynamic, shape tensors are enabled, and the input contains a placeholder dimension before using the shape tensor code path. Otherwise the static aten::size evaluation is sufficient and will cause fewer errors.
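A rough sketch of the gating this comment suggests, reusing names from the surrounding diff; it is illustrative only and not part of the commit (the placeholder-dimension check is an assumption about how a dynamic dimension would be detected here):

```cpp
// Illustrative sketch only (not part of this commit): gate the shape-tensor path on all
// three conditions the reviewer lists.
auto dims = tensor->getDimensions();
bool has_placeholder_dim = false;
for (int i = 0; i < dims.nbDims; i++) {
  if (dims.d[i] == -1) {
    has_placeholder_dim = true;
  }
}
if (ctx->input_is_dynamic && ctx->settings.allow_shape_tensors && has_placeholder_dim) {
  // Shape tensors are only needed when a dimension is actually unknown at build time.
  return dynamic_size_layer(ctx, n, args);
}
// Otherwise the static aten::size evaluation below is sufficient.
```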

+ return dynamic_size_layer(ctx, n, args);
+ } else {
+ LOG_WARNING("There may be undefined behavior using dynamic shape and aten::size ");
Collaborator review comment: Is this undefined behavior due to using aten::size with dynamic shape without shape tensors, or the other way around? Might need rewording.

+ }
}
return util::toVec(tensor->getDimensions());
} else if (tensor_var.IValue()->isTensor()) {
@@ -286,7 +290,11 @@ auto aten_registrations TORCHTRT_UNUSED =
auto dim = args.at(n->input(1)).unwrapToInt();
if (tensor_var.isITensor()) {
if (ctx->input_is_dynamic) {
- return dynamic_size_layer(ctx, n, args);
+ if (ctx->settings.allow_shape_tensors) {
Collaborator review comment: Same comments as above about the conditions and the warning.

+ return dynamic_size_layer(ctx, n, args);
+ } else {
+ LOG_WARNING("There may be undefined behavior using dynamic shape and aten::size ");
+ }
}
auto tensor = tensor_var.ITensor();
auto dims = util::toVec(tensor->getDimensions());
32 changes: 25 additions & 7 deletions core/conversion/evaluators/eval_util.cpp
@@ -32,7 +32,9 @@ nvinfer1::ITensor* index_layer(
c10::IValue dynamic_size_layer(ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) {
LOG_DEBUG("Using dynamic version of aten::size evaluator");
auto in = args.at(n->input(0)).ITensorOrFreeze(ctx);
LOG_DEBUG("Input dimensions: " << in->getDimensions());
auto input_dims = in->getDimensions();
LOG_DEBUG("Input dimensions: " << input_dims);

auto shape_layer = ctx->net->addShape(*in);
TORCHTRT_CHECK(shape_layer, "Unable to create shape layer from node: " << *n);
auto shape_1d_tensor = shape_layer->getOutput(0);
@@ -44,15 +46,31 @@ c10::IValue dynamic_size_layer(ConversionCtx* ctx, const torch::jit::Node* n, kw
dim = dim < 0 ? dim + maxDim : dim;
LOG_DEBUG("Dimension to select: " << dim);
shape_1d_tensor = index_layer(ctx, n, shape_1d_tensor, dim);
- }
- LOG_DEBUG("Output tensor shape: " << shape_1d_tensor->getDimensions());

LOG_DEBUG("Output tensor shape: " << shape_1d_tensor->getDimensions());
auto tensor_holder = TensorContainer();
tensor_holder.hold_tensor(shape_1d_tensor);
auto shape_1d_ivalue = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(tensor_holder)));

- auto tensor_holder = TensorContainer();
- tensor_holder.hold_tensor(shape_1d_tensor);
- auto shape_1d_ivalue = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(tensor_holder)));
+ return shape_1d_ivalue;

- return shape_1d_ivalue;
+ } else {
+ auto input_size = c10::impl::GenericList(c10::AnyType::get());
+ // Only express the dynamic dimension with a shape layer output.
+ // The static dimensions are preserved in the input size.
+ for (int32_t i = 0; i < input_dims.nbDims; i++) {
+ if (input_dims.d[i] == -1) {
+ auto dynamic_dim_tensor = index_layer(ctx, n, shape_1d_tensor, i);
+ auto dynamic_dim_holder = TensorContainer();
+ dynamic_dim_holder.hold_tensor(dynamic_dim_tensor);
+ auto dynamic_dim_ivalue = c10::IValue(std::move(c10::make_intrusive<TensorContainer>(dynamic_dim_holder)));
+ input_size.emplace_back(std::move(dynamic_dim_ivalue));
+ } else {
+ input_size.emplace_back(input_dims.d[i]);
+ }
+ }
+ return c10::IValue(input_size);
+ }
}

int64_t normalizeIndex(int64_t idx, int64_t list_size) {
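To illustrate the new else branch above: when aten::size is evaluated without an explicit dim, dynamic (-1) dimensions become shape-tensor entries resolved at runtime, while static dimensions stay plain integers in the returned list. The standalone mimic below (an illustration with no TensorRT or Torch dependency; the example shape is made up) walks the same loop for an input profiled as (-1, 3, 224, 224):

```cpp
// Standalone mimic of the else branch in dynamic_size_layer (illustration only).
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Example input compiled with a dynamic batch dimension: (-1, 3, 224, 224).
  std::vector<int64_t> input_dims = {-1, 3, 224, 224};

  for (std::size_t i = 0; i < input_dims.size(); i++) {
    if (input_dims[i] == -1) {
      // dynamic_size_layer would call index_layer(ctx, n, shape_1d_tensor, i) here and
      // wrap the resulting ITensor in a TensorContainer IValue.
      std::cout << "dim " << i << ": shape-tensor entry, resolved at runtime\n";
    } else {
      // Static dimensions are appended to the size list directly.
      std::cout << "dim " << i << ": static value " << input_dims[i] << "\n";
    }
  }
  return 0;
}
```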
10 changes: 10 additions & 0 deletions cpp/bin/torchtrtc/main.cpp
@@ -168,6 +168,12 @@ int main(int argc, char** argv) {
"Truncate weights that are provided in 64bit to 32bit (Long, Double to Int, Float)",
{"truncate", "truncate-long-double", "truncate-64bit"});

args::Flag allow_shape_tensors(
parser,
"allow-shape-tensors",
"(Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT",
{"allow-shape-tensors"});

args::Flag save_engine(
parser,
"save_engine",
@@ -443,6 +449,10 @@ int main(int argc, char** argv) {
compile_settings.truncate_long_and_double = true;
}

if (allow_shape_tensors) {
compile_settings.allow_shape_tensors = true;
}

torch::jit::Module mod;
try {
// Deserialize the ScriptModule from a file using torch::jit::load().
5 changes: 5 additions & 0 deletions cpp/include/torch_tensorrt/torch_tensorrt.h
@@ -791,6 +791,11 @@ struct CompileSpec {
*/
bool truncate_long_and_double = false;

/**
* Allow shape tensors (from IShape layer) in the graph
*/
bool allow_shape_tensors = false;

/**
* Target Device
*/
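For context, a hedged usage sketch of the new field from the C++ frontend. The module path, the input shapes, and the use of the `torch_tensorrt::ts` alias with `torch_tensorrt::ts::compile` are assumptions for illustration, not taken from this diff:

```cpp
// Usage sketch (assumptions noted above); not part of this diff.
#include <torch/script.h>
#include "torch_tensorrt/torch_tensorrt.h"

int main() {
  // Hypothetical TorchScript module with a dynamic batch dimension.
  auto mod = torch::jit::load("model.ts");

  // Dynamic input range: min 1, opt 16, max 32 in the batch dimension.
  auto input = torch_tensorrt::Input({1, 3, 224, 224}, {16, 3, 224, 224}, {32, 3, 224, 224});

  auto settings = torch_tensorrt::ts::CompileSpec({input});
  settings.allow_shape_tensors = true; // new experimental field added by this PR

  auto trt_mod = torch_tensorrt::ts::compile(mod, settings);
  return 0;
}
```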
1 change: 1 addition & 0 deletions cpp/src/compile_spec.cpp
@@ -90,6 +90,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
internal.convert_info.engine_settings.refit = external.refit;
internal.convert_info.engine_settings.debug = external.debug;
internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
internal.convert_info.engine_settings.allow_shape_tensors = external.allow_shape_tensors;
internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
internal.lower_info.target_device.allow_gpu_fallback = external.device.allow_gpu_fallback;
internal.partitioning_info.target_device.allow_gpu_fallback = external.device.allow_gpu_fallback;
1 change: 1 addition & 0 deletions py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
@@ -84,6 +84,7 @@ void RegisterTRTCompileSpec() {
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, allow_shape_tensors);
}

struct TRTTSRegistrations {
2 changes: 2 additions & 0 deletions py/torch_tensorrt/csrc/tensorrt_classes.cpp
@@ -373,6 +373,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.partitioning_info.truncate_long_and_double = truncate_long_and_double;
info.lower_info.forced_fallback_modules = torch_fallback.forced_fallback_modules;
info.convert_info.engine_settings.truncate_long_and_double = truncate_long_and_double;
info.convert_info.engine_settings.allow_shape_tensors = allow_shape_tensors;

info.convert_info.engine_settings.capability = toTRTEngineCapability(capability);
TORCHTRT_CHECK(num_avg_timing_iters >= 0, "num_avg_timing_iters must be 0 or greater");
@@ -423,6 +424,7 @@ std::string CompileSpec::stringify() {
ss << " \"DLA Local DRAM Size\": " << dla_local_dram_size << std::endl;
ss << " \"DLA Global DRAM Size\": " << dla_global_dram_size << std::endl;
ss << " \"Truncate long and double\": " << truncate_long_and_double << std::endl;
ss << " \"Allow Shape tensors\": " << allow_shape_tensors << std::endl;
ss << " \"Torch Fallback\": " << torch_fallback.to_str();
ss << "}";
return ss.str();
2 changes: 2 additions & 0 deletions py/torch_tensorrt/csrc/tensorrt_classes.h
@@ -167,6 +167,7 @@ struct CompileSpec : torch::CustomClassHolder {
ADD_FIELD_GET_SET(dla_local_dram_size, int64_t);
ADD_FIELD_GET_SET(dla_global_dram_size, int64_t);
ADD_FIELD_GET_SET(truncate_long_and_double, bool);
ADD_FIELD_GET_SET(allow_shape_tensors, bool);
ADD_FIELD_GET_SET(device, Device);
ADD_FIELD_GET_SET(torch_fallback, TorchFallback);
ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);
Expand All @@ -180,6 +181,7 @@ struct CompileSpec : torch::CustomClassHolder {
bool refit = false;
bool debug = false;
bool truncate_long_and_double = false;
bool allow_shape_tensors = false;
Device device;
TorchFallback torch_fallback;
EngineCapability capability = EngineCapability::kDEFAULT;
3 changes: 2 additions & 1 deletion py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
@@ -371,7 +371,8 @@ PYBIND11_MODULE(_C, m) {
.def_readwrite("dla_local_dram_size", &CompileSpec::dla_local_dram_size)
.def_readwrite("dla_global_dram_size", &CompileSpec::dla_global_dram_size)
.def_readwrite("torch_fallback", &CompileSpec::torch_fallback)
.def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double);
.def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double)
.def_readwrite("allow_shape_tensors", &CompileSpec::allow_shape_tensors);

py::class_<TorchFallback>(ts_sub_mod, "TorchFallback")
.def(py::init<>())
8 changes: 8 additions & 0 deletions py/torch_tensorrt/ts/_compile_spec.py
@@ -298,6 +298,10 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec:
assert isinstance(compile_spec["debug"], bool)
info.debug = compile_spec["debug"]

if "allow_shape_tensors" in compile_spec:
assert isinstance(compile_spec["allow_shape_tensors"], bool)
info.allow_shape_tensors = compile_spec["allow_shape_tensors"]

if "device" in compile_spec:
info.device = _parse_device(compile_spec["device"])

Expand Down Expand Up @@ -354,6 +358,7 @@ def TensorRTCompileSpec(
dla_global_dram_size=536870912,
truncate_long_and_double=False,
calibrator=None,
allow_shape_tensors=False,
) -> torch.classes.tensorrt.CompileSpec:
"""Utility to create a formated spec dictionary for using the PyTorch TensorRT backend

@@ -388,6 +393,7 @@
workspace_size (int): Maximum size of workspace given to TensorRT
truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT

Returns:
torch.classes.tensorrt.CompileSpec: List of methods and formated spec objects to be provided to ``torch._C._jit_to_tensorrt``
@@ -410,6 +416,7 @@
"dla_global_dram_size": dla_global_dram_size, # Host RAM used by DLA to store weights and metadata for execution
"calibrator": calibrator,
"truncate_long_and_double": truncate_long_and_double,
"allow_shape_tensors": allow_shape_tensors,
}

parsed_spec = _parse_compile_spec(compile_spec)
@@ -461,6 +468,7 @@ def TensorRTCompileSpec(
backend_spec._set_dla_local_dram_size(parsed_spec.dla_local_dram_size)
backend_spec._set_dla_global_dram_size(parsed_spec.dla_global_dram_size)
backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
backend_spec._set_allow_shape_tensors(parsed_spec.allow_shape_tensors)
backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())

return backend_spec
6 changes: 6 additions & 0 deletions py/torch_tensorrt/ts/_compiler.py
@@ -31,6 +31,7 @@ def compile(
min_block_size=3,
torch_executed_ops=[],
torch_executed_modules=[],
allow_shape_tensors=False,
) -> torch.jit.ScriptModule:
"""Compile a TorchScript module for NVIDIA GPUs using TensorRT

@@ -94,6 +95,7 @@ def compile(
min_block_size (int): The minimum number of contiguous TensorRT convertable operations in order to run a set of operations in TensorRT
torch_executed_ops (List[str]): List of aten operators that must be run in PyTorch. An error will be thrown if this list is not empty but ``require_full_compilation`` is True
torch_executed_modules (List[str]): List of modules that must be run in PyTorch. An error will be thrown if this list is not empty but ``require_full_compilation`` is True
allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT

Returns:
torch.jit.ScriptModule: Compiled TorchScript Module, when run it will execute via TensorRT
@@ -131,6 +133,7 @@ def compile(
"forced_fallback_modules": torch_executed_modules,
"min_block_size": min_block_size,
},
"allow_shape_tensors": allow_shape_tensors,
}

compiled_cpp_mod = _C.compile_graph(module._c, _parse_compile_spec(spec))
@@ -156,6 +159,7 @@ def convert_method_to_trt_engine(
dla_global_dram_size=536870912,
truncate_long_and_double=False,
calibrator=None,
allow_shape_tensors=False,
) -> bytearray:
"""Convert a TorchScript module method to a serialized TensorRT engine

@@ -214,6 +218,7 @@
dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution
truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT

Returns:
bytearray: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
@@ -236,6 +241,7 @@
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
"calibrator": calibrator,
"truncate_long_and_double": truncate_long_and_double,
"allow_shape_tensors": allow_shape_tensors,
}

engine_str = _C.convert_graph_to_trt_engine(
12 changes: 8 additions & 4 deletions tests/cpp/test_dynamic_size.cpp
@@ -27,7 +27,8 @@ TEST(Converters, ATenResizeDynamicShapeCorrectly) {

auto trt_in = at::clone(in);
params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
- auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true);
+ auto trt_results =
+ torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true, /*allow_shape_tensors=*/true);

auto trt = trt_results[0].reshape(jit_results[0].sizes());

@@ -53,7 +54,8 @@ TEST(Converters, ATenResizeDynamicInputCorrectly) {

auto trt_in = at::clone(in);
params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
- auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true);
+ auto trt_results =
+ torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true, /*allow_shape_tensors=*/true);

auto trt = trt_results[0].reshape(jit_results[0].sizes());

Expand Down Expand Up @@ -83,7 +85,8 @@ TEST(Converters, ATenResizeGetItemDynShapeCorrectly) {

auto trt_in = at::clone(in);
params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
- auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true);
+ auto trt_results =
+ torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true, /*allow_shape_tensors=*/true);

auto trt = trt_results[0].reshape(jit_results[0].sizes());

@@ -115,7 +118,8 @@ TEST(Converters, ATenResizeGetItemDynShapeMulCorrectly) {

auto trt_in = at::clone(in);
params = torch_tensorrt::core::ir::get_static_params(g->inputs(), {});
- auto trt_results = torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true);
+ auto trt_results =
+ torch_tensorrt::tests::util::RunGraphEngineDynamic(g, params, {in}, true, /*allow_shape_tensors=*/true);

auto trt = trt_results[0].reshape(jit_results[0].sizes());

4 changes: 3 additions & 1 deletion tests/util/run_graph_engine.cpp
@@ -94,12 +94,14 @@ std::vector<at::Tensor> RunGraphEngineDynamic(
std::shared_ptr<torch::jit::Graph>& g,
core::ir::StaticParams& named_params,
std::vector<at::Tensor> inputs,
- bool dynamic_batch) {
+ bool dynamic_batch = false,
+ bool allow_shape_tensors = false) {
LOG_DEBUG("Running TRT version");
auto var_ins = get_var_inputs(g->inputs(), named_params);
auto in = core::ir::pair_input_vals_with_specs(var_ins, toInputsDynamic(inputs, dynamic_batch));
auto info = core::conversion::ConversionInfo();
info.inputs = std::move(in);
+ info.engine_settings.allow_shape_tensors = allow_shape_tensors;
std::string eng = core::conversion::ConvertBlockToEngine(g->block(), info, named_params);
return RunEngine(eng, inputs);
}
3 changes: 2 additions & 1 deletion tests/util/util.h
@@ -57,7 +57,8 @@ std::vector<at::Tensor> RunGraphEngineDynamic(
std::shared_ptr<torch::jit::Graph>& g,
core::ir::StaticParams& named_params,
std::vector<at::Tensor> inputs,
- bool dynamic_batch = false);
+ bool dynamic_batch = false,
+ bool allow_shape_tensors = false);

// Run the forward method of a module and return results
torch::jit::IValue RunModuleForward(torch::jit::Module& mod, std::vector<torch::jit::IValue> inputs);