[TensorRT EP] Add new provider option to exclude ops from running on TRT (#23705)

This PR removes the implicit filtering-out of DDS ops from running on TRT.
In other words, by default, DDS nodes will run on TRT if TRT supports them.

Moreover, it adds a new provider option, `trt_op_types_to_exclude`:
- Users can provide a comma-separated list of op types to exclude from running on TRT
- e.g. `trt_op_types_to_exclude="NonMaxSuppression,NonZero,RoiAlign"`

(This PR essentially adds back the feature from #22681 that was previously held from merging.)


[Note]
There may be potential performance issues in TRT 10 when running models
that contain DDS operations such as NonMaxSuppression, NonZero, and
RoiAlign (e.g., Faster-RCNN).
If users encounter significant performance degradation, we suggest
excluding those DDS ops from running on TRT, i.e.
trt_op_types_to_exclude="NonMaxSuppression,NonZero,RoiAlign". Those
DDS nodes will then run on the CUDA EP or the CPU.
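For reference, a minimal C++ sketch (not part of this commit; the model path and excluded op types are illustrative) of setting the new option through the ORT C API:

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "trt_exclude_dds");
  Ort::SessionOptions session_options;
  const OrtApi& api = Ort::GetApi();

  // Create the V2 TensorRT provider options and set the key added by this PR.
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&trt_options));
  const char* keys[] = {"trt_op_types_to_exclude"};
  const char* values[] = {"NonMaxSuppression,NonZero,RoiAlign"};
  Ort::ThrowOnError(api.UpdateTensorRTProviderOptions(trt_options, keys, values, 1));

  // Register the TensorRT EP; the excluded DDS nodes fall back to the CUDA EP or CPU.
  Ort::ThrowOnError(
      api.SessionOptionsAppendExecutionProvider_TensorRT_V2(session_options, trt_options));
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);

  api.ReleaseTensorRTProviderOptions(trt_options);
  return 0;
}
```

The same key is accepted by the Python bindings through the TensorrtExecutionProvider provider-options dict, as the onnxruntime_pybind_state.cc change below shows.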
chilo-ms authored Feb 21, 2025
1 parent 1b0a2ba commit 23f787e
Showing 12 changed files with 139 additions and 16 deletions.
@@ -88,4 +88,5 @@ struct OrtTensorRTProviderOptionsV2 {

const char* trt_engine_cache_prefix{nullptr}; // specify engine cache prefix
int trt_engine_hw_compatible{0}; // Enable hardware compatibility. Default 0 = false, nonzero = true
const char* trt_op_types_to_exclude{}; // Exclude specific ops from running on TRT.
};
32 changes: 20 additions & 12 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1379,6 +1379,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
profile_opt_shapes = info.profile_opt_shapes;
cuda_graph_enable_ = info.cuda_graph_enable;
engine_hw_compatible_ = info.engine_hw_compatible;
op_types_to_exclude_ = info.op_types_to_exclude;
} else {
try {
const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations);
@@ -1565,6 +1566,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
cuda_graph_enable_ = (std::stoi(cuda_graph_enable_env) == 0 ? false : true);
}

const std::string op_types_to_exclude_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kOpTypesToExclude);
if (!op_types_to_exclude_env.empty()) {
op_types_to_exclude_ = op_types_to_exclude_env;
}

} catch (const std::invalid_argument& ex) {
LOGS_DEFAULT(WARNING) << "[TensorRT EP] Invalid Argument (from environment variables): " << ex.what();
} catch (const std::out_of_range& ex) {
@@ -1768,7 +1774,8 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
<< ", trt_ep_context_embed_mode: " << ep_context_embed_mode_
<< ", trt_cache_prefix: " << cache_prefix_
<< ", trt_engine_hw_compatible: " << engine_hw_compatible_
<< ", trt_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_;
<< ", trt_onnx_model_bytestream_size_: " << onnx_model_bytestream_size_
<< ", trt_op_types_to_exclude: " << op_types_to_exclude_;
}

TensorrtExecutionProvider::~TensorrtExecutionProvider() {
@@ -2482,18 +2489,19 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
std::vector<size_t> nodes_vector(number_of_ort_nodes);
std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);

std::set<std::string> exclude_ops_set;
auto get_exclude_ops_set = [&](std::string node_list_to_exclude) -> std::set<std::string> {
std::set<std::string> set;
if (!node_list_to_exclude.empty()) {
std::stringstream node_list(node_list_to_exclude);
std::string node;
while (std::getline(node_list, node, ',')) {
set.insert(node);
}
}
return set;
};

/*
* There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10.
* TRT EP automatically excludes DDS ops from running on TRT.
*/
if (trt_version_ >= 100000 && trt_version_ < 110000) {
exclude_ops_set.insert("NonMaxSuppression");
exclude_ops_set.insert("NonZero");
exclude_ops_set.insert("RoiAlign");
LOGS_DEFAULT(VERBOSE) << "There is a known performance issue with the DDS ops (NonMaxSuppression, NonZero and RoiAlign) in TRT 10. TRT EP automatically excludes DDS ops from running on TRT, if applicable";
}
auto exclude_ops_set = get_exclude_ops_set(op_types_to_exclude_);

SubGraphCollection_t parser_nodes_vector, supported_nodes_vector;
const std::vector<NodeIndex>& node_index = graph.GetNodesInTopologicalOrder(1 /*priority-based topological sort*/);
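For illustration, a standalone sketch of the comma-splitting behavior implemented by the lambda above (`ParseExcludedOpTypes` is our name, not ORT's; like the EP code, it performs no whitespace trimming):

```cpp
#include <cassert>
#include <set>
#include <sstream>
#include <string>

// Split a comma-separated op-type list into a set, mirroring get_exclude_ops_set.
static std::set<std::string> ParseExcludedOpTypes(const std::string& csv) {
  std::set<std::string> result;
  std::stringstream node_list(csv);
  std::string node;
  while (std::getline(node_list, node, ',')) {
    result.insert(node);
  }
  return result;
}

int main() {
  auto excluded = ParseExcludedOpTypes("NonMaxSuppression,NonZero,RoiAlign");
  assert(excluded.size() == 3 && excluded.count("NonZero") == 1);
  assert(ParseExcludedOpTypes("").empty());  // empty option string excludes nothing
  return 0;
}
```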
@@ -57,6 +57,7 @@ static const std::string kDumpEpContextModel = "ORT_DUMP_EP_CONTEXT_MODEL";
static const std::string kEpContextEmbedMode = "ORT_EP_CONTEXT_EMBED_MODE";
static const std::string kEpContextComputeCapabilityEnable = "ORT_EP_CONTEXT_COMPUTE_CAPABILITY_ENABLE";
static const std::string kEngineCachePrefix = "ORT_TENSORRT_CACHE_PREFIX";
static const std::string kOpTypesToExclude = "ORT_TENSORRT_OP_TYPES_TO_EXCLUDE";
// Old env variable for backward compatibility
static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH";
} // namespace tensorrt_env_vars
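A sketch of the environment-variable route (assuming a POSIX system; `_putenv_s` would be the Windows analogue). Per the constructor hunk above, the variable is read once at EP construction when no explicit provider options are supplied:

```cpp
#include <cstdlib>

int main() {
  // Must be set before the TensorRT EP is constructed, since the constructor
  // reads ORT_TENSORRT_OP_TYPES_TO_EXCLUDE once via GetEnvironmentVar.
  setenv("ORT_TENSORRT_OP_TYPES_TO_EXCLUDE",
         "NonMaxSuppression,NonZero,RoiAlign", /*overwrite=*/1);
  // ... create the ORT session with the TensorRT EP as usual ...
  return 0;
}
```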
@@ -56,6 +56,7 @@ constexpr const char* kDumpEpContextModel = "trt_dump_ep_context_model";
constexpr const char* kEngineHwCompatible = "trt_engine_hw_compatible";
constexpr const char* kONNXBytestream = "trt_onnx_bytestream";
constexpr const char* kONNXBytestreamSize = "trt_onnx_bytestream_size";
constexpr const char* kOpTypesToExclude = "trt_op_types_to_exclude";

} // namespace provider_option_names
} // namespace tensorrt
@@ -134,6 +135,7 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions
return Status::OK();
})
.AddAssignmentToReference(tensorrt::provider_option_names::kONNXBytestreamSize, info.onnx_bytestream_size)
.AddAssignmentToReference(tensorrt::provider_option_names::kOpTypesToExclude, info.op_types_to_exclude)
.Parse(options)); // add new provider option here.

info.user_compute_stream = user_compute_stream;
@@ -188,6 +190,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE
{tensorrt::provider_option_names::kEngineHwCompatible, MakeStringWithClassicLocale(info.engine_hw_compatible)},
{tensorrt::provider_option_names::kONNXBytestream, MakeStringWithClassicLocale(info.onnx_bytestream)},
{tensorrt::provider_option_names::kONNXBytestreamSize, MakeStringWithClassicLocale(info.onnx_bytestream_size)},
{tensorrt::provider_option_names::kOpTypesToExclude, MakeStringWithClassicLocale(info.op_types_to_exclude)},
};
return options;
}
@@ -206,6 +209,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
const std::string kProfilesOptShapes_ = empty_if_null(info.trt_profile_opt_shapes);
const std::string kEpContextFilePath_ = empty_if_null(info.trt_ep_context_file_path);
const std::string kOnnxModelFolderPath_ = empty_if_null(info.trt_onnx_model_folder_path);
const std::string kOpTypesToExclude_ = empty_if_null(info.trt_op_types_to_exclude);

const ProviderOptions options{
{tensorrt::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
@@ -251,6 +255,7 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor
{tensorrt::provider_option_names::kEngineHwCompatible, MakeStringWithClassicLocale(info.trt_engine_hw_compatible)},
{tensorrt::provider_option_names::kONNXBytestream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.trt_onnx_bytestream))},
{tensorrt::provider_option_names::kONNXBytestreamSize, MakeStringWithClassicLocale(info.trt_onnx_bytestream_size)},
{tensorrt::provider_option_names::kOpTypesToExclude, kOpTypesToExclude_},
};
return options;
}
@@ -355,5 +360,6 @@ void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options
trt_provider_options_v2.trt_engine_hw_compatible = internal_options.engine_hw_compatible;
trt_provider_options_v2.trt_onnx_bytestream = internal_options.onnx_bytestream;
trt_provider_options_v2.trt_onnx_bytestream_size = internal_options.onnx_bytestream_size;
trt_provider_options_v2.trt_op_types_to_exclude = copy_string_if_needed(internal_options.op_types_to_exclude);
}
} // namespace onnxruntime
@@ -60,6 +60,7 @@ struct TensorrtExecutionProviderInfo {
int ep_context_embed_mode{0};
std::string engine_cache_prefix{""};
bool engine_hw_compatible{false};
std::string op_types_to_exclude{""};

static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info);
@@ -118,6 +118,7 @@ struct Tensorrt_Provider : Provider {
info.engine_hw_compatible = options.trt_engine_hw_compatible != 0;
info.onnx_bytestream = options.trt_onnx_bytestream;
info.onnx_bytestream_size = options.trt_onnx_bytestream_size;
info.op_types_to_exclude = options.trt_op_types_to_exclude == nullptr ? "" : options.trt_op_types_to_exclude;

return std::make_shared<TensorrtProviderFactory>(info);
}
1 change: 1 addition & 0 deletions onnxruntime/core/session/provider_bridge_ort.cc
@@ -2583,6 +2583,7 @@ ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensor
delete[] ptr->trt_profile_opt_shapes;
delete[] ptr->trt_ep_context_file_path;
delete[] ptr->trt_onnx_model_folder_path;
delete[] ptr->trt_op_types_to_exclude;
}

std::unique_ptr<OrtTensorRTProviderOptionsV2> p(ptr);
5 changes: 4 additions & 1 deletion onnxruntime/python/onnxruntime_pybind_state.cc
@@ -526,7 +526,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
// and TRT EP instance, so it won't be released.)
std::string calibration_table, cache_path, cache_prefix, timing_cache_path, lib_path, trt_tactic_sources,
trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile, ep_context_file_path,
onnx_model_folder_path;
onnx_model_folder_path, trt_op_types_to_exclude;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
OrtTensorRTProviderOptionsV2 params;
@@ -824,6 +824,9 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
} else {
ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_engine_hw_compatible' should be 'True' or 'False'. Default value is 'False'.\n");
}
} else if (option.first == "trt_op_types_to_exclude") {
trt_op_types_to_exclude = option.second;
params.trt_op_types_to_exclude = trt_op_types_to_exclude.c_str();
} else {
ORT_THROW("Invalid TensorRT EP option: ", option.first);
}
10 changes: 10 additions & 0 deletions onnxruntime/test/providers/base_tester.cc
@@ -317,6 +317,11 @@ void BaseTester::ExecuteModel(Model& model, SessionType& session,
ASSERT_EQ(expect_result, ExpectResult::kExpectFailure) << "Initialize failed but expected success: "
<< status.ErrorMessage();

// No need to check the expected failure string if an empty string is given.
if (expected_failure_string.empty()) {
return;
}

// Disable expected_failure_string checks for OpenVINO EP
if (provider_type != kOpenVINOExecutionProvider) {
EXPECT_THAT(status.ErrorMessage(), testing::HasSubstr(expected_failure_string));
@@ -337,6 +342,11 @@
ASSERT_EQ(expect_result, ExpectResult::kExpectFailure) << "Run failed but expected success: "
<< status.ErrorMessage();

// No need to check the expected failure string if an empty string is given.
if (expected_failure_string.empty()) {
return;
}

// Disable expected_failure_string checks for MKL-DNN and OpenVINO EP's
if (provider_type != kDnnlExecutionProvider &&
provider_type != kOpenVINOExecutionProvider) {
@@ -63,13 +63,21 @@ TEST(NonMaxSuppressionOpTest, TwoClasses) {
test.AddInput<int64_t>("max_output_boxes_per_class", {}, {6L});
test.AddInput<float>("iou_threshold", {}, {0.5f});
test.AddInput<float>("score_threshold", {}, {0.0f});
// The selected_indices in ORT are sorted by class, whereas in TRT they are sorted by score,
// so the output needs to be sorted to pass the output check when there is more than one class and the TRT EP is used.
#ifdef USE_TENSORRT
bool sort_output = true;
#else
bool sort_output = false; // default
#endif
test.AddOutput<int64_t>("selected_indices", {6, 3},
{0L, 0L, 3L,
0L, 0L, 0L,
0L, 0L, 5L,
0L, 1L, 3L,
0L, 1L, 0L,
0L, 1L, 5L});
0L, 1L, 5L},
sort_output);
test.Run();
}

@@ -125,6 +133,13 @@ TEST(NonMaxSuppressionOpTest, TwoBatches_TwoClasses) {
0.1f, 0.2f, 0.6f, 0.3f, 0.9f});
test.AddInput<int64_t>("max_output_boxes_per_class", {}, {2L});
test.AddInput<float>("iou_threshold", {}, {0.8f});
// The selected_indices in ORT are sorted by class, whereas in TRT they are sorted by score,
// so the output needs to be sorted to pass the output check when there is more than one class and the TRT EP is used.
#ifdef USE_TENSORRT
bool sort_output = true;
#else
bool sort_output = false; // default
#endif
test.AddOutput<int64_t>("selected_indices", {8, 3},
{0L, 0L, 4L,
0L, 0L, 2L,
@@ -134,7 +149,8 @@
1L, 0L, 4L,
1L, 0L, 1L,
1L, 1L, 4L,
1L, 1L, 1L});
1L, 1L, 1L},
sort_output);
test.Run();
}

@@ -302,7 +318,11 @@ TEST(NonMaxSuppressionOpTest, InconsistentBoxAndScoreShapes) {
test.AddInput<float>("iou_threshold", {}, {0.5f});
test.AddInput<float>("score_threshold", {}, {0.0f});
test.AddOutput<int64_t>("selected_indices", {0, 3}, {});
#ifdef USE_TENSORRT
test.Run(OpTester::ExpectResult::kExpectFailure, ""); // TensorRT EP emits a different failure message; providing an empty string simply skips the error-message check.
#else
test.Run(OpTester::ExpectResult::kExpectFailure, "boxes and scores should have same spatial_dimension.");
#endif
}

TEST(NonMaxSuppressionOpTest, InvalidIOUThreshold) {
@@ -313,7 +333,8 @@
test.AddInput<float>("iou_threshold", {}, {1.2f});
test.AddInput<float>("score_threshold", {}, {0.0f});
test.AddOutput<int64_t>("selected_indices", {0, 3}, {});
test.Run(OpTester::ExpectResult::kExpectFailure, "iou_threshold must be in range [0, 1]");
// TRT is missing a runtime check validating the iou_threshold value. Once the bug is fixed, this unit test will be added back for TRT.
test.Run(OpTester::ExpectResult::kExpectFailure, "iou_threshold must be in range [0, 1]", {kTensorrtExecutionProvider});
}

TEST(NonMaxSuppressionOpTest, EmptyInput) {
10 changes: 10 additions & 0 deletions onnxruntime/test/providers/cpu/tensor/nonzero_op_test.cc
@@ -77,13 +77,23 @@ TEST(NonZeroOpTest, Scalar) {
{
OpTester test{kOpName, kOpVersion};
test.AddInput<int32_t>("X", {}, {0});
#ifdef USE_TENSORRT
// TensorRT follows the ONNX spec, where NonZero produces output shape (0, N) rather than the (1, N) ORT produces for scalar input
test.AddOutput<int64_t>("Y", {0, 0}, {});
#else
test.AddOutput<int64_t>("Y", {1, 0}, {});
#endif
test.Run(so);
}
{
OpTester test{kOpName, kOpVersion};
test.AddInput<int32_t>("X", {}, {1});
#ifdef USE_TENSORRT
// TensorRT follows the ONNX spec, where NonZero produces output shape (0, N) rather than the (1, N) ORT produces for scalar input
test.AddOutput<int64_t>("Y", {0, 1}, {});
#else
test.AddOutput<int64_t>("Y", {1, 1}, {0});
#endif
test.Run(so);
}
}
60 changes: 60 additions & 0 deletions onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -616,6 +616,66 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) {
RunSession(session_object9, run_options, feeds, output_names, expected_dims_mul_m, expected_values_mul_m);
}

TEST(TensorrtExecutionProviderTest, ExcludeOpsTest) {
/* The mnist.onnx looks like this:
* Conv
* |
* Add
* .
* .
* |
* MaxPool
* |
* .
* .
* MaxPool
* |
* Reshape
* |
* MatMul
* .
* .
*
*/
PathString model_name = ORT_TSTR("testdata/mnist.onnx");
SessionOptions so;
so.session_logid = "TensorrtExecutionProviderExcludeOpsTest";
RunOptions run_options;
run_options.run_tag = so.session_logid;
InferenceSession session_object{so, GetEnvironment()};
auto cuda_provider = DefaultCudaExecutionProvider();
auto cpu_allocator = cuda_provider->CreatePreferredAllocators()[1];
std::vector<int64_t> dims_op_x = {1, 1, 28, 28};
std::vector<float> values_op_x(784, 1.0f); // 784=1*1*28*28
OrtValue ml_value_x;
CreateMLValue<float>(cpu_allocator, dims_op_x, values_op_x, &ml_value_x);
NameMLValMap feeds;
feeds.insert(std::make_pair("Input3", ml_value_x));

// prepare outputs
std::vector<std::string> output_names;
output_names.push_back("Plus214_Output_0");
std::vector<OrtValue> fetches;

RemoveCachesByType("./", ".engine");
OrtTensorRTProviderOptionsV2 params;
params.trt_engine_cache_enable = 1;
params.trt_op_types_to_exclude = "MaxPool";
std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
auto status = session_object.Load(model_name);
ASSERT_TRUE(status.IsOK());
status = session_object.Initialize();
ASSERT_TRUE(status.IsOK());
status = session_object.Run(run_options, feeds, output_names, &fetches);
ASSERT_TRUE(status.IsOK());

std::vector<fs::path> engine_files;
engine_files = GetCachesByType("./", ".engine");
// The whole graph should be partitioned into 3 TRT subgraphs and 2 CPU nodes
ASSERT_EQ(engine_files.size(), 3);
}

TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {
PathString model_name = ORT_TSTR("testdata/trt_plugin_custom_op_test.onnx");
SessionOptions so;
