diff --git a/common/cuda_utils/CMakeLists.txt b/common/cuda_utils/CMakeLists.txt
new file mode 100644
index 0000000000000..c0115bc9f3b89
--- /dev/null
+++ b/common/cuda_utils/CMakeLists.txt
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 3.5)
+project(cuda_utils)
+
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+find_package(CUDA REQUIRED)
+
+install(
+  DIRECTORY include/${PROJECT_NAME}/
+  DESTINATION include/${PROJECT_NAME}
+)
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_export_include_directories(include)
+ament_export_dependencies(CUDA)
+
+ament_auto_package()
diff --git a/common/cuda_utils/include/cuda_utils/cuda_check_error.hpp b/common/cuda_utils/include/cuda_utils/cuda_check_error.hpp
new file mode 100644
index 0000000000000..8c2d96eee5f13
--- /dev/null
+++ b/common/cuda_utils/include/cuda_utils/cuda_check_error.hpp
@@ -0,0 +1,44 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This code is licensed under CC0 1.0 Universal (Public Domain).
+// You can use this without any limitation.
+// https://creativecommons.org/publicdomain/zero/1.0/deed.en
+// borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/
+
+#ifndef CUDA_UTILS__CUDA_CHECK_ERROR_HPP_
+#define CUDA_UTILS__CUDA_CHECK_ERROR_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <sstream>
+#include <stdexcept>
+
+namespace cuda_utils
+{
+template <typename F, typename N>
+void cuda_check_error(const ::cudaError_t e, F && f, N && n)
+{
+  if (e != ::cudaSuccess) {
+    std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw std::runtime_error{s.str()};
+  }
+}
+}  // namespace cuda_utils
+
+#define CHECK_CUDA_ERROR(e) (cuda_utils::cuda_check_error(e, __FILE__, __LINE__))
+
+#endif  // CUDA_UTILS__CUDA_CHECK_ERROR_HPP_
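The template above stringifies any failing `cudaError_t` together with its source location and throws. A minimal usage sketch (illustrative only, not part of this diff):

```cpp
// Sketch (not part of this diff): wrapping raw CUDA runtime calls with
// CHECK_CUDA_ERROR so failures surface as std::runtime_error with file/line info.
#include "cuda_utils/cuda_check_error.hpp"

#include <cstdio>

int main()
{
  try {
    void * p = nullptr;
    CHECK_CUDA_ERROR(::cudaMalloc(&p, 1024));   // throws on failure
    CHECK_CUDA_ERROR(::cudaMemset(p, 0, 1024));
    CHECK_CUDA_ERROR(::cudaFree(p));
  } catch (const std::runtime_error & e) {
    // e.g. "cudaErrorMemoryAllocation (2)@main.cu#L12: out of memory"
    std::fprintf(stderr, "%s\n", e.what());
    return 1;
  }
  return 0;
}
```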
diff --git a/common/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp b/common/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp
new file mode 100644
index 0000000000000..6e01a00d6681c
--- /dev/null
+++ b/common/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp
@@ -0,0 +1,81 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This code is licensed under CC0 1.0 Universal (Public Domain).
+// You can use this without any limitation.
+// https://creativecommons.org/publicdomain/zero/1.0/deed.en
+// borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/
+
+#ifndef CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_
+#define CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_
+
+#include "cuda_utils/cuda_check_error.hpp"
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <type_traits>
+
+namespace cuda_utils
+{
+struct CudaDeleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+
+template <typename T>
+using CudaUniquePtr = std::unique_ptr<T, CudaDeleter>;
+
+template <typename T>
+typename std::enable_if_t<std::is_array<T>::value, CudaUniquePtr<T>> make_unique(
+  const std::size_t n)
+{
+  using U = typename std::remove_extent_t<T>;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return CudaUniquePtr<T>{p};
+}
+
+template <typename T>
+CudaUniquePtr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return CudaUniquePtr<T>{p};
+}
+
+struct CudaDeleterHost
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFreeHost(p)); }
+};
+
+template <typename T>
+using CudaUniquePtrHost = std::unique_ptr<T, CudaDeleterHost>;
+
+template <typename T>
+typename std::enable_if_t<std::is_array<T>::value, CudaUniquePtrHost<T>> make_unique_host(
+  const std::size_t n, unsigned int flag)
+{
+  using U = typename std::remove_extent_t<T>;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaHostAlloc(reinterpret_cast<void **>(&p), sizeof(U) * n, flag));
+  return CudaUniquePtrHost<T>{p};
+}
+
+template <typename T>
+CudaUniquePtrHost<T> make_unique_host(unsigned int flag = cudaHostAllocDefault)
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaHostAlloc(reinterpret_cast<void **>(&p), sizeof(T), flag));
+  return CudaUniquePtrHost<T>{p};
+}
+}  // namespace cuda_utils
+
+#endif  // CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_
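A usage sketch for the allocation helpers above (illustrative, not part of this diff): `make_unique<T[]>` returns device memory released via `cudaFree`, while `make_unique_host<T[]>` returns page-locked host memory released via `cudaFreeHost`, which is the typical pairing for fast host-device copies.

```cpp
// Sketch (not part of this diff): pairing a pinned host buffer with a device
// buffer allocated through cuda_utils, then copying between them.
#include "cuda_utils/cuda_check_error.hpp"
#include "cuda_utils/cuda_unique_ptr.hpp"

int main()
{
  constexpr std::size_t n = 256;
  // Device memory, freed automatically via ::cudaFree
  auto device = cuda_utils::make_unique<float[]>(n);
  // Page-locked host memory, freed automatically via ::cudaFreeHost
  auto host = cuda_utils::make_unique_host<float[]>(n, cudaHostAllocDefault);

  for (std::size_t i = 0; i < n; ++i) {
    host[i] = static_cast<float>(i);
  }
  CHECK_CUDA_ERROR(
    ::cudaMemcpy(device.get(), host.get(), sizeof(float) * n, cudaMemcpyHostToDevice));
  return 0;
}  // both buffers are released here, in reverse declaration order
```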
diff --git a/common/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp b/common/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp
new file mode 100644
index 0000000000000..0f3a1bc6ccebf
--- /dev/null
+++ b/common/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp
@@ -0,0 +1,52 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This code is licensed under CC0 1.0 Universal (Public Domain).
+// You can use this without any limitation.
+// https://creativecommons.org/publicdomain/zero/1.0/deed.en
+// borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/
+
+#ifndef CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_
+#define CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+
+namespace cuda_utils
+{
+struct StreamDeleter
+{
+  void operator()(cudaStream_t * stream)
+  {
+    if (stream) {
+      cudaStreamDestroy(*stream);
+      delete stream;
+    }
+  }
+};
+
+using StreamUniquePtr = std::unique_ptr<cudaStream_t, StreamDeleter>;
+
+inline StreamUniquePtr makeCudaStream(const uint32_t flags = cudaStreamDefault)
+{
+  StreamUniquePtr stream(new cudaStream_t, StreamDeleter());
+  if (cudaStreamCreateWithFlags(stream.get(), flags) != cudaSuccess) {
+    stream.reset(nullptr);
+  }
+  return stream;
+}
+}  // namespace cuda_utils
+
+#endif  // CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_
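A usage sketch for `makeCudaStream` (illustrative, not part of this diff): the returned `StreamUniquePtr` owns both the heap-allocated `cudaStream_t` handle and the underlying stream, and `makeCudaStream` returns an empty pointer when stream creation fails, so callers should check it before dereferencing.

```cpp
// Sketch (not part of this diff): creating a non-blocking stream and issuing
// an asynchronous copy on it. The stream is destroyed automatically on scope exit.
#include "cuda_utils/cuda_unique_ptr.hpp"
#include "cuda_utils/stream_unique_ptr.hpp"

int main()
{
  cuda_utils::StreamUniquePtr stream = cuda_utils::makeCudaStream(cudaStreamNonBlocking);
  if (!stream) {
    return 1;  // creation failed; the pointer is empty
  }
  auto device = cuda_utils::make_unique<float[]>(64);
  float host[64] = {};
  ::cudaMemcpyAsync(device.get(), host, sizeof(host), cudaMemcpyHostToDevice, *stream);
  ::cudaStreamSynchronize(*stream);
  return 0;
}
```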
diff --git a/common/cuda_utils/package.xml b/common/cuda_utils/package.xml
new file mode 100644
index 0000000000000..53127da0c1ff6
--- /dev/null
+++ b/common/cuda_utils/package.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>cuda_utils</name>
+  <version>0.0.1</version>
+  <description>cuda utility library</description>
+
+  <author>Daisuke Nishimatsu</author>
+  <maintainer>Daisuke Nishimatsu</maintainer>
+  <maintainer>Manato Hirabayashi</maintainer>
+
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+
+  <build_depend>autoware_cmake</build_depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/common/tensorrt_common/CMakeLists.txt b/common/tensorrt_common/CMakeLists.txt
new file mode 100644
index 0000000000000..49a2e40dd8234
--- /dev/null
+++ b/common/tensorrt_common/CMakeLists.txt
@@ -0,0 +1,54 @@
+cmake_minimum_required(VERSION 3.5)
+project(tensorrt_common)
+
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+find_package(CUDA REQUIRED)
+find_package(CUDNN REQUIRED)
+find_package(TENSORRT REQUIRED)
+
+if(NOT (${CUDA_FOUND} AND ${CUDNN_FOUND} AND ${TENSORRT_FOUND}))
+  message(WARNING "cuda, cudnn, tensorrt libraries are not found")
+  return()
+endif()
+
+cuda_add_library(${PROJECT_NAME} SHARED
+  src/tensorrt_common.cpp
+)
+
+ament_target_dependencies(${PROJECT_NAME}
+  rclcpp
+)
+
+target_include_directories(${PROJECT_NAME}
+  PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+    ${TENSORRT_INCLUDE_DIRS}
+)
+
+target_link_libraries(${PROJECT_NAME}
+  ${TENSORRT_LIBRARIES}
+  stdc++fs
+)
+
+target_compile_definitions(${PROJECT_NAME} PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+list(APPEND ${PROJECT_NAME}_LIBRARIES "${PROJECT_NAME}")
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_export_include_directories(include)
+ament_export_dependencies(CUDA)
+ament_export_dependencies(cudnn_cmake_module)
+ament_export_dependencies(CUDNN)
+ament_export_dependencies(tensorrt_cmake_module)
+ament_export_dependencies(TENSORRT)
+
+ament_auto_package()
diff --git a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
new file mode 100644
index 0000000000000..bb271f0689002
--- /dev/null
+++ b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
@@ -0,0 +1,134 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
+
+#include <rclcpp/rclcpp.hpp>
+
+#include <NvInfer.h>
+#include <NvOnnxParser.h>
+
+#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC__)))
+#include <filesystem>
+namespace fs = ::std::filesystem;
+#else
+#include <experimental/filesystem>
+namespace fs = ::std::experimental::filesystem;
+#endif
+
+#include <array>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tensorrt_common
+{
+class Logger : public nvinfer1::ILogger  // NOLINT
+{
+public:
+  Logger() : Logger(Severity::kINFO) {}
+
+  explicit Logger(Severity severity) : reportable_severity_(severity) {}
+
+  void log(Severity severity, const char * msg) noexcept override
+  {
+    // suppress messages with severity enum value greater than the reportable
+    if (severity > reportable_severity_) {
+      return;
+    }
+
+    switch (severity) {
+      case Severity::kINTERNAL_ERROR:
+        RCLCPP_ERROR_STREAM(logger_, msg);
+        break;
+      case Severity::kERROR:
+        RCLCPP_ERROR_STREAM(logger_, msg);
+        break;
+      case Severity::kWARNING:
+        RCLCPP_WARN_STREAM(logger_, msg);
+        break;
+      case Severity::kINFO:
+        RCLCPP_INFO_STREAM(logger_, msg);
+        break;
+      default:
+        RCLCPP_INFO_STREAM(logger_, msg);
+        break;
+    }
+  }
+
+  Severity reportable_severity_{Severity::kWARNING};
+  rclcpp::Logger logger_{rclcpp::get_logger("tensorrt_common")};
+};
+
+template <typename T>
+struct InferDeleter  // NOLINT
+{
+  void operator()(T * obj) const
+  {
+    if (obj) {
+#if TENSORRT_VERSION_MAJOR >= 8
+      delete obj;
+#else
+      obj->destroy();
+#endif
+    }
+  }
+};
+
+template <typename T>
+using TrtUniquePtr = std::unique_ptr<T, InferDeleter<T>>;
+
+using BatchConfig = std::array<int32_t, 3>;
+
+class TrtCommon  // NOLINT
+{
+public:
+  TrtCommon(
+    const std::string & model_path, const std::string & precision,
+    std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator = nullptr,
+    const BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (16 << 20),
+    const std::vector<std::string> & plugin_paths = {});
+
+  bool loadEngine(const std::string & engine_file_path);
+  bool buildEngineFromOnnx(
+    const std::string & onnx_file_path, const std::string & output_engine_file_path);
+  void setup();
+
+  bool isInitialized();
+
+  nvinfer1::Dims getBindingDimensions(const int32_t index) const;
+  int32_t getNbBindings();
+  bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
+  bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
+
+private:
+  Logger logger_;
+  fs::path model_file_path_;
+  TrtUniquePtr<nvinfer1::IRuntime> runtime_;
+  TrtUniquePtr<nvinfer1::ICudaEngine> engine_;
+  TrtUniquePtr<nvinfer1::IExecutionContext> context_;
+  std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator_;
+
+  nvinfer1::Dims input_dims_;
+  nvinfer1::Dims output_dims_;
+  std::string precision_;
+  BatchConfig batch_config_;
+  size_t max_workspace_size_;
+  bool is_initialized_{false};
+};
+
+}  // namespace tensorrt_common
+
+#endif  // TENSORRT_COMMON__TENSORRT_COMMON_HPP_
diff --git a/common/tensorrt_common/package.xml b/common/tensorrt_common/package.xml
new file mode 100644
index 0000000000000..6e8a0b042dd70
--- /dev/null
+++ b/common/tensorrt_common/package.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>tensorrt_common</name>
+  <version>0.0.1</version>
+  <description>tensorrt utility wrapper</description>
+
+  <author>Taichi Higashide</author>
+  <author>Daisuke Nishimatsu</author>
+  <maintainer>Daisuke Nishimatsu</maintainer>
+  <maintainer>Manato Hirabayashi</maintainer>
+
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+  <buildtool_depend>cudnn_cmake_module</buildtool_depend>
+  <buildtool_depend>tensorrt_cmake_module</buildtool_depend>
+
+  <build_depend>autoware_cmake</build_depend>
+
+  <depend>rclcpp</depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
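For orientation, a sketch of the intended `TrtCommon` call sequence (illustrative, not part of this diff; the model path is hypothetical): construct with a model path and precision, call `setup()`, which loads a cached `.engine` next to the `.onnx` file or builds one on first run, then check `isInitialized()` before binding buffers.

```cpp
// Sketch (not part of this diff): basic TrtCommon lifecycle.
#include <tensorrt_common/tensorrt_common.hpp>

#include <iostream>

int main()
{
  tensorrt_common::TrtCommon trt("model.onnx", "fp16");  // hypothetical model path
  trt.setup();  // loads or builds "model.engine" beside the ONNX file
  if (!trt.isInitialized()) {
    std::cerr << "failed to load or build engine" << std::endl;
    return 1;
  }
  const auto dims = trt.getBindingDimensions(0);
  std::cout << "input binding has " << dims.nbDims << " dimensions" << std::endl;
  // Inference would then bind device buffers and call trt.enqueueV2(...).
  return 0;
}
```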
diff --git a/common/tensorrt_common/src/tensorrt_common.cpp b/common/tensorrt_common/src/tensorrt_common.cpp
new file mode 100644
index 0000000000000..50028f51332e6
--- /dev/null
+++ b/common/tensorrt_common/src/tensorrt_common.cpp
@@ -0,0 +1,222 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <NvInferPlugin.h>
+#include <dlfcn.h>
+
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace tensorrt_common
+{
+
+TrtCommon::TrtCommon(
+  const std::string & model_path, const std::string & precision,
+  std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator,
+  const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size,
+  const std::vector<std::string> & plugin_paths)
+: model_file_path_(model_path),
+  calibrator_(std::move(calibrator)),
+  precision_(precision),
+  batch_config_(batch_config),
+  max_workspace_size_(max_workspace_size)
+{
+  for (const auto & plugin_path : plugin_paths) {
+    int32_t flags{RTLD_LAZY};
+#if ENABLE_ASAN
+    // https://github.com/google/sanitizers/issues/89
+    // asan doesn't handle module unloading correctly and there are no plans on doing
+    // so. In order to get proper stack traces, don't delete the shared library on
+    // close so that asan can resolve the symbols correctly.
+    flags |= RTLD_NODELETE;
+#endif  // ENABLE_ASAN
+    void * handle = dlopen(plugin_path.c_str(), flags);
+    if (!handle) {
+      logger_.log(nvinfer1::ILogger::Severity::kERROR, "Could not load plugin library");
+    }
+  }
+  runtime_ = TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
+  initLibNvInferPlugins(&logger_, "");
+}
+
+void TrtCommon::setup()
+{
+  if (!fs::exists(model_file_path_)) {
+    is_initialized_ = false;
+    return;
+  }
+  if (model_file_path_.extension() == ".engine") {
+    loadEngine(model_file_path_);
+  } else if (model_file_path_.extension() == ".onnx") {
+    fs::path cache_engine_path{model_file_path_};
+    cache_engine_path.replace_extension("engine");
+    if (fs::exists(cache_engine_path)) {
+      loadEngine(cache_engine_path);
+    } else {
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "Start build engine");
+      buildEngineFromOnnx(model_file_path_, cache_engine_path);
+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "End build engine");
+    }
+  } else {
+    is_initialized_ = false;
+    return;
+  }
+
+  context_ = TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (!context_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create context");
+    is_initialized_ = false;
+    return;
+  }
+
+  is_initialized_ = true;
+}
+
+bool TrtCommon::loadEngine(const std::string & engine_file_path)
+{
+  std::ifstream engine_file(engine_file_path, std::ios::binary);
+  std::stringstream engine_buffer;
+  engine_buffer << engine_file.rdbuf();
+  std::string engine_str = engine_buffer.str();
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
+  return true;
+}
+
+bool TrtCommon::buildEngineFromOnnx(
+  const std::string & onnx_file_path, const std::string & output_engine_file_path)
+{
+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
+  if (!builder) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create builder");
+    return false;
+  }
+
+  const auto explicitBatch =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+
+  auto network =
+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
+  if (!network) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create network");
+    return false;
+  }
+
+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create builder config");
+    return false;
+  }
+
+  if (precision_ == "fp16" || precision_ == "int8") {
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+  }
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+#else
+  config->setMaxWorkspaceSize(max_workspace_size_);
+#endif
+
+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
+  if (!parser->parseFromFile(
+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
+    return false;
+  }
+
+  const auto input = network->getInput(0);
+  const auto input_dims = input->getDimensions();
+  const auto input_channel = input_dims.d[1];
+  const auto input_height = input_dims.d[2];
+  const auto input_width = input_dims.d[3];
+
+  auto profile = builder->createOptimizationProfile();
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMIN,
+    nvinfer1::Dims4{batch_config_.at(0), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kOPT,
+    nvinfer1::Dims4{batch_config_.at(1), input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMAX,
+    nvinfer1::Dims4{batch_config_.at(2), input_channel, input_height, input_width});
+  config->addOptimizationProfile(profile);
+
+  if (precision_ == "int8" && calibrator_) {
+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    config->setInt8Calibrator(calibrator_.get());
+  }
+
+#if TENSORRT_VERSION_MAJOR >= 8
+  auto plan =
+    TrtUniquePtr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
+  if (!plan) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create host memory");
+    return false;
+  }
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(
+    runtime_->deserializeCudaEngine(plan->data(), plan->size()));
+#else
+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config));
+#endif
+
+  if (!engine_) {
+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to create engine");
+    return false;
+  }
+
+  // save engine
+#if TENSORRT_VERSION_MAJOR < 8
+  auto data = TrtUniquePtr<nvinfer1::IHostMemory>(engine_->serialize());
+#endif
+  std::ofstream file;
+  file.open(output_engine_file_path, std::ios::binary | std::ios::out);
+  if (!file.is_open()) {
+    return false;
+  }
+#if TENSORRT_VERSION_MAJOR < 8
+  file.write(reinterpret_cast<const char *>(data->data()), data->size());
+#else
+  file.write(reinterpret_cast<const char *>(plan->data()), plan->size());
+#endif
+
+  file.close();
+
+  return true;
+}
+
+bool TrtCommon::isInitialized() { return is_initialized_; }
+
+nvinfer1::Dims TrtCommon::getBindingDimensions(const int32_t index) const
+{
+  return context_->getBindingDimensions(index);
+}
+
+int32_t TrtCommon::getNbBindings() { return engine_->getNbBindings(); }
+
+bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const
+{
+  return context_->setBindingDimensions(index, dimensions);
+}
+
+bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
+{
+  return context_->enqueueV2(bindings, stream, input_consumed);
+}
+
+}  // namespace tensorrt_common
diff --git a/perception/tensorrt_yolox/CMakeLists.txt b/perception/tensorrt_yolox/CMakeLists.txt
new file mode 100644
index 0000000000000..6b0719a814893
--- /dev/null
+++ b/perception/tensorrt_yolox/CMakeLists.txt
@@ -0,0 +1,123 @@
+cmake_minimum_required(VERSION 3.5)
+project(tensorrt_yolox)
+
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+find_package(OpenCV REQUIRED)
+find_package(OpenMP)
+if(OpenMP_FOUND)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
+##########
+# Download pretrained model
+set(DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/data")
+if(NOT EXISTS "${DATA_PATH}")
+  execute_process(COMMAND mkdir -p ${DATA_PATH})
+endif()
+function(download FILE_NAME FILE_HASH)
+  message(STATUS "Checking and downloading ${FILE_NAME}")
+  set(FILE_PATH ${DATA_PATH}/${FILE_NAME})
+  set(STATUS_CODE 0)
+  message(STATUS "start ${FILE_NAME}")
+  if(EXISTS ${FILE_PATH})
+    message(STATUS "found ${FILE_NAME}")
+    file(MD5 ${FILE_PATH} EXISTING_FILE_HASH)
+    if(${FILE_HASH} STREQUAL ${EXISTING_FILE_HASH})
+      message(STATUS "same ${FILE_NAME}")
+      message(STATUS "File already exists.")
+    else()
+      message(STATUS "diff ${FILE_NAME}")
+      message(STATUS "File hash changed. Downloading now ...")
+      file(DOWNLOAD https://awf.ml.dev.web.auto/perception/models/${FILE_NAME} ${FILE_PATH} STATUS DOWNLOAD_STATUS TIMEOUT 3600)
+      list(GET DOWNLOAD_STATUS 0 STATUS_CODE)
+      list(GET DOWNLOAD_STATUS 1 ERROR_MESSAGE)
+    endif()
+  else()
+    message(STATUS "not found ${FILE_NAME}")
+    message(STATUS "File doesn't exist. Downloading now ...")
+    file(DOWNLOAD https://awf.ml.dev.web.auto/perception/models/${FILE_NAME} ${FILE_PATH} STATUS DOWNLOAD_STATUS TIMEOUT 3600)
+    list(GET DOWNLOAD_STATUS 0 STATUS_CODE)
+    list(GET DOWNLOAD_STATUS 1 ERROR_MESSAGE)
+  endif()
+  if(${STATUS_CODE} EQUAL 0)
+    message(STATUS "Download completed successfully!")
+  else()
+    message(FATAL_ERROR "Error occurred during download: ${ERROR_MESSAGE}")
+  endif()
+endfunction()
+
+download(yolox-tiny.onnx 97028baf73ce55e115599c9c60651b08)
+download(label.txt 9ceadca8b72b6169ee6aabb861fe3e1e)
+
+##########
+# tensorrt_yolox
+ament_auto_add_library(${PROJECT_NAME} SHARED
+  src/tensorrt_yolox.cpp
+)
+
+ament_target_dependencies(${PROJECT_NAME}
+  OpenCV
+)
+
+target_link_libraries(${PROJECT_NAME}
+  ${tensorrt_common_LIBRARIES}
+)
+
+target_compile_definitions(${PROJECT_NAME} PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+ament_auto_add_library(yolox_single_image_inference_node SHARED
+  src/yolox_single_image_inference_node.cpp
+)
+
+ament_target_dependencies(yolox_single_image_inference_node
+  OpenCV
+)
+
+target_link_libraries(yolox_single_image_inference_node
+  ${PROJECT_NAME}
+  stdc++fs
+)
+
+target_compile_definitions(yolox_single_image_inference_node PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+rclcpp_components_register_node(yolox_single_image_inference_node
+  PLUGIN "tensorrt_yolox::YoloXSingleImageInferenceNode"
+  EXECUTABLE yolox_single_image_inference
+)
+
+ament_auto_add_library(${PROJECT_NAME}_node SHARED
+  src/tensorrt_yolox_node.cpp
+)
+
+ament_target_dependencies(${PROJECT_NAME}_node
+  OpenCV
+)
+
+target_link_libraries(${PROJECT_NAME}_node
+  ${PROJECT_NAME}
+)
+
+target_compile_definitions(${PROJECT_NAME}_node PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+rclcpp_components_register_node(${PROJECT_NAME}_node
+  PLUGIN "tensorrt_yolox::TrtYoloXNode"
+  EXECUTABLE ${PROJECT_NAME}_node_exe
+)
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_auto_package(INSTALL_TO_SHARE
+  data
+)
diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
new file mode 100644
index 0000000000000..aaf51198e4429
--- /dev/null
+++ b/perception/tensorrt_yolox/README.md
@@ -0,0 +1,142 @@
+# tensorrt_yolox
+
+## Purpose
+
+This package detects target objects, e.g., cars, trucks, bicycles, and pedestrians, in an image based on the [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) model.
+
+## Inner-workings / Algorithms
+
+### Cite
+
+Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Series in 2021", arXiv preprint arXiv:2107.08430, 2021 [[ref](https://arxiv.org/abs/2107.08430)]
+
+## Inputs / Outputs
+
+### Input
+
+| Name       | Type                | Description     |
+| ---------- | ------------------- | --------------- |
+| `in/image` | `sensor_msgs/Image` | The input image |
+
+### Output
+
+| Name          | Type                                               | Description                                        |
+| ------------- | -------------------------------------------------- | -------------------------------------------------- |
+| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes        |
+| `out/image`   | `sensor_msgs/Image`                                | The image with 2D bounding boxes for visualization |
+
+## Parameters
+
+### Core Parameters
+
+| Name              | Type  | Default Value | Description                                                                                |
+| ----------------- | ----- | ------------- | ------------------------------------------------------------------------------------------ |
+| `score_threshold` | float | 0.3           | If the objectness score is less than this value, the object is ignored in the YOLO layer.  |
+| `nms_threshold`   | float | 0.7           | The IoU threshold for the NMS method                                                        |
+
+**NOTE:** These two parameters are only valid for "plain" models (described later).
+
+### Node Parameters
+
+| Name         | Type   | Default Value | Description                                                         |
+| ------------ | ------ | ------------- | -------------------------------------------------------------------- |
+| `model_path` | string | ""            | The ONNX file path for the YOLOX model                                |
+| `label_path` | string | ""            | The label file path listing the names of detectable objects          |
+| `precision`  | string | "fp32"        | The inference precision: "fp32", "fp16", or "int8"                   |
+
+## Assumptions / Known limits
+
+The label contained in detected 2D bounding boxes (i.e., `out/objects`) will be one of the following:
+
+- CAR
+- PEDESTRIAN ("PERSON" will also be categorized as "PEDESTRIAN")
+- BUS
+- TRUCK
+- BICYCLE
+- MOTORCYCLE
+
+If other labels (case insensitive) are contained in the file specified via the `label_path` parameter,
+those are labeled as `UNKNOWN`, though the detected rectangles are still drawn in the visualization result (`out/image`).
+
+## Onnx model
+
+A sample model (named `yolox-tiny.onnx`) is downloaded automatically during the build process.
+To accelerate non-maximum suppression (NMS), a common post-processing step after object detection inference,
+an `EfficientNMS_TRT` module is attached after the ordinary YOLOX (tiny) network.
+The `EfficientNMS_TRT` module contains fixed values for `score_threshold` and `nms_threshold`,
+hence these parameters are ignored when users specify ONNX models that include this module.
+
+This package accepts both ONNX models with `EfficientNMS_TRT` attached and [models published from the official YOLOX repository](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime#download-onnx-models) (referred to below as "plain" models).
+
+All models are automatically converted to TensorRT format.
+The converted files are saved in the same directory as the specified ONNX files
+with the `.engine` filename extension and are reused on subsequent runs.
+The conversion may take a while (typically a few minutes) and inference is blocked
+until it completes, so it will take some time until detection results are published on the first run.
+
+### Package acceptable model generation
+
+To convert your own model saved in PyTorch's `pth` format into ONNX,
+you can use the converter offered by the official repository.
+For convenience, only the procedure is described below.
+Please refer to [the official documentation](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime#convert-your-model-to-onnx) for more details.
+
+#### For plain models
+
+1. Install dependencies
+
+   ```shell
+   git clone git@github.com:Megvii-BaseDetection/YOLOX.git
+   cd YOLOX
+   python3 setup.py develop --user
+   ```
+
+2. Convert pth into ONNX
+
+   ```shell
+   python3 tools/export_onnx.py \
+     --output-name YOUR_YOLOX.onnx \
+     -f YOUR_YOLOX.py \
+     -c YOUR_YOLOX.pth
+   ```
+
+#### For EfficientNMS_TRT embedded models
+
+1. Install dependencies
+
+   ```shell
+   git clone git@github.com:Megvii-BaseDetection/YOLOX.git
+   cd YOLOX
+   python3 setup.py develop --user
+   pip3 install git+ssh://git@github.com/wep21/yolox_onnx_modifier.git --user
+   ```

+2. Convert pth into ONNX
+
+   ```shell
+   python3 tools/export_onnx.py \
+     --output-name YOUR_YOLOX.onnx \
+     -f YOUR_YOLOX.py \
+     -c YOUR_YOLOX.pth \
+     --decode_in_inference
+   ```
+
+3. Embed `EfficientNMS_TRT` at the end of YOLOX
+
+   ```shell
+   yolox_onnx_modifier YOUR_YOLOX.onnx -o YOUR_YOLOX_WITH_NMS.onnx
+   ```
+
+## Label file
+
+A sample label file (named `label.txt`) is also downloaded automatically during the build process
+(**NOTE:** This file is incompatible with models that output labels for the COCO dataset (e.g., models from the official YOLOX repository)).
+
+This file describes the correspondence between class indices (integers output from the YOLOX network) and
+class labels (human-readable strings). This package maps class IDs (starting from 0)
+to labels according to the line order in this file, as in the example below.
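An illustrative label file for the classes listed under "Assumptions / Known limits" (a sketch; the actual downloaded `label.txt` may differ). The first line maps to class ID 0, the second to class ID 1, and so on:

```text
CAR
PEDESTRIAN
BUS
TRUCK
BICYCLE
MOTORCYCLE
```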
+
+## Reference repositories
+
+- <https://github.com/Megvii-BaseDetection/YOLOX>
+- <https://github.com/wep21/yolox_onnx_modifier>
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
new file mode 100644
index 0000000000000..7f0f64dc89f67
--- /dev/null
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -0,0 +1,123 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
+#define TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
+
+#include <cuda_utils/cuda_unique_ptr.hpp>
+#include <cuda_utils/stream_unique_ptr.hpp>
+#include <opencv2/opencv.hpp>
+#include <tensorrt_common/tensorrt_common.hpp>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tensorrt_yolox
+{
+using cuda_utils::CudaUniquePtr;
+using cuda_utils::CudaUniquePtrHost;
+using cuda_utils::makeCudaStream;
+using cuda_utils::StreamUniquePtr;
+
+struct Object
+{
+  int32_t x_offset;
+  int32_t y_offset;
+  int32_t height;
+  int32_t width;
+  float score;
+  int32_t type;
+};
+
+using ObjectArray = std::vector<Object>;
+using ObjectArrays = std::vector<ObjectArray>;
+
+struct GridAndStride
+{
+  int grid0;
+  int grid1;
+  int stride;
+};
+
+class TrtYoloX
+{
+public:
+  TrtYoloX(
+    const std::string & model_path, const std::string & precision, const int num_class = 8,
+    const float score_threshold = 0.3, const float nms_threshold = 0.7,
+    const std::string & cache_dir = "",
+    const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
+    const size_t max_workspace_size = (1 << 30));
+
+  bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects);
+
+private:
+  void preprocess(const std::vector<cv::Mat> & images);
+  bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
+  bool feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects);
+  void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
+  void generateGridsAndStride(
+    const int target_w, const int target_h, std::vector<int> & strides,
+    std::vector<GridAndStride> & grid_strides) const;
+  void generateYoloxProposals(
+    std::vector<GridAndStride> grid_strides, float * feat_blob, float prob_threshold,
+    ObjectArray & objects) const;
+  void qsortDescentInplace(ObjectArray & faceobjects, int left, int right) const;
+  inline void qsortDescentInplace(ObjectArray & objects) const
+  {
+    if (objects.empty()) {
+      return;
+    }
+    qsortDescentInplace(objects, 0, objects.size() - 1);
+  }
+  inline float intersectionArea(const Object & a, const Object & b) const
+  {
+    cv::Rect a_rect(a.x_offset, a.y_offset, a.width, a.height);
+    cv::Rect b_rect(b.x_offset, b.y_offset, b.width, b.height);
+    cv::Rect_<int> inter = a_rect & b_rect;
+    return inter.area();
+  }
+  void nmsSortedBboxes(
+    const ObjectArray & faceobjects, std::vector<int> & picked, float nms_threshold) const;
+
+  std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;
+
+  std::vector<float> input_h_;
+  CudaUniquePtr<float[]> input_d_;
+  CudaUniquePtr<int32_t[]> out_num_detections_d_;
+  CudaUniquePtr<float[]> out_boxes_d_;
+  CudaUniquePtr<float[]> out_scores_d_;
+  CudaUniquePtr<int32_t[]> out_classes_d_;
+
+  bool needs_output_decode_;
+  size_t out_elem_num_;
+  size_t out_elem_num_per_batch_;
+  CudaUniquePtr<float[]> out_prob_d_;
+
+  StreamUniquePtr stream_{makeCudaStream()};
+
+  int32_t max_detections_;
+  std::vector<float> scales_;
+
+  int num_class_;
+  float score_threshold_;
+  float nms_threshold_;
+
+  CudaUniquePtrHost<float[]> out_prob_h_;
+};
+
+}  // namespace tensorrt_yolox
+
+#endif  // TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
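A usage sketch for the public API above (illustrative, not part of this diff; file names are hypothetical): `doInference` accepts a batch of `cv::Mat` images and returns one `ObjectArray` per image, regardless of whether the model needs output decoding.

```cpp
// Sketch (not part of this diff): running TrtYoloX on a single image.
#include <tensorrt_yolox/tensorrt_yolox.hpp>

#include <opencv2/opencv.hpp>

#include <iostream>

int main()
{
  tensorrt_yolox::TrtYoloX yolox("yolox-tiny.onnx", "fp16");  // hypothetical model path
  cv::Mat image = cv::imread("test.jpg");                     // hypothetical input image
  tensorrt_yolox::ObjectArrays objects;
  if (!yolox.doInference({image}, objects)) {
    std::cerr << "inference failed" << std::endl;
    return 1;
  }
  for (const auto & object : objects.at(0)) {  // results for the first (only) image
    std::cout << "class " << object.type << " score " << object.score << " at ["
              << object.x_offset << ", " << object.y_offset << ", " << object.width << ", "
              << object.height << "]" << std::endl;
  }
  return 0;
}
```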
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
new file mode 100644
index 0000000000000..9a9921b36dcf1
--- /dev/null
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -0,0 +1,63 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
+#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
+
+#include <image_transport/image_transport.hpp>
+#include <opencv2/opencv.hpp>
+#include <rclcpp/rclcpp.hpp>
+#include <tensorrt_yolox/tensorrt_yolox.hpp>
+
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+#include <sensor_msgs/msg/image.hpp>
+#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
+
+#include <cv_bridge/cv_bridge.h>
+
+#include <chrono>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tensorrt_yolox
+{
+using LabelMap = std::map<int, std::string>;
+
+class TrtYoloXNode : public rclcpp::Node
+{
+public:
+  explicit TrtYoloXNode(const rclcpp::NodeOptions & node_options);
+
+private:
+  void onConnect();
+  void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
+  bool readLabelFile(const std::string & label_path);
+
+  image_transport::Publisher image_pub_;
+  rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr
+    objects_pub_;
+
+  image_transport::Subscriber image_sub_;
+
+  rclcpp::TimerBase::SharedPtr timer_;
+
+  LabelMap label_map_;
+  std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
+};
+
+}  // namespace tensorrt_yolox
+
+#endif  // TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
diff --git a/perception/tensorrt_yolox/package.xml b/perception/tensorrt_yolox/package.xml
new file mode 100644
index 0000000000000..dcd04c264ea95
--- /dev/null
+++ b/perception/tensorrt_yolox/package.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<package format="3">
+  <name>tensorrt_yolox</name>
+  <version>0.0.1</version>
+  <description>tensorrt library implementation for yolox</description>
+
+  <author>Daisuke Nishimatsu</author>
+  <maintainer>Daisuke Nishimatsu</maintainer>
+  <maintainer>Manato Hirabayashi</maintainer>
+
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+  <buildtool_depend>cudnn_cmake_module</buildtool_depend>
+  <buildtool_depend>tensorrt_cmake_module</buildtool_depend>
+
+  <buildtool_export_depend>cudnn_cmake_module</buildtool_export_depend>
+  <buildtool_export_depend>tensorrt_cmake_module</buildtool_export_depend>
+
+  <build_depend>autoware_cmake</build_depend>
+
+  <depend>autoware_auto_perception_msgs</depend>
+  <depend>cuda_utils</depend>
+  <depend>cv_bridge</depend>
+  <depend>image_transport</depend>
+  <depend>libopencv-dev</depend>
+  <depend>rclcpp</depend>
+  <depend>rclcpp_components</depend>
+  <depend>sensor_msgs</depend>
+  <depend>tensorrt_common</depend>
+  <depend>tier4_perception_msgs</depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+</package>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
new file mode 100644
index 0000000000000..191a3832d028d
--- /dev/null
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -0,0 +1,390 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cuda_utils/cuda_check_error.hpp"
+#include "cuda_utils/cuda_unique_ptr.hpp"
+
+#include <tensorrt_yolox/tensorrt_yolox.hpp>
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace tensorrt_yolox
+{
+TrtYoloX::TrtYoloX(
+  const std::string & model_path, const std::string & precision, const int num_class,
+  const float score_threshold, const float nms_threshold,
+  [[maybe_unused]] const std::string & cache_dir,
+  const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size)
+{
+  trt_common_ = std::make_unique<tensorrt_common::TrtCommon>(
+    model_path, precision, nullptr, batch_config, max_workspace_size);
+  trt_common_->setup();
+
+  if (!trt_common_->isInitialized()) {
+    return;
+  }
+
+  // Judge whether decoding output is required
+  // Plain models require decoding, while models with the EfficientNMS_TRT module don't.
+  // If getNbBindings == 5, the model contains EfficientNMS_TRT
+  switch (trt_common_->getNbBindings()) {
+    case 2:
+      // The specified model is a plain one.
+      // Bindings are: [input, output]
+      needs_output_decode_ = true;
+      // The following three values are considered only when the specified model is a plain one
+      num_class_ = num_class;
+      score_threshold_ = score_threshold;
+      nms_threshold_ = nms_threshold;
+      break;
+    case 5:
+      // The specified model contains EfficientNMS_TRT.
+      // Bindings are: [input, detection_classes, detection_scores, detection_boxes,
+      // num_detections]
+      needs_output_decode_ = false;
+      break;
+    default: {
+      std::stringstream s;
+      s << "\"" << model_path << "\" is an unsupported format";
+      throw std::runtime_error{s.str()};
+    }
+  }
+
+  // GPU memory allocation
+  const auto input_dims = trt_common_->getBindingDimensions(0);
+  const auto input_size =
+    std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies<int>());
+  if (needs_output_decode_) {
+    const auto output_dims = trt_common_->getBindingDimensions(1);
+    input_d_ = cuda_utils::make_unique<float[]>(batch_config[2] * input_size);
+    out_elem_num_ = std::accumulate(
+      output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
+    out_elem_num_per_batch_ = static_cast<size_t>(out_elem_num_ / batch_config[2]);
+    out_prob_d_ = cuda_utils::make_unique<float[]>(out_elem_num_);
+    out_prob_h_ = cuda_utils::make_unique_host<float[]>(out_elem_num_, cudaHostAllocPortable);
+  } else {
+    const auto out_scores_dims = trt_common_->getBindingDimensions(3);
+    max_detections_ = out_scores_dims.d[1];
+    input_d_ = cuda_utils::make_unique<float[]>(batch_config[2] * input_size);
+    out_num_detections_d_ = cuda_utils::make_unique<int32_t[]>(batch_config[2]);
+    out_boxes_d_ = cuda_utils::make_unique<float[]>(batch_config[2] * max_detections_ * 4);
+    out_scores_d_ = cuda_utils::make_unique<float[]>(batch_config[2] * max_detections_);
+    out_classes_d_ = cuda_utils::make_unique<int32_t[]>(batch_config[2] * max_detections_);
+  }
+}
+
+void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
+{
+  const auto batch_size = images.size();
+  auto input_dims = trt_common_->getBindingDimensions(0);
+  input_dims.d[0] = batch_size;
+  trt_common_->setBindingDimensions(0, input_dims);
+  const float input_height = static_cast<float>(input_dims.d[2]);
+  const float input_width = static_cast<float>(input_dims.d[3]);
+  std::vector<cv::Mat> dst_images;
+  scales_.clear();
+  for (const auto & image : images) {
+    cv::Mat dst_image;
+    const float scale = std::min(input_width / image.cols, input_height / image.rows);
+    scales_.emplace_back(scale);
+    const auto scale_size = cv::Size(image.cols * scale, image.rows * scale);
+    cv::resize(image, dst_image, scale_size, 0, 0, cv::INTER_CUBIC);
+    const auto bottom = input_height - dst_image.rows;
+    const auto right = input_width - dst_image.cols;
+    copyMakeBorder(dst_image, dst_image, 0, bottom, 0, right, cv::BORDER_CONSTANT, {114, 114, 114});
+    dst_images.emplace_back(dst_image);
+  }
+  const auto chw_images =
+    cv::dnn::blobFromImages(dst_images, 1.0, cv::Size(), cv::Scalar(), false, false, CV_32F);
+
+  const auto data_length = chw_images.total();
+  input_h_.reserve(data_length);
+  const auto flat = chw_images.reshape(1, data_length);
+  input_h_ = chw_images.isContinuous() ? flat : flat.clone();
+}
+
+bool TrtYoloX::doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+{
+  if (!trt_common_->isInitialized()) {
+    return false;
+  }
+
+  preprocess(images);
+
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    input_d_.get(), input_h_.data(), input_h_.size() * sizeof(float), cudaMemcpyHostToDevice));
+
+  if (needs_output_decode_) {
+    return feedforwardAndDecode(images, objects);
+  } else {
+    return feedforward(images, objects);
+  }
+}
+
+// This method is assumed to be called when the specified YOLOX model contains
+// an EfficientNMS_TRT module.
+bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+{
+  std::vector<void *> buffers = {
+    input_d_.get(), out_num_detections_d_.get(), out_boxes_d_.get(), out_scores_d_.get(),
+    out_classes_d_.get()};
+
+  trt_common_->enqueueV2(buffers.data(), *stream_, nullptr);
+
+  const auto batch_size = images.size();
+  auto out_num_detections = std::make_unique<int32_t[]>(batch_size);
+  auto out_boxes = std::make_unique<float[]>(4 * batch_size * max_detections_);
+  auto out_scores = std::make_unique<float[]>(batch_size * max_detections_);
+  auto out_classes = std::make_unique<int32_t[]>(batch_size * max_detections_);
+
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_num_detections.get(), out_num_detections_d_.get(), sizeof(int32_t) * batch_size,
+    cudaMemcpyDeviceToHost, *stream_));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_boxes.get(), out_boxes_d_.get(), sizeof(float) * 4 * batch_size * max_detections_,
+    cudaMemcpyDeviceToHost, *stream_));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_scores.get(), out_scores_d_.get(), sizeof(float) * batch_size * max_detections_,
+    cudaMemcpyDeviceToHost, *stream_));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_classes.get(), out_classes_d_.get(), sizeof(int32_t) * batch_size * max_detections_,
+    cudaMemcpyDeviceToHost, *stream_));
+  cudaStreamSynchronize(*stream_);
+  objects.clear();
+  for (size_t i = 0; i < batch_size; ++i) {
+    const size_t num_detection = static_cast<size_t>(out_num_detections[i]);
+    ObjectArray object_array;
+    object_array.reserve(num_detection);
+    for (size_t j = 0; j < num_detection; ++j) {
+      Object object{};
+      const auto x1 = out_boxes[i * max_detections_ * 4 + j * 4] / scales_[i];
+      const auto y1 = out_boxes[i * max_detections_ * 4 + j * 4 + 1] / scales_[i];
+      const auto x2 = out_boxes[i * max_detections_ * 4 + j * 4 + 2] / scales_[i];
+      const auto y2 = out_boxes[i * max_detections_ * 4 + j * 4 + 3] / scales_[i];
+      object.x_offset = std::clamp(static_cast<int32_t>(x1), 0, images[i].cols);
+      object.y_offset = std::clamp(static_cast<int32_t>(y1), 0, images[i].rows);
+      object.width = static_cast<int32_t>(std::max(0.0F, x2 - x1));
+      object.height = static_cast<int32_t>(std::max(0.0F, y2 - y1));
+      object.score = out_scores[i * max_detections_ + j];
+      object.type = out_classes[i * max_detections_ + j];
+      object_array.emplace_back(object);
+    }
+    objects.emplace_back(object_array);
+  }
+  return true;
+}
+
+bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+{
+  std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
+
+  trt_common_->enqueueV2(buffers.data(), *stream_, nullptr);
+
+  const auto batch_size = images.size();
+
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_prob_h_.get(), out_prob_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost,
+    *stream_));
+  cudaStreamSynchronize(*stream_);
+  objects.clear();
+
+  for (size_t i = 0; i < batch_size; ++i) {
+    auto image_size = images[i].size();
+    float * batch_prob = out_prob_h_.get() + (i * out_elem_num_per_batch_);
+    ObjectArray object_array;
+    decodeOutputs(batch_prob, object_array, scales_[i], image_size);
+    objects.emplace_back(object_array);
+  }
+  return true;
+}
+
+void TrtYoloX::decodeOutputs(
+  float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const
+{
+  ObjectArray proposals;
+  auto input_dims = trt_common_->getBindingDimensions(0);
+  const float input_height = static_cast<float>(input_dims.d[2]);
+  const float input_width = static_cast<float>(input_dims.d[3]);
+  std::vector<int> strides = {8, 16, 32};
+  std::vector<GridAndStride> grid_strides;
+  generateGridsAndStride(input_width, input_height, strides, grid_strides);
+  generateYoloxProposals(grid_strides, prob, score_threshold_, proposals);
+
+  qsortDescentInplace(proposals);
+
+  std::vector<int> picked;
+  nmsSortedBboxes(proposals, picked, nms_threshold_);
+
+  int count = static_cast<int>(picked.size());
+  objects.resize(count);
+  for (int i = 0; i < count; i++) {
+    objects[i] = proposals[picked[i]];
+
+    // adjust offset to original unpadded
+    float x0 = (objects[i].x_offset) / scale;
+    float y0 = (objects[i].y_offset) / scale;
+    float x1 = (objects[i].x_offset + objects[i].width) / scale;
+    float y1 = (objects[i].y_offset + objects[i].height) / scale;
+
+    // clip
+    x0 = std::clamp(x0, 0.f, static_cast<float>(img_size.width - 1));
+    y0 = std::clamp(y0, 0.f, static_cast<float>(img_size.height - 1));
+    x1 = std::clamp(x1, 0.f, static_cast<float>(img_size.width - 1));
+    y1 = std::clamp(y1, 0.f, static_cast<float>(img_size.height - 1));
+
+    objects[i].x_offset = x0;
+    objects[i].y_offset = y0;
+    objects[i].width = x1 - x0;
+    objects[i].height = y1 - y0;
+  }
+}
+
+void TrtYoloX::generateGridsAndStride(
+  const int target_w, const int target_h, std::vector<int> & strides,
+  std::vector<GridAndStride> & grid_strides) const
+{
+  for (auto stride : strides) {
+    int num_grid_w = target_w / stride;
+    int num_grid_h = target_h / stride;
+    for (int g1 = 0; g1 < num_grid_h; g1++) {
+      for (int g0 = 0; g0 < num_grid_w; g0++) {
+        grid_strides.push_back(GridAndStride{g0, g1, stride});
+      }
+    }
+  }
+}
+
+void TrtYoloX::generateYoloxProposals(
+  std::vector<GridAndStride> grid_strides, float * feat_blob, float prob_threshold,
+  ObjectArray & objects) const
+{
+  const int num_anchors = grid_strides.size();
+
+  for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
+    const int grid0 = grid_strides[anchor_idx].grid0;
+    const int grid1 = grid_strides[anchor_idx].grid1;
+    const int stride = grid_strides[anchor_idx].stride;
+
+    const int basic_pos = anchor_idx * (num_class_ + 5);
+
+    // yolox/models/yolo_head.py decode logic
+    // To apply this logic, the YOLOX head must output raw values
+    // (i.e., `decode_in_inference` should be False)
+    float x_center = (feat_blob[basic_pos + 0] + grid0) * stride;
+    float y_center = (feat_blob[basic_pos + 1] + grid1) * stride;
+    float w = std::exp(feat_blob[basic_pos + 2]) * stride;
+    float h = std::exp(feat_blob[basic_pos + 3]) * stride;
+    float x0 = x_center - w * 0.5f;
+    float y0 = y_center - h * 0.5f;
+
+    float box_objectness = feat_blob[basic_pos + 4];
+    for (int class_idx = 0; class_idx < num_class_; class_idx++) {
+      float box_cls_score = feat_blob[basic_pos + 5 + class_idx];
+      float box_prob = box_objectness * box_cls_score;
+      if (box_prob > prob_threshold) {
+        Object obj;
+        obj.x_offset = x0;
+        obj.y_offset = y0;
+        obj.height = h;
+        obj.width = w;
+        obj.type = class_idx;
+        obj.score = box_prob;
+
+        objects.push_back(obj);
+      }
+    }  // class loop
+  }    // point anchor loop
+}
+
+void TrtYoloX::qsortDescentInplace(ObjectArray & faceobjects, int left, int right) const
+{
+  int i = left;
+  int j = right;
+  float p = faceobjects[(left + right) / 2].score;
+
+  while (i <= j) {
+    while (faceobjects[i].score > p) {
+      i++;
+    }
+
+    while (faceobjects[j].score < p) {
+      j--;
+    }
+
+    if (i <= j) {
+      // swap
+      std::swap(faceobjects[i], faceobjects[j]);
+
+      i++;
+      j--;
+    }
+  }
+
+#pragma omp parallel sections
+  {
+#pragma omp section
+    {
+      if (left < j) {
+        qsortDescentInplace(faceobjects, left, j);
+      }
+    }
+#pragma omp section
+    {
+      if (i < right) {
+        qsortDescentInplace(faceobjects, i, right);
+      }
+    }
+  }
+}
+
+void TrtYoloX::nmsSortedBboxes(
+  const ObjectArray & faceobjects, std::vector<int> & picked, float nms_threshold) const
+{
+  picked.clear();
+  const int n = faceobjects.size();
+
+  std::vector<float> areas(n);
+  for (int i = 0; i < n; i++) {
+    cv::Rect rect(
+      faceobjects[i].x_offset, faceobjects[i].y_offset, faceobjects[i].width,
+      faceobjects[i].height);
+    areas[i] = rect.area();
+  }
+
+  for (int i = 0; i < n; i++) {
+    const Object & a = faceobjects[i];
+
+    int keep = 1;
+    for (int j = 0; j < static_cast<int>(picked.size()); j++) {
+      const Object & b = faceobjects[picked[j]];
+
+      // intersection over union
+      float inter_area = intersectionArea(a, b);
+      float union_area = areas[i] + areas[picked[j]] - inter_area;
+      // float IoU = inter_area / union_area
+      if (inter_area / union_area > nms_threshold) {
+        keep = 0;
+      }
+    }
+
+    if (keep) {
+      picked.push_back(i);
+    }
+  }
+}
+
+}  // namespace tensorrt_yolox
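For reference, the rejection test in `nmsSortedBboxes` is plain intersection-over-union: a candidate is suppressed when `inter_area / union_area` exceeds `nms_threshold`. A standalone sanity check of that arithmetic (a sketch, not part of this diff):

```cpp
// Sketch (not part of this diff): the IoU computation used by nmsSortedBboxes,
// checked on two hand-picked boxes.
#include <opencv2/core.hpp>

#include <iostream>

int main()
{
  const cv::Rect a(0, 0, 100, 100);
  const cv::Rect b(50, 0, 100, 100);
  const float inter = (a & b).area();             // 50 * 100 = 5000
  const float uni = a.area() + b.area() - inter;  // 10000 + 10000 - 5000 = 15000
  // 0.333...; with the default nms_threshold of 0.7, both boxes would be kept
  std::cout << "IoU = " << inter / uni << std::endl;
  return 0;
}
```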
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
new file mode 100644
index 0000000000000..1cc3f6d5ec2cc
--- /dev/null
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -0,0 +1,155 @@
+// Copyright 2022 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <tensorrt_yolox/tensorrt_yolox_node.hpp>
+
+#include <rclcpp_components/register_node_macro.hpp>
+
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tensorrt_yolox
+{
+TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
+: Node("tensorrt_yolox", node_options)
+{
+  using std::placeholders::_1;
+  using std::chrono_literals::operator""ms;
+
+  std::string model_path = declare_parameter("model_path", "");
+  std::string label_path = declare_parameter("label_path", "");
+  std::string precision = declare_parameter("precision", "fp32");
+  // Objects with a score lower than this value will be ignored.
+  // This threshold will be ignored if the specified model contains an EfficientNMS_TRT module
+  float score_threshold = declare_parameter("score_threshold", 0.3);
+  // Detection results will be ignored if their IoU is over this value.
+  // This threshold will be ignored if the specified model contains an EfficientNMS_TRT module
+  float nms_threshold = declare_parameter("nms_threshold", 0.7);
+
+  if (!readLabelFile(label_path)) {
+    RCLCPP_ERROR(this->get_logger(), "Could not find label file");
+    rclcpp::shutdown();
+  }
+  trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
+    model_path, precision, label_map_.size(), score_threshold, nms_threshold);
+
+  timer_ =
+    rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));
+
+  objects_pub_ = this->create_publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>(
+    "~/out/objects", 1);
+  image_pub_ = image_transport::create_publisher(this, "~/out/image");
+}
+
+void TrtYoloXNode::onConnect()
+{
+  using std::placeholders::_1;
+  if (
+    objects_pub_->get_subscription_count() == 0 &&
+    objects_pub_->get_intra_process_subscription_count() == 0 &&
+    image_pub_.getNumSubscribers() == 0) {
+    image_sub_.shutdown();
+  } else if (!image_sub_) {
+    image_sub_ = image_transport::create_subscription(
+      this, "~/in/image", std::bind(&TrtYoloXNode::onImage, this, _1), "raw",
+      rmw_qos_profile_sensor_data);
+  }
+}
+
+void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
+{
+  using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
+
+  tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects;
+
+  cv_bridge::CvImagePtr in_image_ptr;
+  try {
+    in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
+  } catch (cv_bridge::Exception & e) {
+    RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
+    return;
+  }
+  const auto width = in_image_ptr->image.cols;
+  const auto height = in_image_ptr->image.rows;
+
+  tensorrt_yolox::ObjectArrays objects;
+  if (!trt_yolox_->doInference({in_image_ptr->image}, objects)) {
+    RCLCPP_WARN(this->get_logger(), "Failed to run inference");
+    return;
+  }
+  for (const auto & yolox_object : objects.at(0)) {
+    tier4_perception_msgs::msg::DetectedObjectWithFeature object;
+    object.feature.roi.x_offset = yolox_object.x_offset;
+    object.feature.roi.y_offset = yolox_object.y_offset;
+    object.feature.roi.width = yolox_object.width;
+    object.feature.roi.height = yolox_object.height;
+    object.object.classification.emplace_back(autoware_auto_perception_msgs::build