diff --git a/perception/tensorrt_yolo/.gitignore b/perception/tensorrt_yolo/.gitignore
new file mode 100755
index 0000000000000..160e7b321280b
--- /dev/null
+++ b/perception/tensorrt_yolo/.gitignore
@@ -0,0 +1,2 @@
+data/
+calib_image/
diff --git a/perception/tensorrt_yolo/CMakeLists.txt b/perception/tensorrt_yolo/CMakeLists.txt
new file mode 100755
index 0000000000000..c5ad50afde187
--- /dev/null
+++ b/perception/tensorrt_yolo/CMakeLists.txt
@@ -0,0 +1,272 @@
+cmake_minimum_required(VERSION 3.5)
+project(tensorrt_yolo)
+
+if(NOT CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+  set(CMAKE_CXX_EXTENSIONS OFF)
+endif()
+if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  add_compile_options(-Wall -Wextra -Wpedantic -Wno-deprecated-declarations -Werror)
+endif()
+
+option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF)
+
+find_package(ament_cmake_auto REQUIRED)
+ament_auto_find_build_dependencies()
+
+find_package(OpenCV REQUIRED)
+
+# set flags for CUDA availability
+option(CUDA_AVAIL "CUDA available" OFF)
+find_package(CUDA)
+if(CUDA_FOUND)
+  find_library(CUBLAS_LIBRARIES cublas HINTS
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib
+  )
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDA is available!")
+    message(STATUS "CUDA Libs: ${CUDA_LIBRARIES}")
+    message(STATUS "CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  endif()
+  set(CUDA_AVAIL ON)
+else()
+  message("CUDA NOT FOUND")
+  set(CUDA_AVAIL OFF)
+endif()
+
+# set flags for TensorRT availability
+option(TRT_AVAIL "TensorRT available" OFF)
+# try to find the tensorRT modules
+find_library(NVINFER NAMES nvinfer)
+find_library(NVONNXPARSER nvonnxparser)
+find_library(NVINFER_PLUGIN NAMES nvinfer_plugin)
+if(NVINFER AND NVONNXPARSER AND NVINFER_PLUGIN)
+  if(CUDA_VERBOSE)
+    message(STATUS "TensorRT is available!")
+    message(STATUS "NVINFER: ${NVINFER}")
+    message(STATUS "NVPARSERS: ${NVPARSERS}")
+    message(STATUS "NVINFER_PLUGIN: ${NVINFER_PLUGIN}")
+    message(STATUS "NVONNXPARSER: ${NVONNXPARSER}")
+  endif()
+  set(TRT_AVAIL ON)
+else()
+  message("TensorRT is NOT Available")
+  set(TRT_AVAIL OFF)
+endif()
+
+# set flags for CUDNN availability
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# try to find the CUDNN module
+find_library(CUDNN_LIBRARY
+NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+PATH_SUFFIXES lib lib64 bin
+DOC "CUDNN library."
+)
+if(CUDNN_LIBRARY)
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDNN is available!")
+    message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
+  endif()
+  set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
+endif()
+
+# Download onnx
+find_program(GDOWN_AVAIL "gdown")
+if(NOT GDOWN_AVAIL)
+  message("gdown: command not found. External files could not be downloaded.")
+endif()
+set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/data")
+if(NOT EXISTS "${PATH}")
+  execute_process(COMMAND mkdir -p ${PATH})
+endif()
+# yolov3
+set(FILE "${PATH}/yolov3.onnx")
+message(STATUS "Checking and downloading yolov3.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com/uc?id=1ZYoBqVynmO5kKntyN56GEbELRpuXG8Ym" -O ${PATH}/yolov3.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov4
+set(FILE "${PATH}/yolov4.onnx")
+message(STATUS "Checking and downloading yolov4.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com//uc?id=1vkNmSwcIpTkJ_-BrKhxtit0PBJeJYTVX" -O ${PATH}/yolov4.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov4-tiny
+set(FILE "${PATH}/yolov4-tiny.onnx")
+message(STATUS "Checking and downloading yolov4.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com/uc?id=1rUHjV_JfkmlVFgb20XXrOMWo_HZAjrFh" -O ${PATH}/yolov4-tiny.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov5s
+set(FILE "${PATH}/yolov5s.onnx")
+message(STATUS "Checking and downloading yolov5s.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com//uc?id=1CF21nQWigwCPTr5psQZXg6cBQIOYKbad" -O ${PATH}/yolov5s.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov5m
+set(FILE "${PATH}/yolov5m.onnx")
+message(STATUS "Checking and downloading yolov5m.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com//uc?id=1a1h50KJH6slwmjKZpPlS-errukF-BrgG" -O ${PATH}/yolov5m.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov5l
+set(FILE "${PATH}/yolov5l.onnx")
+message(STATUS "Checking and downloading yolov5l.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com/uc?id=1xO8S92Cq7qrmx93UHHyA7Cd7-dJsBDP8" -O ${PATH}/yolov5l.onnx
+    ERROR_QUIET
+  )
+endif()
+
+# yolov5x
+set(FILE "${PATH}/yolov5x.onnx")
+message(STATUS "Checking and downloading yolov5x.onnx")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com/uc?id=1kAHuNJUCxpD-yWrS6t95H3zbAPfClLxI" -O ${PATH}/yolov5x.onnx
+    ERROR_QUIET
+  )
+endif()
+
+set(FILE "${PATH}/coco.names")
+message(STATUS "Checking and downloading coco.names")
+if(NOT EXISTS "${FILE}")
+  message(STATUS "... file does not exist. Downloading now ...")
+  execute_process(
+    COMMAND gdown "https://drive.google.com/uc?id=19wXD23PE16kJDkZ7j2W3Ijvx5I1kO9kJ" -O ${PATH}/coco.names
+    ERROR_QUIET
+  )
+endif()
+
+# create calib image directory for int8 calibration
+set(CALIB_IMAGE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/calib_image")
+if(NOT EXISTS "${CALIB_IMAGE_PATH}")
+  execute_process(COMMAND mkdir -p ${CALIB_IMAGE_PATH})
+endif()
+
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
+  include_directories(
+    include
+    lib/include
+    ${OpenCV_INCLUDE_DIRS}
+    ${CUDA_INCLUDE_DIRS}
+  )
+
+
+  ### yolo ###
+  cuda_add_library(mish_plugin SHARED
+    lib/src/plugins/mish.cu
+    lib/src/plugins/mish_plugin.cpp
+  )
+
+  cuda_add_library(yolo_layer_plugin SHARED
+    lib/src/plugins/yolo_layer.cu
+    lib/src/plugins/yolo_layer_plugin.cpp
+  )
+
+  cuda_add_library(nms_plugin SHARED
+    lib/src/plugins/nms.cu
+    lib/src/plugins/nms_plugin.cpp
+  )
+
+  ament_auto_add_library(yolo SHARED
+    lib/src/trt_yolo.cpp
+  )
+
+  target_include_directories(yolo PUBLIC
+    lib/include
+  )
+
+  target_link_libraries(yolo
+    ${NVINFER}
+    ${NVONNXPARSER}
+    ${NVINFER_PLUGIN}
+    ${CUDA_LIBRARIES}
+    ${CUBLAS_LIBRARIES}
+    ${CUDA_curand_LIBRARY}
+    ${CUDNN_LIBRARY}
+    mish_plugin
+    yolo_layer_plugin
+    nms_plugin
+  )
+
+  ament_auto_add_library(tensorrt_yolo_nodelet SHARED
+    src/nodelet.cpp
+  )
+
+  target_link_libraries(tensorrt_yolo_nodelet
+    ${OpenCV_LIBS}
+    yolo
+    mish_plugin
+    yolo_layer_plugin
+    nms_plugin
+  )
+
+  rclcpp_components_register_node(tensorrt_yolo_nodelet
+    PLUGIN "object_recognition::TensorrtYoloNodelet"
+    EXECUTABLE tensorrt_yolo_node
+  )
+
+  if(BUILD_TESTING)
+    find_package(ament_lint_auto REQUIRED)
+    ament_lint_auto_find_test_dependencies()
+  endif()
+
+  ament_auto_package(INSTALL_TO_SHARE
+    config
+    data
+    launch
+  )
+
+  install(
+    TARGETS
+      mish_plugin
+      yolo_layer_plugin
+      nms_plugin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+    RUNTIME DESTINATION bin
+  )
+
+else()
+  message("TensorrtYolo won't be built, CUDA and/or TensorRT were not found.")
+  ament_auto_package(INSTALL_TO_SHARE
+    config
+    data
+    launch
+  )
+endif()
diff --git a/perception/tensorrt_yolo/README.md b/perception/tensorrt_yolo/README.md
new file mode 100644
index 0000000000000..ac12efef1d7f4
--- /dev/null
+++ b/perception/tensorrt_yolo/README.md
@@ -0,0 +1,99 @@
+# tensorrt_yolo
+
+## Purpose
+
+This package detects 2D bounding boxes for target objects e.g., cars, trucks, bicycles, and pedestrians on a image based on YOLO(You only look once) model.
+
+## Inner-workings / Algorithms
+
+### Cite
+
+yolov3
+
+Redmon, J., & Farhadi, A. (2018). Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767.
+
+yolov4
+
+Bochkovskiy, A., Wang, C. Y., & Liao, H. Y. M. (2020). Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934.
+
+yolov5
+
+Jocher, G., et al. (2021). ultralytics/yolov5: v6.0 - YOLOv5n 'Nano' models, Roboflow integration, TensorFlow export, OpenCV DNN support (v6.0). Zenodo. <https://doi.org/10.5281/zenodo.5563715>
+
+## Inputs / Outputs
+
+### Input
+
+| Name       | Type                | Description     |
+| ---------- | ------------------- | --------------- |
+| `in/image` | `sensor_msgs/Image` | The input image |
+
+### Output
+
+| Name          | Type                                                  | Description                                        |
+| ------------- | ----------------------------------------------------- | -------------------------------------------------- |
+| `out/objects` | `autoware_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes        |
+| `out/image`   | `sensor_msgs/Image`                                   | The image with 2D bounding boxes for visualization |
+
+## Parameters
+
+### Core Parameters
+
+| Name                | Type         | Default Value                                                                                                      | Description                                                                           |
+| ------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------- |
+| `anchors`           | double array | [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] | The anchors to create bounding box candidates                                         |
+| `scale_x_y`         | double array | [1.0, 1.0, 1.0]                                                                                                    | The scale parameter to eliminate grid sensitivity                                     |
+| `score_thresh`      | double       | 0.1                                                                                                                | If the objectness score is less than this value, the object is ignored in yolo layer. |
+| `iou_thresh`        | double       | 0.45                                                                                                               | The iou threshold for NMS method                                                      |
+| `detections_per_im` | int          | 100                                                                                                                | The maximum detection number for one frame                                            |
+| `use_darknet_layer` | bool         | true                                                                                                               | The flag to use yolo layer in darknet                                                 |
+| `ignore_thresh`     | double       | 0.5                                                                                                                | If the output score is less than this value, ths object is ignored.                   |
+
+### Node Parameters
+
+| Name                    | Type   | Default Value | Description                                                        |
+| ----------------------- | ------ | ------------- | ------------------------------------------------------------------ |
+| `onnx_file`             | string | ""            | The onnx file name for yolo model                                  |
+| `engine_file`           | string | ""            | The tensorrt engine file name for yolo model                       |
+| `label_file`            | string | ""            | The label file with label names for detected objects written on it |
+| `calib_image_directory` | string | ""            | The directory name including calibration images for int8 inference |
+| `calib_cache_file`      | string | ""            | The calibration cache file for int8 inference                      |
+| `mode`                  | string | "FP32"        | The inference mode: "FP32", "FP16", "INT8"                         |
+
+## Assumptions / Known limits
+
+This package includes multiple licenses.
+
+## Onnx model
+
+### YOLOv3
+
+[YOLOv3](https://drive.google.com/uc?id=1ZYoBqVynmO5kKntyN56GEbELRpuXG8Ym "YOLOv3"): Converted from darknet [weight file](https://pjreddie.com/media/files/yolov3.weights "weight file") and [conf file](https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg "conf file").
+
+### YOLOv4
+
+[YOLOv4](https://drive.google.com//uc?id=1vkNmSwcIpTkJ_-BrKhxtit0PBJeJYTVX "YOLOv4"): Converted from darknet [weight file](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights "weight file") and [conf file](https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg "conf file").
+
+### YOLOv5
+
+Refer to [this guide](https://github.com/ultralytics/yolov5/issues/251 "guide")
+
+- [YOLOv5s](https://drive.google.com//uc?id=1CF21nQWigwCPTr5psQZXg6cBQIOYKbad "YOLOv5s")
+
+- [YOLOv5m](https://drive.google.com//uc?id=1a1h50KJH6slwmjKZpPlS-errukF-BrgG "YOLOv5m")
+
+- [YOLOv5l](https://drive.google.com/uc?id=1xO8S92Cq7qrmx93UHHyA7Cd7-dJsBDP8 "YOLOv5l")
+
+- [YOLOv5x](https://drive.google.com/uc?id=1kAHuNJUCxpD-yWrS6t95H3zbAPfClLxI "YOLOv5x")
+
+## Reference repositories
+
+- <https://github.com/pjreddie/darknet>
+
+- <https://github.com/AlexeyAB/darknet>
+
+- <https://github.com/ultralytics/yolov5>
+
+- <https://github.com/wang-xinyu/tensorrtx>
+
+- <https://github.com/NVIDIA/retinanet-examples>
diff --git a/perception/tensorrt_yolo/config/yolov3.param.yaml b/perception/tensorrt_yolo/config/yolov3.param.yaml
new file mode 100644
index 0000000000000..5ce50f3acfa96
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov3.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [116.0, 90.0, 156.0, 198.0, 373.0, 326.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 10.0, 13.0, 16.0, 30.0, 33.0, 23.0]
+    scale_x_y: [1.0, 1.0, 1.0]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: true
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml b/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml
new file mode 100644
index 0000000000000..2ee53ee68f764
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [81.0, 82.0, 135.0, 169.0, 344.0, 319.0, 23.0, 27.0, 37.0, 58.0, 81.0, 82.0]
+    scale_x_y: [1.05, 1.05]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: true
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov4.param.yaml b/perception/tensorrt_yolo/config/yolov4.param.yaml
new file mode 100644
index 0000000000000..6122af667c470
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov4.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [12.0, 16.0, 19.0, 36.0, 40.0, 28.0, 36.0, 75.0, 76.0, 55.0, 72.0, 146.0, 142.0, 110.0, 192.0, 243.0, 459.0, 401.0]
+    scale_x_y: [1.2, 1.1, 1.05]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: true
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov5l.param.yaml b/perception/tensorrt_yolo/config/yolov5l.param.yaml
new file mode 100644
index 0000000000000..8a709d057594b
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov5l.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]
+    scale_x_y: [1.0, 1.0, 1.0]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: false
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov5m.param.yaml b/perception/tensorrt_yolo/config/yolov5m.param.yaml
new file mode 100644
index 0000000000000..8a709d057594b
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov5m.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]
+    scale_x_y: [1.0, 1.0, 1.0]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: false
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov5s.param.yaml b/perception/tensorrt_yolo/config/yolov5s.param.yaml
new file mode 100644
index 0000000000000..8a709d057594b
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov5s.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]
+    scale_x_y: [1.0, 1.0, 1.0]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: false
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/config/yolov5x.param.yaml b/perception/tensorrt_yolo/config/yolov5x.param.yaml
new file mode 100644
index 0000000000000..8a709d057594b
--- /dev/null
+++ b/perception/tensorrt_yolo/config/yolov5x.param.yaml
@@ -0,0 +1,10 @@
+/**:
+  ros__parameters:
+    num_anchors: 3
+    anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0]
+    scale_x_y: [1.0, 1.0, 1.0]
+    score_threshold: 0.1
+    iou_thresh: 0.45
+    detections_per_im: 100
+    use_darknet_layer: false
+    ignore_thresh: 0.5
diff --git a/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp b/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp
new file mode 100644
index 0000000000000..5bd846291bcd8
--- /dev/null
+++ b/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp
@@ -0,0 +1,71 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_YOLO__NODELET_HPP_
+#define TENSORRT_YOLO__NODELET_HPP_
+
+#include <image_transport/image_transport.hpp>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/opencv.hpp>
+#include <rclcpp/rclcpp.hpp>
+#include <trt_yolo.hpp>
+
+#include <autoware_perception_msgs/msg/detected_objects_with_feature.hpp>
+#include <sensor_msgs/image_encodings.hpp>
+#include <sensor_msgs/msg/image.hpp>
+#include <std_msgs/msg/header.hpp>
+
+#include <cv_bridge/cv_bridge.h>
+
+#include <chrono>
+#include <fstream>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace object_recognition
+{
+class TensorrtYoloNodelet : public rclcpp::Node
+{
+public:
+  explicit TensorrtYoloNodelet(const rclcpp::NodeOptions & options);
+  void connectCb();
+  void callback(const sensor_msgs::msg::Image::ConstSharedPtr image_msg);
+  bool readLabelFile(const std::string & filepath, std::vector<std::string> * labels);
+
+private:
+  std::mutex connect_mutex_;
+
+  image_transport::Publisher image_pub_;
+  rclcpp::Publisher<autoware_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr
+    objects_pub_;
+
+  image_transport::Subscriber image_sub_;
+
+  rclcpp::TimerBase::SharedPtr timer_;
+
+  yolo::Config yolo_config_;
+
+  std::vector<std::string> labels_;
+  std::unique_ptr<float[]> out_scores_;
+  std::unique_ptr<float[]> out_boxes_;
+  std::unique_ptr<float[]> out_classes_;
+  std::unique_ptr<yolo::Net> net_ptr_;
+};
+
+}  // namespace object_recognition
+
+#endif  // TENSORRT_YOLO__NODELET_HPP_
diff --git a/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml b/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml
new file mode 100755
index 0000000000000..6faa6b385c482
--- /dev/null
+++ b/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml
@@ -0,0 +1,20 @@
+<launch>
+  <arg name="yolo_type" default="yolov3"/>
+  <arg name="label_file" default="coco.names"/>
+  <arg name="input_topic" default="/image_raw"/>
+  <arg name="output_topic" default="rois"/>
+  <arg name="calib_image_directory" default="$(find-pkg-share tensorrt_yolo)/calib_image/"/>
+  <arg name="mode" default="FP32"/>
+  <node pkg="tensorrt_yolo" exec="tensorrt_yolo_node" name="$(anon tensorrt_yolo)" output="screen">
+    <remap from="in/image" to="$(var input_topic)" />
+    <remap from="out/objects" to="$(var output_topic)" />
+    <remap from="out/image" to="$(var output_topic)/debug/image" />
+    <param name="onnx_file" type="str" value="$(find-pkg-share tensorrt_yolo)/data/$(var yolo_type).onnx" />
+    <param name="engine_file" type="str" value="$(find-pkg-share tensorrt_yolo)/data/$(var yolo_type).engine" />
+    <param name="label_file" type="str" value="$(find-pkg-share tensorrt_yolo)/data/$(var label_file)"/>
+    <param name="calib_image_directory" type="str" value="$(var calib_image_directory)"/>
+    <param name="calib_cache_file" type="str" value="$(find-pkg-share tensorrt_yolo)/data/$(var yolo_type).cache" />
+    <param name="mode" type="str" value="$(var mode)"/>
+    <param from="$(find-pkg-share tensorrt_yolo)/config/$(var yolo_type).param.yaml"/>
+  </node>
+</launch>
diff --git a/perception/tensorrt_yolo/launch/yolo.launch.xml b/perception/tensorrt_yolo/launch/yolo.launch.xml
new file mode 100644
index 0000000000000..fa357e32186a9
--- /dev/null
+++ b/perception/tensorrt_yolo/launch/yolo.launch.xml
@@ -0,0 +1,45 @@
+<launch>
+  <arg name="image_raw0" default="/image_raw0"/>
+  <arg name="image_raw1" default=""/>
+  <arg name="image_raw2" default=""/>
+  <arg name="image_raw3" default=""/>
+  <arg name="image_raw4" default=""/>
+  <arg name="image_raw5" default=""/>
+  <arg name="image_raw6" default=""/>
+  <arg name="image_raw7" default=""/>
+  <arg name="image_number" default="1"/>
+  <arg name="output_topic" default="rois"/>
+
+  <include if="$(eval &quot;'$(var image_number)'>='1'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw0)"/>
+    <arg name="output_topic" value="rois0"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='2'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw1)"/>
+    <arg name="output_topic" value="rois1"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='3'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw2)"/>
+    <arg name="output_topic" value="rois2"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='4'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw3)"/>
+    <arg name="output_topic" value="rois3"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='5'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw4)"/>
+    <arg name="output_topic" value="rois4"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='6'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw5)"/>
+    <arg name="output_topic" value="rois5"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='7'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw6)"/>
+    <arg name="output_topic" value="rois6"/>
+  </include>
+  <include if="$(eval &quot;'$(var image_number)'>='8'&quot;)" file="$(find-pkg-share tensorrt_yolo)/launch/tensorrt_yolo.launch.xml">
+    <arg name="input_topic" value="$(var image_raw7)"/>
+    <arg name="output_topic" value="rois7"/>
+  </include>
+</launch>
diff --git a/perception/tensorrt_yolo/lib/include/calibrator.hpp b/perception/tensorrt_yolo/lib/include/calibrator.hpp
new file mode 100644
index 0000000000000..edfbc126d645a
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/calibrator.hpp
@@ -0,0 +1,200 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CALIBRATOR_HPP_
+#define CALIBRATOR_HPP_
+
+#include <cuda_utils.hpp>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/opencv.hpp>
+
+#include <NvInfer.h>
+#include <assert.h>
+
+#include <algorithm>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace yolo
+{
+class ImageStream
+{
+public:
+  ImageStream(
+    int batch_size, nvinfer1::Dims input_dims, const std::vector<std::string> calibration_images)
+  : batch_size_(batch_size),
+    calibration_images_(calibration_images),
+    current_batch_(0),
+    max_batches_(calibration_images.size() / batch_size_),
+    input_dims_(input_dims)
+  {
+    batch_.resize(batch_size_ * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3]);
+  }
+
+  int getBatchSize() const { return batch_size_; }
+
+  int getMaxBatches() const { return max_batches_; }
+
+  float * getBatch() { return &batch_[0]; }
+
+  nvinfer1::Dims getInputDims() { return input_dims_; }
+
+  std::vector<float> preprocess(const cv::Mat & in_img, const int c, const int w, const int h) const
+  {
+    cv::Mat rgb;
+    cv::cvtColor(in_img, rgb, cv::COLOR_BGR2RGB);
+
+    cv::resize(rgb, rgb, cv::Size(w, h));
+
+    cv::Mat img_float;
+    rgb.convertTo(img_float, CV_32FC3, 1 / 255.0);
+
+    // HWC TO CHW
+    std::vector<cv::Mat> input_channels(c);
+    cv::split(img_float, input_channels);
+
+    std::vector<float> result(h * w * c);
+    auto data = result.data();
+    int channel_length = h * w;
+    for (int i = 0; i < c; ++i) {
+      memcpy(data, input_channels[i].data, channel_length * sizeof(float));
+      data += channel_length;
+    }
+
+    return result;
+  }
+
+  bool next()
+  {
+    if (current_batch_ == max_batches_) {
+      return false;
+    }
+
+    for (int i = 0; i < batch_size_; ++i) {
+      auto image =
+        cv::imread(calibration_images_[batch_size_ * current_batch_ + i].c_str(), cv::IMREAD_COLOR);
+      auto input = preprocess(image, input_dims_.d[1], input_dims_.d[3], input_dims_.d[2]);
+      batch_.insert(
+        batch_.begin() + i * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3], input.begin(),
+        input.end());
+    }
+
+    ++current_batch_;
+    return true;
+  }
+
+  void reset() { current_batch_ = 0; }
+
+private:
+  int batch_size_;
+  std::vector<std::string> calibration_images_;
+  int current_batch_;
+  int max_batches_;
+  nvinfer1::Dims input_dims_;
+
+  std::vector<float> batch_;
+};
+
+class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+  Int8EntropyCalibrator(
+    ImageStream & stream, const std::string calibration_cache_file, bool read_cache = true)
+  : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache)
+  {
+    auto d = stream_.getInputDims();
+    input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3];
+    CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
+  }
+
+  int getBatchSize() const noexcept override { return stream_.getBatchSize(); }
+
+  virtual ~Int8EntropyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); }
+
+  bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override
+  {
+    (void)names;
+    (void)nb_bindings;
+
+    if (!stream_.next()) {
+      return false;
+    }
+
+    try {
+      CHECK_CUDA_ERROR(cudaMemcpy(
+        device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
+    } catch (const std::exception & e) {
+      // Do nothing
+    }
+    bindings[0] = device_input_;
+    return true;
+  }
+
+  const void * readCalibrationCache(size_t & length) noexcept override
+  {
+    calib_cache_.clear();
+    std::ifstream input(calibration_cache_file_, std::ios::binary);
+    input >> std::noskipws;
+    if (read_cache_ && input.good()) {
+      std::copy(
+        std::istream_iterator<char>(input), std::istream_iterator<char>(),
+        std::back_inserter(calib_cache_));
+    }
+
+    length = calib_cache_.size();
+    return length ? &calib_cache_[0] : nullptr;
+  }
+
+  void writeCalibrationCache(const void * cache, size_t length) noexcept override
+  {
+    std::ofstream output(calibration_cache_file_, std::ios::binary);
+    output.write(reinterpret_cast<const char *>(cache), length);
+  }
+
+private:
+  ImageStream stream_;
+  const std::string calibration_cache_file_;
+  bool read_cache_{true};
+  size_t input_count_;
+  void * device_input_{nullptr};
+  std::vector<char> calib_cache_;
+};
+}  // namespace yolo
+
+#endif  // CALIBRATOR_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/cuda_utils.hpp b/perception/tensorrt_yolo/lib/include/cuda_utils.hpp
new file mode 100644
index 0000000000000..a3b53a73720a1
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/cuda_utils.hpp
@@ -0,0 +1,120 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This code is licensed under CC0 1.0 Universal (Public Domain).
+ * You can use this without any limitation.
+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
+ */
+
+#ifndef CUDA_UTILS_HPP_
+#define CUDA_UTILS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <type_traits>
+
+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
+
+namespace cuda
+{
+void check_error(const ::cudaError_t e, const char * f, int n)
+{
+  if (e != ::cudaSuccess) {
+    std::stringstream s;
+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
+      << ::cudaGetErrorString(e);
+    throw std::runtime_error{s.str()};
+  }
+}
+
+struct deleter
+{
+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
+};
+template <typename T>
+using unique_ptr = std::unique_ptr<T, deleter>;
+
+template <typename T>
+typename std::enable_if<std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
+  const std::size_t n)
+{
+  using U = typename std::remove_extent<T>::type;
+  U * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
+  return cuda::unique_ptr<T>{p};
+}
+
+template <typename T>
+cuda::unique_ptr<T> make_unique()
+{
+  T * p;
+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
+  return cuda::unique_ptr<T>{p};
+}
+
+constexpr size_t CUDA_ALIGN = 256;
+
+template <typename T>
+inline size_t get_size_aligned(size_t num_elem)
+{
+  size_t size = num_elem * sizeof(T);
+  size_t extra_align = 0;
+  if (size % CUDA_ALIGN != 0) {
+    extra_align = CUDA_ALIGN - size % CUDA_ALIGN;
+  }
+  return size + extra_align;
+}
+
+template <typename T>
+inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
+{
+  size_t size = get_size_aligned<T>(num_elem);
+  if (size > workspace_size) {
+    throw std::runtime_error("Workspace is too small!");
+  }
+  workspace_size -= size;
+  T * ptr = reinterpret_cast<T *>(workspace);
+  workspace = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(workspace) + size);
+  return ptr;
+}
+
+}  // namespace cuda
+
+#endif  // CUDA_UTILS_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/mish.hpp b/perception/tensorrt_yolo/lib/include/mish.hpp
new file mode 100644
index 0000000000000..566cd0203ff41
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/mish.hpp
@@ -0,0 +1,51 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MISH_HPP_
+#define MISH_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <vector>
+
+namespace yolo
+{
+int mish(cudaStream_t stream, const float * input, float * output, int n);
+}  // namespace yolo
+
+#endif  // MISH_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/mish_plugin.hpp b/perception/tensorrt_yolo/lib/include/mish_plugin.hpp
new file mode 100644
index 0000000000000..e49d2d2ba5ac9
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/mish_plugin.hpp
@@ -0,0 +1,131 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MISH_PLUGIN_HPP_
+#define MISH_PLUGIN_HPP_
+
+#include <NvInferPlugin.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace yolo
+{
+class MishPlugin : public nvinfer1::IPluginV2DynamicExt
+{
+public:
+  MishPlugin();
+  MishPlugin(const void * data, size_t length);
+
+  // IPluginV2 methods
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int getNbOutputs() const noexcept override;
+  int initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * libNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
+
+  // IPluginV2Ext methods
+  nvinfer1::DataType getOutputDataType(
+    int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override;
+
+  // IPluginV2DynamicExt methods
+  nvinfer1::IPluginV2DynamicExt * clone() const noexcept override;
+  nvinfer1::DimsExprs getOutputDimensions(
+    int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs,
+    nvinfer1::IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs,
+    int nbOutputs) noexcept override;
+  void configurePlugin(
+    const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs,
+    const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const nvinfer1::PluginTensorDesc * inputs, int nbInputs,
+    const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override;
+  int enqueue(
+    const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
+
+private:
+  const char * mPluginNamespace;
+};
+
+class MishPluginCreator : public nvinfer1::IPluginCreator
+{
+public:
+  MishPluginCreator();
+
+  ~MishPluginCreator() override = default;
+
+  const char * getPluginName() const noexcept override;
+
+  const char * getPluginVersion() const noexcept override;
+
+  const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * createPlugin(
+    const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+
+  void setPluginNamespace(const char * libNamespace) noexcept override
+  {
+    mNamespace = libNamespace;
+  }
+
+  const char * getPluginNamespace() const noexcept override { return mNamespace.c_str(); }
+
+private:
+  std::string mNamespace;
+  static nvinfer1::PluginFieldCollection mFC;
+  static std::vector<nvinfer1::PluginField> mPluginAttributes;
+};
+
+REGISTER_TENSORRT_PLUGIN(MishPluginCreator);
+
+}  // namespace yolo
+
+#endif  // MISH_PLUGIN_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/nms.hpp b/perception/tensorrt_yolo/lib/include/nms.hpp
new file mode 100644
index 0000000000000..1070c50f35807
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/nms.hpp
@@ -0,0 +1,52 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NMS_HPP_
+#define NMS_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <vector>
+
+namespace yolo
+{
+int nms(
+  int batchSize, const void * const * inputs, void * const * outputs, size_t count,
+  int detections_per_im, float nms_thresh, void * workspace, size_t workspace_size,
+  cudaStream_t stream);
+}
+
+#endif  // NMS_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/nms_plugin.hpp b/perception/tensorrt_yolo/lib/include/nms_plugin.hpp
new file mode 100644
index 0000000000000..58bc49e57adf4
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/nms_plugin.hpp
@@ -0,0 +1,123 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NMS_PLUGIN_HPP_
+#define NMS_PLUGIN_HPP_
+
+#include <NvInfer.h>
+
+#include <cassert>
+#include <vector>
+
+namespace yolo
+{
+class NMSPlugin : public nvinfer1::IPluginV2DynamicExt
+{
+public:
+  NMSPlugin(float nms_thresh, int detections_per_im);
+  NMSPlugin(float nms_thresh, int detections_per_im, size_t count);
+  NMSPlugin(const void * data, size_t length);
+
+  // IPluginV2 methods
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int getNbOutputs() const noexcept override;
+  int initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * libNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
+
+  // IPluginV2Ext methods
+  nvinfer1::DataType getOutputDataType(
+    int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override;
+
+  // IPluginV2DynamicExt methods
+  nvinfer1::IPluginV2DynamicExt * clone() const noexcept override;
+  nvinfer1::DimsExprs getOutputDimensions(
+    int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs,
+    nvinfer1::IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs,
+    int nbOutputs) noexcept override;
+  void configurePlugin(
+    const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs,
+    const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const nvinfer1::PluginTensorDesc * inputs, int nbInputs,
+    const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override;
+  int enqueue(
+    const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
+
+private:
+  float nms_thresh_;
+  int detections_per_im_;
+
+  size_t count_;
+  mutable int size = -1;
+};
+
+class NMSPluginCreator : public nvinfer1::IPluginCreator
+{
+public:
+  NMSPluginCreator();
+
+  const char * getPluginName() const noexcept override;
+
+  const char * getPluginVersion() const noexcept override;
+
+  const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * createPlugin(
+    const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+
+  void setPluginNamespace(const char * libNamespace) noexcept override;
+
+  const char * getPluginNamespace() const noexcept override;
+};
+
+REGISTER_TENSORRT_PLUGIN(NMSPluginCreator);
+
+}  // namespace yolo
+
+#endif  // NMS_PLUGIN_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/trt_yolo.hpp b/perception/tensorrt_yolo/lib/include/trt_yolo.hpp
new file mode 100644
index 0000000000000..813811226a0a6
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/trt_yolo.hpp
@@ -0,0 +1,151 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TRT_YOLO_HPP_
+#define TRT_YOLO_HPP_
+
+#include <cuda_utils.hpp>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/opencv.hpp>
+#include <yolo_layer.hpp>
+
+#include <NvInfer.h>
+#include <cuda_runtime.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace yolo
+{
+struct Deleter
+{
+  template <typename T>
+  void operator()(T * obj) const
+  {
+    if (obj) {
+      obj->destroy();
+    }
+  }
+};
+
+template <typename T>
+using unique_ptr = std::unique_ptr<T, Deleter>;
+
+class Logger : public nvinfer1::ILogger
+{
+public:
+  explicit Logger(bool verbose) : verbose_(verbose) {}
+
+  void log(Severity severity, const char * msg) noexcept override
+  {
+    if (verbose_ || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE))) {
+      std::cout << msg << std::endl;
+    }
+  }
+
+private:
+  bool verbose_{false};
+};
+
+struct Config
+{
+  int num_anchors;
+  std::vector<float> anchors;
+  std::vector<float> scale_x_y;
+  float score_thresh;
+  float iou_thresh;
+  int detections_per_im;
+  bool use_darknet_layer;
+  float ignore_thresh;
+};
+
+class Net
+{
+public:
+  // Create engine from engine path
+  explicit Net(const std::string & engine_path, bool verbose = false);
+
+  // Create engine from serialized onnx model
+  Net(
+    const std::string & onnx_file_path, const std::string & precision, const int max_batch_size,
+    const Config & yolo_config, const std::vector<std::string> & calibration_images,
+    const std::string & calibration_table, bool verbose = false,
+    size_t workspace_size = (1ULL << 30));
+
+  ~Net();
+
+  // Save model to path
+  void save(const std::string & path) const;
+
+  bool detect(const cv::Mat & in_img, float * out_scores, float * out_boxes, float * out_classes);
+
+  // Get (c, h, w) size of the fixed input
+  std::vector<int> getInputDims() const;
+
+  std::vector<int> getOutputScoreSize() const;
+
+  // Get max allowed batch size
+  int getMaxBatchSize() const;
+
+  // Get max number of detections
+  int getMaxDetections() const;
+
+  int getInputSize() const;
+
+private:
+  unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr;
+  unique_ptr<nvinfer1::ICudaEngine> engine_ = nullptr;
+  unique_ptr<nvinfer1::IExecutionContext> context_ = nullptr;
+  cudaStream_t stream_ = nullptr;
+  cuda::unique_ptr<float[]> input_d_ = nullptr;
+  cuda::unique_ptr<float[]> out_scores_d_ = nullptr;
+  cuda::unique_ptr<float[]> out_boxes_d_ = nullptr;
+  cuda::unique_ptr<float[]> out_classes_d_ = nullptr;
+
+  void load(const std::string & path);
+  bool prepare();
+  std::vector<float> preprocess(
+    const cv::Mat & in_img, const int c, const int h, const int w) const;
+  // Infer using pre-allocated GPU buffers {data, scores, boxes}
+  void infer(std::vector<void *> & buffers, const int batch_size);
+};
+
+}  // namespace yolo
+
+#endif  // TRT_YOLO_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/yolo_layer.hpp b/perception/tensorrt_yolo/lib/include/yolo_layer.hpp
new file mode 100644
index 0000000000000..62dc601185140
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/yolo_layer.hpp
@@ -0,0 +1,77 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef YOLO_LAYER_HPP_
+#define YOLO_LAYER_HPP_
+
+#include <cuda_runtime_api.h>
+
+#include <vector>
+
+namespace yolo
+{
+int yoloLayer(
+  int batch_size, const void * const * inputs, void * const * outputs, int grid_width,
+  int grid_height, int num_classes, int num_anchors, const std::vector<float> & anchors,
+  int input_width, int input_height, float scale_xy, float score_thresh, bool use_darknet_layer,
+  void * workspace, size_t workspace_size, cudaStream_t stream);
+}  // namespace yolo
+
+#endif  // YOLO_LAYER_HPP_
diff --git a/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp b/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp
new file mode 100644
index 0000000000000..a7e10e8a6351d
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp
@@ -0,0 +1,155 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef YOLO_LAYER_PLUGIN_HPP_
+#define YOLO_LAYER_PLUGIN_HPP_
+
+#include <yolo_layer.hpp>
+
+#include <NvInferPlugin.h>
+
+#include <array>
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace yolo
+{
+class YoloLayerPlugin : public nvinfer1::IPluginV2DynamicExt
+{
+public:
+  explicit YoloLayerPlugin(
+    int width, int height, int num_classes, std::vector<float> & anchors, float scale_xy,
+    float score_thresh, bool use_darknet_layer);
+  YoloLayerPlugin(const void * data, size_t length);
+
+  // IPluginV2 methods
+  const char * getPluginType() const noexcept override;
+  const char * getPluginVersion() const noexcept override;
+  int getNbOutputs() const noexcept override;
+  int initialize() noexcept override;
+  void terminate() noexcept override;
+  size_t getSerializationSize() const noexcept override;
+  void serialize(void * buffer) const noexcept override;
+  void destroy() noexcept override;
+  void setPluginNamespace(const char * libNamespace) noexcept override;
+  const char * getPluginNamespace() const noexcept override;
+
+  // IPluginV2Ext methods
+  nvinfer1::DataType getOutputDataType(
+    int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override;
+
+  // IPluginV2DynamicExt methods
+  nvinfer1::IPluginV2DynamicExt * clone() const noexcept override;
+  nvinfer1::DimsExprs getOutputDimensions(
+    int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs,
+    nvinfer1::IExprBuilder & exprBuilder) noexcept override;
+  bool supportsFormatCombination(
+    int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs,
+    int nbOutputs) noexcept override;
+  void configurePlugin(
+    const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs,
+    const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override;
+  size_t getWorkspaceSize(
+    const nvinfer1::PluginTensorDesc * inputs, int nbInputs,
+    const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override;
+  int enqueue(
+    const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc,
+    const void * const * inputs, void * const * outputs, void * workspace,
+    cudaStream_t stream) noexcept override;
+
+private:
+  int width_;
+  int height_;
+  int num_anchors_;
+  std::vector<float> anchors_;
+  float scale_x_y_;
+  float score_thresh_;
+  bool use_darknet_layer_;
+  mutable int size = -1;
+};
+
+class YoloLayerPluginCreator : public nvinfer1::IPluginCreator
+{
+public:
+  YoloLayerPluginCreator();
+
+  const char * getPluginName() const noexcept override;
+
+  const char * getPluginVersion() const noexcept override;
+
+  const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * createPlugin(
+    const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override;
+
+  nvinfer1::IPluginV2DynamicExt * deserializePlugin(
+    const char * name, const void * serialData, size_t serialLength) noexcept override;
+
+  void setPluginNamespace(const char * libNamespace) noexcept override;
+
+  const char * getPluginNamespace() const noexcept override;
+};
+
+REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator);
+
+}  // namespace yolo
+
+#endif  // YOLO_LAYER_PLUGIN_HPP_
diff --git a/perception/tensorrt_yolo/lib/src/plugins/mish.cu b/perception/tensorrt_yolo/lib/src/plugins/mish.cu
new file mode 100755
index 0000000000000..341d036cf53d0
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/mish.cu
@@ -0,0 +1,71 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include <cuda_runtime_api.h>
+
+#include <mish.hpp>
+
+namespace yolo
+{
+__device__ float mish(float x)
+{
+  float e = __expf(x);
+  float n = e * e + 2 * e;
+  if (x <= -0.6f) return x * __fdividef(n, n + 2);
+
+  return x - 2 * __fdividef(x, n + 2);
+}
+template <typename T, unsigned TPB>
+__global__ void mishKernel(const T * input, T * output, int num_elem)
+{
+  int idx = threadIdx.x + TPB * blockIdx.x;
+  if (idx >= num_elem) return;
+  output[idx] = mish(input[idx]);
+}
+
+int mish(cudaStream_t stream, const float * input, float * output, int n)
+{
+  constexpr int blockSize = 256;
+  const int gridSize = (n + blockSize - 1) / blockSize;
+  mishKernel<float, blockSize><<<gridSize, blockSize, 0, stream>>>(input, output, n);
+  return 0;
+}
+
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp
new file mode 100644
index 0000000000000..efdf8540ee922
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp
@@ -0,0 +1,235 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <mish.hpp>
+#include <mish_plugin.hpp>
+
+#include <stdio.h>
+
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <vector>
+
+using nvinfer1::DataType;
+using nvinfer1::Dims;
+using nvinfer1::DimsExprs;
+using nvinfer1::DynamicPluginTensorDesc;
+using nvinfer1::IExprBuilder;
+using nvinfer1::IPluginV2DynamicExt;
+using nvinfer1::PluginField;
+using nvinfer1::PluginFieldCollection;
+using nvinfer1::PluginFormat;
+using nvinfer1::PluginTensorDesc;
+
+namespace
+{
+const char * MISH_PLUGIN_VERSION{"1"};
+const char * MISH_PLUGIN_NAME{"Mish_TRT"};
+
+inline int64_t volume(const Dims & d)
+{
+  int64_t v = 1;
+  for (int64_t i = 0; i < d.nbDims; i++) {
+    v *= d.d[i];
+  }
+  return v;
+}
+}  // namespace
+
+namespace yolo
+{
+MishPlugin::MishPlugin() {}
+
+// create the plugin at runtime from a byte stream
+MishPlugin::MishPlugin(const void * data, size_t length)
+{
+  (void)data;
+  (void)length;
+}
+
+// IPluginV2 Methods
+
+const char * MishPlugin::getPluginType() const noexcept { return MISH_PLUGIN_NAME; }
+
+const char * MishPlugin::getPluginVersion() const noexcept { return MISH_PLUGIN_VERSION; }
+
+int MishPlugin::getNbOutputs() const noexcept { return 1; }
+
+int MishPlugin::initialize() noexcept { return 0; }
+
+void MishPlugin::terminate() noexcept {}
+
+size_t MishPlugin::getSerializationSize() const noexcept { return 0; }
+
+void MishPlugin::serialize(void * buffer) const noexcept { (void)buffer; }
+
+void MishPlugin::destroy() noexcept { delete this; }
+
+void MishPlugin::setPluginNamespace(const char * pluginNamespace) noexcept
+{
+  mPluginNamespace = pluginNamespace;
+}
+
+const char * MishPlugin::getPluginNamespace() const noexcept { return mPluginNamespace; }
+
+// IPluginV2Ext Methods
+
+DataType MishPlugin::getOutputDataType(
+  int index, const DataType * inputTypes, int nbInputs) const noexcept
+{
+  (void)index;
+  (void)inputTypes;
+  (void)nbInputs;
+
+  assert(inputTypes[0] == DataType::kFLOAT);
+  return inputTypes[0];
+}
+
+// IPluginV2DynamicExt Methods
+
+IPluginV2DynamicExt * MishPlugin::clone() const noexcept
+{
+  auto plugin = new MishPlugin(*this);
+  plugin->setPluginNamespace(mPluginNamespace);
+  return plugin;
+}
+
+DimsExprs MishPlugin::getOutputDimensions(
+  int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept
+{
+  (void)outputIndex;
+  (void)nbInputs;
+  (void)exprBuilder;
+
+  return inputs[0];
+}
+
+bool MishPlugin::supportsFormatCombination(
+  int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept
+{
+  (void)nbInputs;
+  (void)nbOutputs;
+
+  return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR;
+}
+
+void MishPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int nbInput, const DynamicPluginTensorDesc * out,
+  int nbOutput) noexcept
+{
+  (void)in;
+  (void)nbInput;
+  (void)out;
+  (void)nbOutput;
+
+  assert(nbInput == 1);
+  assert(nbOutput == 1);
+}
+
+size_t MishPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs,
+  int nbOutputs) const noexcept
+{
+  (void)inputs;
+  (void)nbInputs;
+  (void)outputs;
+  (void)nbOutputs;
+
+  return 0;
+}
+
+int MishPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  (void)inputDesc;
+  (void)outputDesc;
+  (void)workspace;
+
+  const int input_volume = volume(inputDesc[0].dims);
+
+  int status = -1;
+
+  const float * input = static_cast<const float *>(inputs[0]);
+  float * output = static_cast<float *>(outputs[0]);
+  status = mish(stream, input, output, input_volume);
+  return status;
+}
+
+PluginFieldCollection MishPluginCreator::mFC{};
+std::vector<PluginField> MishPluginCreator::mPluginAttributes;
+
+MishPluginCreator::MishPluginCreator()
+{
+  mPluginAttributes.clear();
+
+  mFC.nbFields = mPluginAttributes.size();
+  mFC.fields = mPluginAttributes.data();
+}
+
+const char * MishPluginCreator::getPluginName() const noexcept { return MISH_PLUGIN_NAME; }
+
+const char * MishPluginCreator::getPluginVersion() const noexcept { return MISH_PLUGIN_VERSION; }
+
+const PluginFieldCollection * MishPluginCreator::getFieldNames() noexcept { return &mFC; }
+
+IPluginV2DynamicExt * MishPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  (void)name;
+  (void)fc;
+
+  MishPlugin * obj = new MishPlugin();
+  obj->setPluginNamespace(mNamespace.c_str());
+  return obj;
+}
+
+IPluginV2DynamicExt * MishPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  (void)name;
+
+  // This object will be deleted when the network is destroyed, which will
+  // call MishPlugin::destroy()
+  MishPlugin * obj = new MishPlugin(serialData, serialLength);
+  obj->setPluginNamespace(mNamespace.c_str());
+  return obj;
+}
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/plugins/nms.cu b/perception/tensorrt_yolo/lib/src/plugins/nms.cu
new file mode 100755
index 0000000000000..a7c735b5ae0ce
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/nms.cu
@@ -0,0 +1,188 @@
+/// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+
+#include <cuda.h>
+#include <thrust/device_ptr.h>
+#include <thrust/execution_policy.h>
+#include <thrust/gather.h>
+#include <thrust/sequence.h>
+
+#if CUDART_VERSION >= 11000
+#include <cub/device/device_radix_sort.cuh>
+#include <cub/iterator/counting_input_iterator.cuh>
+#else
+#include <thrust/system/cuda/detail/cub/device/device_radix_sort.cuh>
+#include <thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh>
+using namespace thrust::cuda_cub;
+#endif
+
+#include <cuda_utils.hpp>
+#include <nms.hpp>
+
+namespace yolo
+{
+__global__ void nms_kernel(
+  const int num_per_thread, const float threshold, const int num_detections, const int * indices,
+  float * scores, const float * classes, const float4 * boxes)
+{
+  // Go through detections by descending score
+  for (int m = 0; m < num_detections; m++) {
+    for (int n = 0; n < num_per_thread; n++) {
+      int i = threadIdx.x * num_per_thread + n;
+      if (i < num_detections && m < i && scores[m] > 0.0f) {
+        int idx = indices[i];
+        int max_idx = indices[m];
+        int icls = classes[idx];
+        int mcls = classes[max_idx];
+        if (mcls == icls) {
+          float4 ibox = boxes[idx];
+          float4 mbox = boxes[max_idx];
+          float x1 = max(ibox.x, mbox.x);
+          float y1 = max(ibox.y, mbox.y);
+          float x2 = min(ibox.x + ibox.z, mbox.x + mbox.z);
+          float y2 = min(ibox.y + ibox.w, ibox.y + mbox.w);
+          float w = max(0.0f, x2 - x1);
+          float h = max(0.0f, y2 - y1);
+          float iarea = ibox.z * ibox.w;
+          float marea = mbox.z * mbox.w;
+          float inter = w * h;
+          float overlap = inter / (iarea + marea - inter);
+          if (overlap > threshold) {
+            scores[i] = 0.0f;
+          }
+        }
+      }
+    }
+
+    // Sync discarded detections
+    __syncthreads();
+  }
+}
+
+int nms(
+  int batch_size, const void * const * inputs, void * const * outputs, size_t count,
+  int detections_per_im, float nms_thresh, void * workspace, size_t workspace_size,
+  cudaStream_t stream)
+{
+  if (!workspace || !workspace_size) {
+    // Return required scratch space size cub style
+    workspace_size = cuda::get_size_aligned<bool>(count);    // flags
+    workspace_size += cuda::get_size_aligned<int>(count);    // indices
+    workspace_size += cuda::get_size_aligned<int>(count);    // indices_sorted
+    workspace_size += cuda::get_size_aligned<float>(count);  // scores
+    workspace_size += cuda::get_size_aligned<float>(count);  // scores_sorted
+
+    size_t temp_size_flag = 0;
+    cub::DeviceSelect::Flagged(
+      (void *)nullptr, temp_size_flag, cub::CountingInputIterator<int>(count), (bool *)nullptr,
+      (int *)nullptr, (int *)nullptr, count);
+    size_t temp_size_sort = 0;
+    cub::DeviceRadixSort::SortPairsDescending(
+      (void *)nullptr, temp_size_sort, (float *)nullptr, (float *)nullptr, (int *)nullptr,
+      (int *)nullptr, count);
+    workspace_size += std::max(temp_size_flag, temp_size_sort);
+
+    return workspace_size;
+  }
+
+  auto on_stream = thrust::cuda::par.on(stream);
+
+  auto flags = cuda::get_next_ptr<bool>(count, workspace, workspace_size);
+  auto indices = cuda::get_next_ptr<int>(count, workspace, workspace_size);
+  auto indices_sorted = cuda::get_next_ptr<int>(count, workspace, workspace_size);
+  auto scores = cuda::get_next_ptr<float>(count, workspace, workspace_size);
+  auto scores_sorted = cuda::get_next_ptr<float>(count, workspace, workspace_size);
+
+  for (int batch = 0; batch < batch_size; batch++) {
+    auto in_scores = static_cast<const float *>(inputs[0]) + batch * count;
+    auto in_boxes = static_cast<const float4 *>(inputs[1]) + batch * count;
+    auto in_classes = static_cast<const float *>(inputs[2]) + batch * count;
+
+    auto out_scores = static_cast<float *>(outputs[0]) + batch * detections_per_im;
+    auto out_boxes = static_cast<float4 *>(outputs[1]) + batch * detections_per_im;
+    auto out_classes = static_cast<float *>(outputs[2]) + batch * detections_per_im;
+
+    // Discard null scores
+    thrust::transform(
+      on_stream, in_scores, in_scores + count, flags, thrust::placeholders::_1 > 0.0f);
+
+    int * num_selected = reinterpret_cast<int *>(indices_sorted);
+    cub::DeviceSelect::Flagged(
+      workspace, workspace_size, cub::CountingInputIterator<int>(0), flags, indices, num_selected,
+      count, stream);
+    cudaStreamSynchronize(stream);
+    int num_detections = *thrust::device_pointer_cast(num_selected);
+
+    // Sort scores and corresponding indices
+    thrust::gather(on_stream, indices, indices + num_detections, in_scores, scores);
+    cub::DeviceRadixSort::SortPairsDescending(
+      workspace, workspace_size, scores, scores_sorted, indices, indices_sorted, num_detections, 0,
+      sizeof(*scores) * 8, stream);
+
+    // Launch actual NMS kernel - 1 block with each thread handling n detections
+    const int max_threads = 1024;
+    int num_per_thread = ceil((float)num_detections / max_threads);
+    nms_kernel<<<1, max_threads, 0, stream>>>(
+      num_per_thread, nms_thresh, num_detections, indices_sorted, scores_sorted, in_classes,
+      in_boxes);
+
+    // Re-sort with updated scores
+    cub::DeviceRadixSort::SortPairsDescending(
+      workspace, workspace_size, scores_sorted, scores, indices_sorted, indices, num_detections, 0,
+      sizeof(*scores) * 8, stream);
+
+    // Gather filtered scores, boxes, classes
+    num_detections = min(detections_per_im, num_detections);
+    cudaMemcpyAsync(
+      out_scores, scores, num_detections * sizeof *scores, cudaMemcpyDeviceToDevice, stream);
+    if (num_detections < detections_per_im) {
+      thrust::fill_n(on_stream, out_scores + num_detections, detections_per_im - num_detections, 0);
+    }
+    thrust::gather(on_stream, indices, indices + num_detections, in_boxes, out_boxes);
+    thrust::gather(on_stream, indices, indices + num_detections, in_classes, out_classes);
+  }
+
+  return 0;
+}
+
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp
new file mode 100644
index 0000000000000..783e8d83f7a01
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp
@@ -0,0 +1,245 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nms.hpp>
+#include <nms_plugin.hpp>
+
+#include <cuda_runtime_api.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cassert>
+#include <cmath>
+
+using nvinfer1::DataType;
+using nvinfer1::DimsExprs;
+using nvinfer1::DynamicPluginTensorDesc;
+using nvinfer1::IExprBuilder;
+using nvinfer1::IPluginV2DynamicExt;
+using nvinfer1::PluginFieldCollection;
+using nvinfer1::PluginFormat;
+using nvinfer1::PluginTensorDesc;
+
+namespace
+{
+const char * NMS_PLUGIN_VERSION{"1"};
+const char * NMS_PLUGIN_NAME{"YOLO_NMS_TRT"};
+const char * NMS_PLUGIN_NAMESPACE{""};
+
+template <typename T>
+void write(char *& buffer, const T & val)
+{
+  *reinterpret_cast<T *>(buffer) = val;
+  buffer += sizeof(T);
+}
+
+template <typename T>
+void read(const char *& buffer, T & val)
+{
+  val = *reinterpret_cast<const T *>(buffer);
+  buffer += sizeof(T);
+}
+
+}  // namespace
+
+namespace yolo
+{
+NMSPlugin::NMSPlugin(float nms_thresh, int detections_per_im)
+: nms_thresh_(nms_thresh), detections_per_im_(detections_per_im)
+{
+  assert(nms_thresh > 0);
+  assert(detections_per_im > 0);
+}
+
+NMSPlugin::NMSPlugin(float nms_thresh, int detections_per_im, size_t count)
+: nms_thresh_(nms_thresh), detections_per_im_(detections_per_im), count_(count)
+{
+  assert(nms_thresh > 0);
+  assert(detections_per_im > 0);
+  assert(count > 0);
+}
+
+NMSPlugin::NMSPlugin(void const * data, size_t length)
+{
+  (void)length;
+
+  const char * d = static_cast<const char *>(data);
+  read(d, nms_thresh_);
+  read(d, detections_per_im_);
+  read(d, count_);
+}
+
+const char * NMSPlugin::getPluginType() const noexcept { return NMS_PLUGIN_NAME; }
+
+const char * NMSPlugin::getPluginVersion() const noexcept { return NMS_PLUGIN_VERSION; }
+
+int NMSPlugin::getNbOutputs() const noexcept { return 3; }
+
+int NMSPlugin::initialize() noexcept { return 0; }
+
+void NMSPlugin::terminate() noexcept {}
+
+size_t NMSPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(nms_thresh_) + sizeof(detections_per_im_) + sizeof(count_);
+}
+
+void NMSPlugin::serialize(void * buffer) const noexcept
+{
+  char * d = static_cast<char *>(buffer);
+  write(d, nms_thresh_);
+  write(d, detections_per_im_);
+  write(d, count_);
+}
+
+void NMSPlugin::destroy() noexcept { delete this; }
+
+void NMSPlugin::setPluginNamespace(const char * N) noexcept { (void)N; }
+
+const char * NMSPlugin::getPluginNamespace() const noexcept { return NMS_PLUGIN_NAMESPACE; }
+
+// IPluginV2Ext Methods
+
+DataType NMSPlugin::getOutputDataType(
+  int index, const DataType * inputTypes, int nbInputs) const noexcept
+{
+  (void)index;
+  (void)inputTypes;
+  (void)nbInputs;
+
+  assert(index < 3);
+  return DataType::kFLOAT;
+}
+
+// IPluginV2DynamicExt Methods
+
+IPluginV2DynamicExt * NMSPlugin::clone() const noexcept
+{
+  return new NMSPlugin(nms_thresh_, detections_per_im_, count_);
+}
+
+DimsExprs NMSPlugin::getOutputDimensions(
+  int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept
+{
+  (void)nbInputs;
+
+  DimsExprs output(inputs[0]);
+  output.d[1] = exprBuilder.constant(detections_per_im_ * (outputIndex == 1 ? 4 : 1));
+  output.d[2] = exprBuilder.constant(1);
+  output.d[3] = exprBuilder.constant(1);
+  return output;
+}
+
+bool NMSPlugin::supportsFormatCombination(
+  int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept
+{
+  (void)nbInputs;
+  (void)nbOutputs;
+
+  assert(nbInputs == 3);
+  assert(nbOutputs == 3);
+  assert(pos < 6);
+  return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR;
+}
+
+void NMSPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int nbInputs, const DynamicPluginTensorDesc * out,
+  int nbOutputs) noexcept
+{
+  (void)nbInputs;
+  (void)nbOutputs;
+  (void)out;
+  (void)nbOutputs;
+
+  assert(nbInputs == 3);
+  assert(in[0].desc.dims.d[1] == in[2].desc.dims.d[1]);
+  assert(in[1].desc.dims.d[1] == in[2].desc.dims.d[1] * 4);
+  count_ = in[0].desc.dims.d[1];
+}
+
+size_t NMSPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs,
+  int nbOutputs) const noexcept
+{
+  (void)nbInputs;
+  (void)outputs;
+  (void)nbOutputs;
+
+  if (size < 0) {
+    size = nms(
+      inputs->dims.d[0], nullptr, nullptr, count_, detections_per_im_, nms_thresh_, nullptr, 0,
+      nullptr);
+  }
+  return size;
+}
+
+int NMSPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  return nms(
+    inputDesc->dims.d[0], inputs, outputs, count_, detections_per_im_, nms_thresh_, workspace,
+    getWorkspaceSize(inputDesc, 3, outputDesc, 3), stream);
+}
+
+NMSPluginCreator::NMSPluginCreator() {}
+
+const char * NMSPluginCreator::getPluginName() const noexcept { return NMS_PLUGIN_NAME; }
+
+const char * NMSPluginCreator::getPluginVersion() const noexcept { return NMS_PLUGIN_VERSION; }
+
+const char * NMSPluginCreator::getPluginNamespace() const noexcept { return NMS_PLUGIN_NAMESPACE; }
+
+void NMSPluginCreator::setPluginNamespace(const char * N) noexcept { (void)N; }
+const PluginFieldCollection * NMSPluginCreator::getFieldNames() noexcept { return nullptr; }
+IPluginV2DynamicExt * NMSPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  (void)name;
+  (void)fc;
+  return nullptr;
+}
+
+IPluginV2DynamicExt * NMSPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  (void)name;
+
+  return new NMSPlugin(serialData, serialLength);
+}
+
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu
new file mode 100755
index 0000000000000..492efd8293a30
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu
@@ -0,0 +1,166 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdexcept>
+
+#include <math_constants.h>
+
+#include <cuda_utils.hpp>
+#include <yolo_layer.hpp>
+
+namespace yolo
+{
+inline __device__ float sigmoid(float x) { return 1.0f / (1.0f + __expf(-x)); }
+
+inline __device__ float scaleSigmoid(float x, float scale)
+{
+  return scale * sigmoid(x) - (scale - 1.0f) * 0.5f;
+}
+
+template <unsigned TPB>
+__global__ void yoloLayerKernel(
+  const float * input, float * out_scores, float4 * out_boxes, float * out_classes, int grid_width,
+  int grid_height, int num_classes, int num_anchors, const float * anchors, int input_width,
+  int input_height, float scale_x_y, float score_thresh, bool use_darknet_layer)
+{
+  int idx = threadIdx.x + TPB * blockIdx.x;
+  int total_grids = grid_width * grid_height;
+  if (idx >= total_grids * num_anchors) return;
+  auto out_score = (out_scores) + idx;
+  auto out_box = (out_boxes) + idx;
+  auto out_class = (out_classes) + idx;
+
+  int anchor_idx = idx / total_grids;
+  idx = idx - total_grids * anchor_idx;
+  int info_len = 5 + num_classes;
+  auto cur_input = static_cast<const float *>(input) + anchor_idx * (info_len * total_grids);
+
+  int class_id;
+  float max_cls_logit = -CUDART_INF_F;  // minus infinity
+  for (int i = 5; i < info_len; ++i) {
+    float l = cur_input[idx + i * total_grids];
+    if (l > max_cls_logit) {
+      max_cls_logit = l;
+      class_id = i - 5;
+    }
+  }
+  float max_cls_prob = sigmoid(max_cls_logit);
+  float objectness = sigmoid(cur_input[idx + 4 * total_grids]);
+
+  int row = idx / grid_width;
+  int col = idx % grid_width;
+  float x = 0, y = 0, w = 0, h = 0;
+
+  if (use_darknet_layer) {
+    x = (col + scaleSigmoid(cur_input[idx + 0 * total_grids], scale_x_y)) / grid_width;    // [0, 1]
+    y = (row + scaleSigmoid(cur_input[idx + 1 * total_grids], scale_x_y)) / grid_height;   // [0, 1]
+    w = __expf(cur_input[idx + 2 * total_grids]) * anchors[2 * anchor_idx] / input_width;  // [0, 1]
+    h = __expf(cur_input[idx + 3 * total_grids]) * anchors[2 * anchor_idx + 1] /
+        input_height;  // [0, 1]
+  } else {
+    x = (col + sigmoid(cur_input[idx + 0 * total_grids]) * 2 - 0.5) / grid_width;   // [0, 1]
+    y = (row + sigmoid(cur_input[idx + 1 * total_grids]) * 2 - 0.5) / grid_height;  // [0, 1]
+    w = (sigmoid(cur_input[idx + 2 * total_grids]) * 2) *
+        (sigmoid(cur_input[idx + 2 * total_grids]) * 2) * anchors[2 * anchor_idx] /
+        input_width;  // [0, 1]
+    h = (sigmoid(cur_input[idx + 3 * total_grids]) * 2) *
+        (sigmoid(cur_input[idx + 3 * total_grids]) * 2) * anchors[2 * anchor_idx + 1] /
+        input_height;  // [0, 1]
+  }
+  x -= w / 2;  // shift from center to top-left
+  y -= h / 2;
+  *out_box = make_float4(x, y, w, h);
+  *out_class = class_id;
+  *out_score = objectness < score_thresh ? 0.0 : max_cls_prob * objectness;
+}
+
+int yoloLayer(
+  int batch_size, const void * const * inputs, void * const * outputs, int grid_width,
+  int grid_height, int num_classes, int num_anchors, const std::vector<float> & anchors,
+  int input_width, int input_height, float scale_x_y, float score_thresh, bool use_darknet_layer,
+  void * workspace, size_t workspace_size, cudaStream_t stream)
+{
+  if (!workspace || !workspace_size) {
+    workspace_size = cuda::get_size_aligned<float>(anchors.size());
+    return workspace_size;
+  }
+
+  auto anchors_d = cuda::get_next_ptr<float>(anchors.size(), workspace, workspace_size);
+  cudaMemcpyAsync(
+    anchors_d, anchors.data(), anchors.size() * sizeof *anchors_d, cudaMemcpyHostToDevice, stream);
+
+  int num_elements = num_anchors * grid_width * grid_height;
+  constexpr int block_size = 256;
+  const int grid_size = (num_elements + block_size - 1) / block_size;
+  for (int batch = 0; batch < batch_size; ++batch) {
+    auto input = static_cast<const float *>(inputs[0]) +
+                 batch * num_anchors * (num_classes + 5) * grid_width * grid_height;
+    auto out_scores = static_cast<float *>(outputs[0]) + batch * num_elements;
+    auto out_boxes = static_cast<float4 *>(outputs[1]) + batch * num_elements;
+    auto out_classes = static_cast<float *>(outputs[2]) + batch * num_elements;
+    yoloLayerKernel<block_size><<<grid_size, block_size, 0, stream>>>(
+      input, out_scores, out_boxes, out_classes, grid_width, grid_height, num_classes, num_anchors,
+      anchors_d, input_width, input_height, scale_x_y, score_thresh, use_darknet_layer);
+  }
+  return 0;
+}
+
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp
new file mode 100644
index 0000000000000..5e73b989089c7
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp
@@ -0,0 +1,311 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * MIT License
+
+ * Copyright (c) 2019-2020 Wang Xinyu
+
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <yolo_layer.hpp>
+#include <yolo_layer_plugin.hpp>
+
+#include <cuda_runtime_api.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cassert>
+#include <cmath>
+#include <vector>
+
+using nvinfer1::DataType;
+using nvinfer1::DimsExprs;
+using nvinfer1::DynamicPluginTensorDesc;
+using nvinfer1::IExprBuilder;
+using nvinfer1::IPluginV2DynamicExt;
+using nvinfer1::PluginFieldCollection;
+using nvinfer1::PluginFormat;
+using nvinfer1::PluginTensorDesc;
+
+namespace
+{
+const char * YOLO_LAYER_PLUGIN_VERSION{"1"};
+const char * YOLO_LAYER_PLUGIN_NAME{"YOLO_Layer_TRT"};
+const char * YOLO_LAYER_PLUGIN_NAMESPACE{""};
+
+template <typename T>
+void write(char *& buffer, const T & val)
+{
+  *reinterpret_cast<T *>(buffer) = val;
+  buffer += sizeof(T);
+}
+
+template <typename T>
+void read(const char *& buffer, T & val)
+{
+  val = *reinterpret_cast<const T *>(buffer);
+  buffer += sizeof(T);
+}
+}  // namespace
+
+namespace yolo
+{
+YoloLayerPlugin::YoloLayerPlugin(
+  int width, int height, int num_anchors, std::vector<float> & anchors, float scale_x_y,
+  float score_thresh, bool use_darknet_layer)
+: width_(width),
+  height_(height),
+  num_anchors_(num_anchors),
+  anchors_(anchors),
+  scale_x_y_(scale_x_y),
+  score_thresh_(score_thresh),
+  use_darknet_layer_(use_darknet_layer)
+{
+}
+
+// create the plugin at runtime from a byte stream
+YoloLayerPlugin::YoloLayerPlugin(const void * data, size_t length)
+{
+  (void)length;
+
+  const char * d = static_cast<const char *>(data);
+  read(d, width_);
+  read(d, height_);
+  read(d, num_anchors_);
+  int anchor_size = num_anchors_ * 2;
+  while (anchor_size--) {
+    float val;
+    read(d, val);
+    anchors_.push_back(val);
+  }
+  read(d, scale_x_y_);
+  read(d, score_thresh_);
+  read(d, use_darknet_layer_);
+}
+// IPluginV2 Methods
+
+const char * YoloLayerPlugin::getPluginType() const noexcept { return YOLO_LAYER_PLUGIN_NAME; }
+
+const char * YoloLayerPlugin::getPluginVersion() const noexcept
+{
+  return YOLO_LAYER_PLUGIN_VERSION;
+}
+
+int YoloLayerPlugin::getNbOutputs() const noexcept { return 3; }
+
+int YoloLayerPlugin::initialize() noexcept { return 0; }
+
+void YoloLayerPlugin::terminate() noexcept {}
+
+size_t YoloLayerPlugin::getSerializationSize() const noexcept
+{
+  return sizeof(width_) + sizeof(height_) + sizeof(num_anchors_) +
+         sizeof(float) * num_anchors_ * 2 + sizeof(scale_x_y_) + sizeof(score_thresh_) +
+         sizeof(use_darknet_layer_);
+}
+
+void YoloLayerPlugin::serialize(void * buffer) const noexcept
+{
+  char * d = reinterpret_cast<char *>(buffer);
+  write(d, width_);
+  write(d, height_);
+  write(d, num_anchors_);
+  for (int i = 0; i < num_anchors_ * 2; ++i) {
+    write(d, anchors_[i]);
+  }
+  write(d, scale_x_y_);
+  write(d, score_thresh_);
+  write(d, use_darknet_layer_);
+}
+
+void YoloLayerPlugin::destroy() noexcept { delete this; }
+
+void YoloLayerPlugin::setPluginNamespace(const char * N) noexcept { (void)N; }
+
+const char * YoloLayerPlugin::getPluginNamespace() const noexcept
+{
+  return YOLO_LAYER_PLUGIN_NAMESPACE;
+}
+
+// IPluginV2Ext Methods
+
+DataType YoloLayerPlugin::getOutputDataType(
+  int index, const DataType * inputTypes, int nbInputs) const noexcept
+{
+  (void)index;
+  (void)inputTypes;
+  (void)nbInputs;
+
+  return DataType::kFLOAT;
+}
+
+// IPluginV2DynamicExt Methods
+
+IPluginV2DynamicExt * YoloLayerPlugin::clone() const noexcept { return new YoloLayerPlugin(*this); }
+
+DimsExprs YoloLayerPlugin::getOutputDimensions(
+  int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept
+{
+  (void)nbInputs;
+
+  DimsExprs ret = inputs[0];
+  ret.nbDims = 3;
+  const auto total_count =
+    ret.d[2]->getConstantValue() * ret.d[3]->getConstantValue() * num_anchors_;
+  ret.d[1] = exprBuilder.constant(total_count * (outputIndex == 1 ? 4 : 1));
+  ret.d[2] = exprBuilder.constant(1);
+  ret.d[3] = exprBuilder.constant(1);
+  return ret;
+}
+
+bool YoloLayerPlugin::supportsFormatCombination(
+  int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept
+{
+  (void)nbInputs;
+  (void)nbOutputs;
+
+  assert(nbInputs == 1);
+  assert(nbOutputs == 3);
+  assert(pos < 4);
+  return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR;
+}
+
+void YoloLayerPlugin::configurePlugin(
+  const DynamicPluginTensorDesc * in, int nbInput, const DynamicPluginTensorDesc * out,
+  int nbOutput) noexcept
+{
+  (void)in;
+  (void)nbInput;
+  (void)out;
+  (void)nbOutput;
+
+  assert(nbInput == 1);
+  assert(nbOutput == 3);
+}
+
+size_t YoloLayerPlugin::getWorkspaceSize(
+  const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs,
+  int nbOutputs) const noexcept
+{
+  (void)nbInputs;
+  (void)outputs;
+  (void)nbOutputs;
+
+  if (size < 0) {
+    const int batch_size = inputs[0].dims.d[0];
+    const int grid_width = inputs[0].dims.d[2];
+    const int grid_height = inputs[0].dims.d[3];
+    const int num_classes = inputs[0].dims.d[1] / num_anchors_ - 5;
+    size = yoloLayer(
+      batch_size, nullptr, nullptr, grid_width, grid_height, num_classes, num_anchors_, anchors_,
+      width_, height_, scale_x_y_, score_thresh_, use_darknet_layer_, nullptr, 0, nullptr);
+  }
+  return size;
+}
+
+int YoloLayerPlugin::enqueue(
+  const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc,
+  const void * const * inputs, void * const * outputs, void * workspace,
+  cudaStream_t stream) noexcept
+{
+  const int batch_size = inputDesc[0].dims.d[0];
+  const int grid_width = inputDesc[0].dims.d[2];
+  const int grid_height = inputDesc[0].dims.d[3];
+  const int num_classes = inputDesc[0].dims.d[1] / num_anchors_ - 5;
+
+  int status = -1;
+  status = yoloLayer(
+    batch_size, inputs, outputs, grid_width, grid_height, num_classes, num_anchors_, anchors_,
+    width_, height_, scale_x_y_, score_thresh_, use_darknet_layer_, workspace,
+    getWorkspaceSize(inputDesc, 1, outputDesc, 3), stream);
+  return status;
+}
+
+YoloLayerPluginCreator::YoloLayerPluginCreator() {}
+
+const char * YoloLayerPluginCreator::getPluginName() const noexcept
+{
+  return YOLO_LAYER_PLUGIN_NAME;
+}
+
+const char * YoloLayerPluginCreator::getPluginVersion() const noexcept
+{
+  return YOLO_LAYER_PLUGIN_VERSION;
+}
+
+const char * YoloLayerPluginCreator::getPluginNamespace() const noexcept
+{
+  return YOLO_LAYER_PLUGIN_NAMESPACE;
+}
+
+void YoloLayerPluginCreator::setPluginNamespace(const char * N) noexcept { (void)N; }
+const PluginFieldCollection * YoloLayerPluginCreator::getFieldNames() noexcept { return nullptr; }
+
+IPluginV2DynamicExt * YoloLayerPluginCreator::createPlugin(
+  const char * name, const PluginFieldCollection * fc) noexcept
+{
+  (void)name;
+  (void)fc;
+
+  return nullptr;
+}
+
+IPluginV2DynamicExt * YoloLayerPluginCreator::deserializePlugin(
+  const char * name, const void * serialData, size_t serialLength) noexcept
+{
+  (void)name;
+
+  return new YoloLayerPlugin(serialData, serialLength);
+}
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/lib/src/trt_yolo.cpp b/perception/tensorrt_yolo/lib/src/trt_yolo.cpp
new file mode 100644
index 0000000000000..1be5f279714ba
--- /dev/null
+++ b/perception/tensorrt_yolo/lib/src/trt_yolo.cpp
@@ -0,0 +1,330 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <calibrator.hpp>
+#include <cuda_utils.hpp>
+#include <mish_plugin.hpp>
+#include <nms_plugin.hpp>
+#include <trt_yolo.hpp>
+#include <yolo_layer_plugin.hpp>
+
+#include <NvOnnxParser.h>
+
+#include <algorithm>
+#include <fstream>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace yolo
+{
+void Net::load(const std::string & path)
+{
+  std::ifstream file(path, std::ios::in | std::ios::binary);
+  file.seekg(0, file.end);
+  size_t size = file.tellg();
+  file.seekg(0, file.beg);
+
+  char * buffer = new char[size];
+  file.read(buffer, size);
+  file.close();
+  if (runtime_) {
+    engine_ =
+      unique_ptr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(buffer, size, nullptr));
+  }
+  delete[] buffer;
+}
+
+bool Net::prepare()
+{
+  if (!engine_) {
+    return false;
+  }
+  context_ = unique_ptr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
+  if (!context_) {
+    return false;
+  }
+  input_d_ = cuda::make_unique<float[]>(getMaxBatchSize() * getInputSize());
+  out_scores_d_ = cuda::make_unique<float[]>(getMaxBatchSize() * getMaxDetections());
+  out_boxes_d_ = cuda::make_unique<float[]>(getMaxBatchSize() * getMaxDetections() * 4);
+  out_classes_d_ = cuda::make_unique<float[]>(getMaxBatchSize() * getMaxDetections());
+  cudaStreamCreate(&stream_);
+  return true;
+}
+
+std::vector<float> Net::preprocess(
+  const cv::Mat & in_img, const int c, const int w, const int h) const
+{
+  cv::Mat rgb;
+  cv::cvtColor(in_img, rgb, cv::COLOR_BGR2RGB);
+
+  cv::resize(rgb, rgb, cv::Size(w, h));
+
+  cv::Mat img_float;
+  rgb.convertTo(img_float, CV_32FC3, 1 / 255.0);
+
+  // HWC TO CHW
+  std::vector<cv::Mat> input_channels(c);
+  cv::split(img_float, input_channels);
+
+  std::vector<float> result(h * w * c);
+  auto data = result.data();
+  int channel_length = h * w;
+  for (int i = 0; i < c; ++i) {
+    memcpy(data, input_channels[i].data, channel_length * sizeof(float));
+    data += channel_length;
+  }
+
+  return result;
+}
+
+Net::Net(const std::string & path, bool verbose)
+{
+  Logger logger(verbose);
+  runtime_ = unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
+  load(path);
+  if (!prepare()) {
+    std::cout << "Fail to prepare engine" << std::endl;
+    return;
+  }
+}
+
+Net::~Net()
+{
+  if (stream_) {
+    cudaStreamSynchronize(stream_);
+    cudaStreamDestroy(stream_);
+  }
+}
+
+Net::Net(
+  const std::string & onnx_file_path, const std::string & precision, const int max_batch_size,
+  const Config & yolo_config, const std::vector<std::string> & calibration_images,
+  const std::string & calibration_table, bool verbose, size_t workspace_size)
+{
+  Logger logger(verbose);
+  runtime_ = unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
+  if (!runtime_) {
+    std::cout << "Fail to create runtime" << std::endl;
+    return;
+  }
+  bool fp16 = precision.compare("FP16") == 0;
+  bool int8 = precision.compare("INT8") == 0;
+
+  // Create builder
+  auto builder = unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger));
+  if (!builder) {
+    std::cout << "Fail to create builder" << std::endl;
+    return;
+  }
+  auto config = unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!config) {
+    std::cout << "Fail to create builder config" << std::endl;
+    return;
+  }
+  // Allow use of FP16 layers when running in INT8
+  if (fp16 || int8) {
+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
+  }
+  config->setMaxWorkspaceSize(workspace_size);
+
+  // Parse ONNX FCN
+  std::cout << "Building " << precision << " core model..." << std::endl;
+  const auto flag =
+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network = unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
+  if (!network) {
+    std::cout << "Fail to create network" << std::endl;
+    return;
+  }
+  auto parser = unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger));
+  if (!parser) {
+    std::cout << "Fail to create parser" << std::endl;
+    return;
+  }
+
+  parser->parseFromFile(
+    onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR));
+  std::vector<nvinfer1::ITensor *> scores, boxes, classes;
+  const auto input = network->getInput(0);
+  const auto num_outputs = network->getNbOutputs();
+  const auto input_dims = input->getDimensions();
+  const auto input_channel = input_dims.d[1];
+  const auto input_height = input_dims.d[2];
+  const auto input_width = input_dims.d[3];
+  for (int i = 0; i < num_outputs; ++i) {
+    auto output = network->getOutput(i);
+    std::vector<float> anchor(
+      yolo_config.anchors.begin() + i * yolo_config.num_anchors * 2,
+      yolo_config.anchors.begin() + (i + 1) * yolo_config.num_anchors * 2);
+    auto yoloLayerPlugin = yolo::YoloLayerPlugin(
+      input_width, input_height, 3, anchor, yolo_config.scale_x_y[i], yolo_config.score_thresh,
+      yolo_config.use_darknet_layer);
+    std::vector<nvinfer1::ITensor *> inputs = {output};
+    auto layer = network->addPluginV2(inputs.data(), inputs.size(), yoloLayerPlugin);
+    scores.push_back(layer->getOutput(0));
+    boxes.push_back(layer->getOutput(1));
+    classes.push_back(layer->getOutput(2));
+  }
+
+  // Cleanup outputs
+  for (int i = 0; i < num_outputs; i++) {
+    auto output = network->getOutput(0);
+    network->unmarkOutput(*output);
+  }
+
+  // Concat tensors from each feature map
+  std::vector<nvinfer1::ITensor *> concat;
+  for (auto tensors : {scores, boxes, classes}) {
+    auto layer = network->addConcatenation(tensors.data(), tensors.size());
+    layer->setAxis(1);
+    auto output = layer->getOutput(0);
+    concat.push_back(output);
+  }
+
+  // Add NMS plugin
+  auto nmsPlugin = yolo::NMSPlugin(yolo_config.iou_thresh, yolo_config.detections_per_im);
+  auto layer = network->addPluginV2(concat.data(), concat.size(), nmsPlugin);
+  for (int i = 0; i < layer->getNbOutputs(); i++) {
+    auto output = layer->getOutput(i);
+    network->markOutput(*output);
+  }
+
+  // create profile
+  auto profile = builder->createOptimizationProfile();
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMIN,
+    nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kOPT,
+    nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width});
+  profile->setDimensions(
+    network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMAX,
+    nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width});
+  config->addOptimizationProfile(profile);
+
+  std::unique_ptr<yolo::Int8EntropyCalibrator> calib{nullptr};
+  if (int8) {
+    if (calibration_images.size() >= static_cast<size_t>(max_batch_size)) {
+      config->setFlag(nvinfer1::BuilderFlag::kINT8);
+      yolo::ImageStream stream(max_batch_size, input_dims, calibration_images);
+      calib = std::make_unique<yolo::Int8EntropyCalibrator>(stream, calibration_table);
+      config->setInt8Calibrator(calib.get());
+    } else {
+      std::cout << "Fail to find enough images for INT8 calibration. Build FP mode..." << std::endl;
+    }
+  }
+
+  // Build engine
+  std::cout << "Applying optimizations and building TRT CUDA engine..." << std::endl;
+  engine_ = unique_ptr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config));
+  if (!prepare()) {
+    std::cout << "Fail to prepare engine" << std::endl;
+    return;
+  }
+}
+
+void Net::save(const std::string & path) const
+{
+  std::cout << "Writing to " << path << "..." << std::endl;
+  auto serialized = unique_ptr<nvinfer1::IHostMemory>(engine_->serialize());
+  std::ofstream file(path, std::ios::out | std::ios::binary);
+  file.write(reinterpret_cast<const char *>(serialized->data()), serialized->size());
+}
+
+void Net::infer(std::vector<void *> & buffers, const int batch_size)
+{
+  if (!context_) {
+    throw std::runtime_error("Fail to create context");
+  }
+  auto input_dims = engine_->getBindingDimensions(0);
+  context_->setBindingDimensions(
+    0, nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3]));
+  context_->enqueueV2(buffers.data(), stream_, nullptr);
+  cudaStreamSynchronize(stream_);
+}
+
+bool Net::detect(const cv::Mat & in_img, float * out_scores, float * out_boxes, float * out_classes)
+{
+  const auto input_dims = getInputDims();
+  const auto input = preprocess(in_img, input_dims.at(0), input_dims.at(2), input_dims.at(1));
+  CHECK_CUDA_ERROR(
+    cudaMemcpy(input_d_.get(), input.data(), input.size() * sizeof(float), cudaMemcpyHostToDevice));
+  std::vector<void *> buffers = {
+    input_d_.get(), out_scores_d_.get(), out_boxes_d_.get(), out_classes_d_.get()};
+  try {
+    infer(buffers, 1);
+  } catch (const std::runtime_error & e) {
+    return false;
+  }
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_scores, out_scores_d_.get(), sizeof(float) * getMaxDetections(), cudaMemcpyDeviceToHost,
+    stream_));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_boxes, out_boxes_d_.get(), sizeof(float) * 4 * getMaxDetections(), cudaMemcpyDeviceToHost,
+    stream_));
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_classes, out_classes_d_.get(), sizeof(float) * getMaxDetections(), cudaMemcpyDeviceToHost,
+    stream_));
+  cudaStreamSynchronize(stream_);
+  return true;
+}
+
+std::vector<int> Net::getInputDims() const
+{
+  auto dims = engine_->getBindingDimensions(0);
+  return {dims.d[1], dims.d[2], dims.d[3]};
+}
+
+int Net::getMaxBatchSize() const
+{
+  return engine_->getProfileDimensions(0, 0, nvinfer1::OptProfileSelector::kMAX).d[0];
+}
+
+int Net::getInputSize() const
+{
+  const auto input_dims = getInputDims();
+  const auto input_size =
+    std::accumulate(std::begin(input_dims), std::end(input_dims), 1, std::multiplies<int>());
+  return input_size;
+}
+
+int Net::getMaxDetections() const { return engine_->getBindingDimensions(1).d[1]; }
+
+}  // namespace yolo
diff --git a/perception/tensorrt_yolo/package.xml b/perception/tensorrt_yolo/package.xml
new file mode 100755
index 0000000000000..6a5d4c486ab10
--- /dev/null
+++ b/perception/tensorrt_yolo/package.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<package format="2">
+  <name>tensorrt_yolo</name>
+  <version>0.1.0</version>
+  <description>The tensorrt_yolo package</description>
+
+  <maintainer email="daisuke.nishimatsu@tier4.jp">Daisuke Nishimatsu</maintainer>
+
+  <license>Apache License 2.0</license>
+
+  <buildtool_depend>ament_cmake_auto</buildtool_depend>
+
+  <depend>autoware_auto_perception_msgs</depend>
+  <depend>autoware_perception_msgs</depend>
+  <depend>cv_bridge</depend>
+  <depend>image_transport</depend>
+  <depend>rclcpp</depend>
+  <depend>rclcpp_components</depend>
+  <depend>sensor_msgs</depend>
+
+  <test_depend>ament_lint_auto</test_depend>
+  <test_depend>autoware_lint_common</test_depend>
+
+  <export>
+    <build_type>ament_cmake</build_type>
+  </export>
+
+</package>
diff --git a/perception/tensorrt_yolo/src/nodelet.cpp b/perception/tensorrt_yolo/src/nodelet.cpp
new file mode 100644
index 0000000000000..fa04bf29c8d94
--- /dev/null
+++ b/perception/tensorrt_yolo/src/nodelet.cpp
@@ -0,0 +1,212 @@
+// Copyright 2020 Tier IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tensorrt_yolo/nodelet.hpp"
+
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+
+#include <glob.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace
+{
+std::vector<std::string> getFilePath(const std::string & input_dir)
+{
+  glob_t globbuf;
+  std::vector<std::string> files;
+  glob((input_dir + "*").c_str(), 0, NULL, &globbuf);
+  for (size_t i = 0; i < globbuf.gl_pathc; i++) {
+    files.push_back(globbuf.gl_pathv[i]);
+  }
+  globfree(&globbuf);
+  return files;
+}
+}  // namespace
+namespace object_recognition
+{
+TensorrtYoloNodelet::TensorrtYoloNodelet(const rclcpp::NodeOptions & options)
+: Node("tensorrt_yolo", options)
+{
+  using std::placeholders::_1;
+
+  std::string onnx_file = declare_parameter("onnx_file", "");
+  std::string engine_file = declare_parameter("engine_file", "");
+  std::string label_file = declare_parameter("label_file", "");
+  std::string calib_image_directory = declare_parameter("calib_image_directory", "");
+  std::string calib_cache_file = declare_parameter("calib_cache_file", "");
+  std::string mode = declare_parameter("mode", "FP32");
+  yolo_config_.num_anchors = declare_parameter("num_anchors", 3);
+  auto anchors = declare_parameter(
+    "anchors", std::vector<double>{
+                 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326});
+  std::vector<float> anchors_float(anchors.begin(), anchors.end());
+  yolo_config_.anchors = anchors_float;
+  auto scale_x_y = declare_parameter("scale_x_y", std::vector<double>{1.0, 1.0, 1.0});
+  std::vector<float> scale_x_y_float(scale_x_y.begin(), scale_x_y.end());
+  yolo_config_.scale_x_y = scale_x_y_float;
+  yolo_config_.score_thresh = declare_parameter("score_thresh", 0.1);
+  yolo_config_.iou_thresh = declare_parameter("iou_thresh", 0.45);
+  yolo_config_.detections_per_im = declare_parameter("detections_per_im", 100);
+  yolo_config_.use_darknet_layer = declare_parameter("use_darknet_layer", true);
+  yolo_config_.ignore_thresh = declare_parameter("ignore_thresh", 0.5);
+
+  if (!readLabelFile(label_file, &labels_)) {
+    RCLCPP_ERROR(this->get_logger(), "Could not find label file");
+  }
+  std::ifstream fs(engine_file);
+  const auto calibration_images = getFilePath(calib_image_directory);
+  if (fs.is_open()) {
+    RCLCPP_INFO(this->get_logger(), "Found %s", engine_file.c_str());
+    net_ptr_.reset(new yolo::Net(engine_file, false));
+    if (net_ptr_->getMaxBatchSize() != 1) {
+      RCLCPP_INFO(
+        this->get_logger(), "Max batch size %d should be 1. Rebuild engine from file",
+        net_ptr_->getMaxBatchSize());
+      net_ptr_.reset(
+        new yolo::Net(onnx_file, mode, 1, yolo_config_, calibration_images, calib_cache_file));
+      net_ptr_->save(engine_file);
+    }
+  } else {
+    RCLCPP_INFO(
+      this->get_logger(), "Could not find %s, try making TensorRT engine from onnx",
+      engine_file.c_str());
+    net_ptr_.reset(
+      new yolo::Net(onnx_file, mode, 1, yolo_config_, calibration_images, calib_cache_file));
+    net_ptr_->save(engine_file);
+  }
+  auto timer_callback = std::bind(&TensorrtYoloNodelet::connectCb, this);
+  const auto period_s = 0.1;
+  const auto period_ns =
+    std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(period_s));
+  timer_ = std::make_shared<rclcpp::GenericTimer<decltype(timer_callback)>>(
+    this->get_clock(), period_ns, std::move(timer_callback),
+    this->get_node_base_interface()->get_context());
+  this->get_node_timers_interface()->add_timer(timer_, nullptr);
+
+  std::lock_guard<std::mutex> lock(connect_mutex_);
+
+  objects_pub_ = this->create_publisher<autoware_perception_msgs::msg::DetectedObjectsWithFeature>(
+    "out/objects", 1);
+  image_pub_ = image_transport::create_publisher(this, "out/image");
+
+  out_scores_ =
+    std::make_unique<float[]>(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections());
+  out_boxes_ =
+    std::make_unique<float[]>(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections() * 4);
+  out_classes_ =
+    std::make_unique<float[]>(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections());
+}
+
+void TensorrtYoloNodelet::connectCb()
+{
+  using std::placeholders::_1;
+  std::lock_guard<std::mutex> lock(connect_mutex_);
+  if (objects_pub_->get_subscription_count() == 0 && image_pub_.getNumSubscribers() == 0) {
+    image_sub_.shutdown();
+  } else if (!image_sub_) {
+    image_sub_ = image_transport::create_subscription(
+      this, "in/image", std::bind(&TensorrtYoloNodelet::callback, this, _1), "raw",
+      rmw_qos_profile_sensor_data);
+  }
+}
+
+void TensorrtYoloNodelet::callback(const sensor_msgs::msg::Image::ConstSharedPtr in_image_msg)
+{
+  using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
+
+  autoware_perception_msgs::msg::DetectedObjectsWithFeature out_objects;
+
+  cv_bridge::CvImagePtr in_image_ptr;
+  try {
+    in_image_ptr = cv_bridge::toCvCopy(in_image_msg, sensor_msgs::image_encodings::BGR8);
+  } catch (cv_bridge::Exception & e) {
+    RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what());
+    return;
+  }
+  if (!net_ptr_->detect(
+        in_image_ptr->image, out_scores_.get(), out_boxes_.get(), out_classes_.get())) {
+    RCLCPP_WARN(this->get_logger(), "Fail to inference");
+    return;
+  }
+  const auto width = in_image_ptr->image.cols;
+  const auto height = in_image_ptr->image.rows;
+  for (int i = 0; i < yolo_config_.detections_per_im; ++i) {
+    if (out_scores_[i] < yolo_config_.ignore_thresh) {
+      break;
+    }
+    autoware_perception_msgs::msg::DetectedObjectWithFeature object;
+    object.feature.roi.x_offset = out_boxes_[4 * i] * width;
+    object.feature.roi.y_offset = out_boxes_[4 * i + 1] * height;
+    object.feature.roi.width = out_boxes_[4 * i + 2] * width;
+    object.feature.roi.height = out_boxes_[4 * i + 3] * height;
+    object.object.classification.emplace_back(autoware_auto_perception_msgs::build<Label>()
+                                                .label(Label::UNKNOWN)
+                                                .probability(out_scores_[i]));
+    const auto class_id = static_cast<int>(out_classes_[i]);
+    if (labels_[class_id] == "car") {
+      object.object.classification.front().label = Label::CAR;
+    } else if (labels_[class_id] == "person") {
+      object.object.classification.front().label = Label::PEDESTRIAN;
+    } else if (labels_[class_id] == "bus") {
+      object.object.classification.front().label = Label::BUS;
+    } else if (labels_[class_id] == "truck") {
+      object.object.classification.front().label = Label::TRUCK;
+    } else if (labels_[class_id] == "bicycle") {
+      object.object.classification.front().label = Label::BICYCLE;
+    } else if (labels_[class_id] == "motorbike") {
+      object.object.classification.front().label = Label::MOTORCYCLE;
+    } else {
+      object.object.classification.front().label = Label::UNKNOWN;
+    }
+    out_objects.feature_objects.push_back(object);
+    const auto left = std::max(0, static_cast<int>(object.feature.roi.x_offset));
+    const auto top = std::max(0, static_cast<int>(object.feature.roi.y_offset));
+    const auto right =
+      std::min(static_cast<int>(object.feature.roi.x_offset + object.feature.roi.width), width);
+    const auto bottom =
+      std::min(static_cast<int>(object.feature.roi.y_offset + object.feature.roi.height), height);
+    cv::rectangle(
+      in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3,
+      8, 0);
+  }
+  image_pub_.publish(in_image_ptr->toImageMsg());
+
+  out_objects.header = in_image_msg->header;
+  objects_pub_->publish(out_objects);
+}
+
+bool TensorrtYoloNodelet::readLabelFile(
+  const std::string & filepath, std::vector<std::string> * labels)
+{
+  std::ifstream labelsFile(filepath);
+  if (!labelsFile.is_open()) {
+    RCLCPP_ERROR(this->get_logger(), "Could not open label file. [%s]", filepath.c_str());
+    return false;
+  }
+  std::string label;
+  while (getline(labelsFile, label)) {
+    labels->push_back(label);
+  }
+  return true;
+}
+
+}  // namespace object_recognition
+
+#include <rclcpp_components/register_node_macro.hpp>
+RCLCPP_COMPONENTS_REGISTER_NODE(object_recognition::TensorrtYoloNodelet)