diff --git a/perception/tensorrt_yolo/.gitignore b/perception/tensorrt_yolo/.gitignore new file mode 100755 index 0000000000000..160e7b321280b --- /dev/null +++ b/perception/tensorrt_yolo/.gitignore @@ -0,0 +1,2 @@ +data/ +calib_image/ diff --git a/perception/tensorrt_yolo/CMakeLists.txt b/perception/tensorrt_yolo/CMakeLists.txt new file mode 100755 index 0000000000000..c5ad50afde187 --- /dev/null +++ b/perception/tensorrt_yolo/CMakeLists.txt @@ -0,0 +1,272 @@ +cmake_minimum_required(VERSION 3.5) +project(tensorrt_yolo) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_EXTENSIONS OFF) +endif() +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic -Wno-deprecated-declarations -Werror) +endif() + +option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF) + +find_package(ament_cmake_auto REQUIRED) +ament_auto_find_build_dependencies() + +find_package(OpenCV REQUIRED) + +# set flags for CUDA availability +option(CUDA_AVAIL "CUDA available" OFF) +find_package(CUDA) +if(CUDA_FOUND) + find_library(CUBLAS_LIBRARIES cublas HINTS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + if(CUDA_VERBOSE) + message(STATUS "CUDA is available!") + message(STATUS "CUDA Libs: ${CUDA_LIBRARIES}") + message(STATUS "CUDA Headers: ${CUDA_INCLUDE_DIRS}") + endif() + set(CUDA_AVAIL ON) +else() + message("CUDA NOT FOUND") + set(CUDA_AVAIL OFF) +endif() + +# set flags for TensorRT availability +option(TRT_AVAIL "TensorRT available" OFF) +# try to find the tensorRT modules +find_library(NVINFER NAMES nvinfer) +find_library(NVONNXPARSER nvonnxparser) +find_library(NVINFER_PLUGIN NAMES nvinfer_plugin) +if(NVINFER AND NVONNXPARSER AND NVINFER_PLUGIN) + if(CUDA_VERBOSE) + message(STATUS "TensorRT is available!") + message(STATUS "NVINFER: ${NVINFER}") + message(STATUS "NVPARSERS: ${NVPARSERS}") + message(STATUS "NVINFER_PLUGIN: ${NVINFER_PLUGIN}") + message(STATUS "NVONNXPARSER: ${NVONNXPARSER}") + endif() + set(TRT_AVAIL ON) +else() + message("TensorRT is NOT Available") + set(TRT_AVAIL OFF) +endif() + +# set flags for CUDNN availability +option(CUDNN_AVAIL "CUDNN available" OFF) +# try to find the CUDNN module +find_library(CUDNN_LIBRARY +NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} +PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} +PATH_SUFFIXES lib lib64 bin +DOC "CUDNN library." +) +if(CUDNN_LIBRARY) + if(CUDA_VERBOSE) + message(STATUS "CUDNN is available!") + message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}") + endif() + set(CUDNN_AVAIL ON) +else() + message("CUDNN is NOT Available") + set(CUDNN_AVAIL OFF) +endif() + +# Download onnx +find_program(GDOWN_AVAIL "gdown") +if(NOT GDOWN_AVAIL) + message("gdown: command not found. External files could not be downloaded.") +endif() +set(PATH "${CMAKE_CURRENT_SOURCE_DIR}/data") +if(NOT EXISTS "${PATH}") + execute_process(COMMAND mkdir -p ${PATH}) +endif() +# yolov3 +set(FILE "${PATH}/yolov3.onnx") +message(STATUS "Checking and downloading yolov3.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com/uc?id=1ZYoBqVynmO5kKntyN56GEbELRpuXG8Ym" -O ${PATH}/yolov3.onnx + ERROR_QUIET + ) +endif() + +# yolov4 +set(FILE "${PATH}/yolov4.onnx") +message(STATUS "Checking and downloading yolov4.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com//uc?id=1vkNmSwcIpTkJ_-BrKhxtit0PBJeJYTVX" -O ${PATH}/yolov4.onnx + ERROR_QUIET + ) +endif() + +# yolov4-tiny +set(FILE "${PATH}/yolov4-tiny.onnx") +message(STATUS "Checking and downloading yolov4.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com/uc?id=1rUHjV_JfkmlVFgb20XXrOMWo_HZAjrFh" -O ${PATH}/yolov4-tiny.onnx + ERROR_QUIET + ) +endif() + +# yolov5s +set(FILE "${PATH}/yolov5s.onnx") +message(STATUS "Checking and downloading yolov5s.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com//uc?id=1CF21nQWigwCPTr5psQZXg6cBQIOYKbad" -O ${PATH}/yolov5s.onnx + ERROR_QUIET + ) +endif() + +# yolov5m +set(FILE "${PATH}/yolov5m.onnx") +message(STATUS "Checking and downloading yolov5m.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com//uc?id=1a1h50KJH6slwmjKZpPlS-errukF-BrgG" -O ${PATH}/yolov5m.onnx + ERROR_QUIET + ) +endif() + +# yolov5l +set(FILE "${PATH}/yolov5l.onnx") +message(STATUS "Checking and downloading yolov5l.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com/uc?id=1xO8S92Cq7qrmx93UHHyA7Cd7-dJsBDP8" -O ${PATH}/yolov5l.onnx + ERROR_QUIET + ) +endif() + +# yolov5x +set(FILE "${PATH}/yolov5x.onnx") +message(STATUS "Checking and downloading yolov5x.onnx") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com/uc?id=1kAHuNJUCxpD-yWrS6t95H3zbAPfClLxI" -O ${PATH}/yolov5x.onnx + ERROR_QUIET + ) +endif() + +set(FILE "${PATH}/coco.names") +message(STATUS "Checking and downloading coco.names") +if(NOT EXISTS "${FILE}") + message(STATUS "... file does not exist. Downloading now ...") + execute_process( + COMMAND gdown "https://drive.google.com/uc?id=19wXD23PE16kJDkZ7j2W3Ijvx5I1kO9kJ" -O ${PATH}/coco.names + ERROR_QUIET + ) +endif() + +# create calib image directory for int8 calibration +set(CALIB_IMAGE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/calib_image") +if(NOT EXISTS "${CALIB_IMAGE_PATH}") + execute_process(COMMAND mkdir -p ${CALIB_IMAGE_PATH}) +endif() + +if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) + include_directories( + include + lib/include + ${OpenCV_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS} + ) + + + ### yolo ### + cuda_add_library(mish_plugin SHARED + lib/src/plugins/mish.cu + lib/src/plugins/mish_plugin.cpp + ) + + cuda_add_library(yolo_layer_plugin SHARED + lib/src/plugins/yolo_layer.cu + lib/src/plugins/yolo_layer_plugin.cpp + ) + + cuda_add_library(nms_plugin SHARED + lib/src/plugins/nms.cu + lib/src/plugins/nms_plugin.cpp + ) + + ament_auto_add_library(yolo SHARED + lib/src/trt_yolo.cpp + ) + + target_include_directories(yolo PUBLIC + lib/include + ) + + target_link_libraries(yolo + ${NVINFER} + ${NVONNXPARSER} + ${NVINFER_PLUGIN} + ${CUDA_LIBRARIES} + ${CUBLAS_LIBRARIES} + ${CUDA_curand_LIBRARY} + ${CUDNN_LIBRARY} + mish_plugin + yolo_layer_plugin + nms_plugin + ) + + ament_auto_add_library(tensorrt_yolo_nodelet SHARED + src/nodelet.cpp + ) + + target_link_libraries(tensorrt_yolo_nodelet + ${OpenCV_LIBS} + yolo + mish_plugin + yolo_layer_plugin + nms_plugin + ) + + rclcpp_components_register_node(tensorrt_yolo_nodelet + PLUGIN "object_recognition::TensorrtYoloNodelet" + EXECUTABLE tensorrt_yolo_node + ) + + if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() + endif() + + ament_auto_package(INSTALL_TO_SHARE + config + data + launch + ) + + install( + TARGETS + mish_plugin + yolo_layer_plugin + nms_plugin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION bin + ) + +else() + message("TensorrtYolo won't be built, CUDA and/or TensorRT were not found.") + ament_auto_package(INSTALL_TO_SHARE + config + data + launch + ) +endif() diff --git a/perception/tensorrt_yolo/README.md b/perception/tensorrt_yolo/README.md new file mode 100644 index 0000000000000..ac12efef1d7f4 --- /dev/null +++ b/perception/tensorrt_yolo/README.md @@ -0,0 +1,99 @@ +# tensorrt_yolo + +## Purpose + +This package detects 2D bounding boxes for target objects e.g., cars, trucks, bicycles, and pedestrians on a image based on YOLO(You only look once) model. + +## Inner-workings / Algorithms + +### Cite + +yolov3 + +Redmon, J., & Farhadi, A. (2018). Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767. + +yolov4 + +Bochkovskiy, A., Wang, C. Y., & Liao, H. Y. M. (2020). Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934. + +yolov5 + +Jocher, G., et al. (2021). ultralytics/yolov5: v6.0 - YOLOv5n 'Nano' models, Roboflow integration, TensorFlow export, OpenCV DNN support (v6.0). Zenodo. + +## Inputs / Outputs + +### Input + +| Name | Type | Description | +| ---------- | ------------------- | --------------- | +| `in/image` | `sensor_msgs/Image` | The input image | + +### Output + +| Name | Type | Description | +| ------------- | ----------------------------------------------------- | -------------------------------------------------- | +| `out/objects` | `autoware_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes | +| `out/image` | `sensor_msgs/Image` | The image with 2D bounding boxes for visualization | + +## Parameters + +### Core Parameters + +| Name | Type | Default Value | Description | +| ------------------- | ------------ | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------- | +| `anchors` | double array | [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] | The anchors to create bounding box candidates | +| `scale_x_y` | double array | [1.0, 1.0, 1.0] | The scale parameter to eliminate grid sensitivity | +| `score_thresh` | double | 0.1 | If the objectness score is less than this value, the object is ignored in yolo layer. | +| `iou_thresh` | double | 0.45 | The iou threshold for NMS method | +| `detections_per_im` | int | 100 | The maximum detection number for one frame | +| `use_darknet_layer` | bool | true | The flag to use yolo layer in darknet | +| `ignore_thresh` | double | 0.5 | If the output score is less than this value, ths object is ignored. | + +### Node Parameters + +| Name | Type | Default Value | Description | +| ----------------------- | ------ | ------------- | ------------------------------------------------------------------ | +| `onnx_file` | string | "" | The onnx file name for yolo model | +| `engine_file` | string | "" | The tensorrt engine file name for yolo model | +| `label_file` | string | "" | The label file with label names for detected objects written on it | +| `calib_image_directory` | string | "" | The directory name including calibration images for int8 inference | +| `calib_cache_file` | string | "" | The calibration cache file for int8 inference | +| `mode` | string | "FP32" | The inference mode: "FP32", "FP16", "INT8" | + +## Assumptions / Known limits + +This package includes multiple licenses. + +## Onnx model + +### YOLOv3 + +[YOLOv3](https://drive.google.com/uc?id=1ZYoBqVynmO5kKntyN56GEbELRpuXG8Ym "YOLOv3"): Converted from darknet [weight file](https://pjreddie.com/media/files/yolov3.weights "weight file") and [conf file](https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg "conf file"). + +### YOLOv4 + +[YOLOv4](https://drive.google.com//uc?id=1vkNmSwcIpTkJ_-BrKhxtit0PBJeJYTVX "YOLOv4"): Converted from darknet [weight file](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights "weight file") and [conf file](https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg "conf file"). + +### YOLOv5 + +Refer to [this guide](https://github.com/ultralytics/yolov5/issues/251 "guide") + +- [YOLOv5s](https://drive.google.com//uc?id=1CF21nQWigwCPTr5psQZXg6cBQIOYKbad "YOLOv5s") + +- [YOLOv5m](https://drive.google.com//uc?id=1a1h50KJH6slwmjKZpPlS-errukF-BrgG "YOLOv5m") + +- [YOLOv5l](https://drive.google.com/uc?id=1xO8S92Cq7qrmx93UHHyA7Cd7-dJsBDP8 "YOLOv5l") + +- [YOLOv5x](https://drive.google.com/uc?id=1kAHuNJUCxpD-yWrS6t95H3zbAPfClLxI "YOLOv5x") + +## Reference repositories + +- + +- + +- + +- + +- diff --git a/perception/tensorrt_yolo/config/yolov3.param.yaml b/perception/tensorrt_yolo/config/yolov3.param.yaml new file mode 100644 index 0000000000000..5ce50f3acfa96 --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov3.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [116.0, 90.0, 156.0, 198.0, 373.0, 326.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 10.0, 13.0, 16.0, 30.0, 33.0, 23.0] + scale_x_y: [1.0, 1.0, 1.0] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: true + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml b/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml new file mode 100644 index 0000000000000..2ee53ee68f764 --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov4-tiny.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [81.0, 82.0, 135.0, 169.0, 344.0, 319.0, 23.0, 27.0, 37.0, 58.0, 81.0, 82.0] + scale_x_y: [1.05, 1.05] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: true + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov4.param.yaml b/perception/tensorrt_yolo/config/yolov4.param.yaml new file mode 100644 index 0000000000000..6122af667c470 --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov4.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [12.0, 16.0, 19.0, 36.0, 40.0, 28.0, 36.0, 75.0, 76.0, 55.0, 72.0, 146.0, 142.0, 110.0, 192.0, 243.0, 459.0, 401.0] + scale_x_y: [1.2, 1.1, 1.05] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: true + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov5l.param.yaml b/perception/tensorrt_yolo/config/yolov5l.param.yaml new file mode 100644 index 0000000000000..8a709d057594b --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov5l.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] + scale_x_y: [1.0, 1.0, 1.0] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: false + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov5m.param.yaml b/perception/tensorrt_yolo/config/yolov5m.param.yaml new file mode 100644 index 0000000000000..8a709d057594b --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov5m.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] + scale_x_y: [1.0, 1.0, 1.0] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: false + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov5s.param.yaml b/perception/tensorrt_yolo/config/yolov5s.param.yaml new file mode 100644 index 0000000000000..8a709d057594b --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov5s.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] + scale_x_y: [1.0, 1.0, 1.0] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: false + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/config/yolov5x.param.yaml b/perception/tensorrt_yolo/config/yolov5x.param.yaml new file mode 100644 index 0000000000000..8a709d057594b --- /dev/null +++ b/perception/tensorrt_yolo/config/yolov5x.param.yaml @@ -0,0 +1,10 @@ +/**: + ros__parameters: + num_anchors: 3 + anchors: [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0] + scale_x_y: [1.0, 1.0, 1.0] + score_threshold: 0.1 + iou_thresh: 0.45 + detections_per_im: 100 + use_darknet_layer: false + ignore_thresh: 0.5 diff --git a/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp b/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp new file mode 100644 index 0000000000000..5bd846291bcd8 --- /dev/null +++ b/perception/tensorrt_yolo/include/tensorrt_yolo/nodelet.hpp @@ -0,0 +1,71 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORRT_YOLO__NODELET_HPP_ +#define TENSORRT_YOLO__NODELET_HPP_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace object_recognition +{ +class TensorrtYoloNodelet : public rclcpp::Node +{ +public: + explicit TensorrtYoloNodelet(const rclcpp::NodeOptions & options); + void connectCb(); + void callback(const sensor_msgs::msg::Image::ConstSharedPtr image_msg); + bool readLabelFile(const std::string & filepath, std::vector * labels); + +private: + std::mutex connect_mutex_; + + image_transport::Publisher image_pub_; + rclcpp::Publisher::SharedPtr + objects_pub_; + + image_transport::Subscriber image_sub_; + + rclcpp::TimerBase::SharedPtr timer_; + + yolo::Config yolo_config_; + + std::vector labels_; + std::unique_ptr out_scores_; + std::unique_ptr out_boxes_; + std::unique_ptr out_classes_; + std::unique_ptr net_ptr_; +}; + +} // namespace object_recognition + +#endif // TENSORRT_YOLO__NODELET_HPP_ diff --git a/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml b/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml new file mode 100755 index 0000000000000..6faa6b385c482 --- /dev/null +++ b/perception/tensorrt_yolo/launch/tensorrt_yolo.launch.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/perception/tensorrt_yolo/launch/yolo.launch.xml b/perception/tensorrt_yolo/launch/yolo.launch.xml new file mode 100644 index 0000000000000..fa357e32186a9 --- /dev/null +++ b/perception/tensorrt_yolo/launch/yolo.launch.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/perception/tensorrt_yolo/lib/include/calibrator.hpp b/perception/tensorrt_yolo/lib/include/calibrator.hpp new file mode 100644 index 0000000000000..edfbc126d645a --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/calibrator.hpp @@ -0,0 +1,200 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CALIBRATOR_HPP_ +#define CALIBRATOR_HPP_ + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace yolo +{ +class ImageStream +{ +public: + ImageStream( + int batch_size, nvinfer1::Dims input_dims, const std::vector calibration_images) + : batch_size_(batch_size), + calibration_images_(calibration_images), + current_batch_(0), + max_batches_(calibration_images.size() / batch_size_), + input_dims_(input_dims) + { + batch_.resize(batch_size_ * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3]); + } + + int getBatchSize() const { return batch_size_; } + + int getMaxBatches() const { return max_batches_; } + + float * getBatch() { return &batch_[0]; } + + nvinfer1::Dims getInputDims() { return input_dims_; } + + std::vector preprocess(const cv::Mat & in_img, const int c, const int w, const int h) const + { + cv::Mat rgb; + cv::cvtColor(in_img, rgb, cv::COLOR_BGR2RGB); + + cv::resize(rgb, rgb, cv::Size(w, h)); + + cv::Mat img_float; + rgb.convertTo(img_float, CV_32FC3, 1 / 255.0); + + // HWC TO CHW + std::vector input_channels(c); + cv::split(img_float, input_channels); + + std::vector result(h * w * c); + auto data = result.data(); + int channel_length = h * w; + for (int i = 0; i < c; ++i) { + memcpy(data, input_channels[i].data, channel_length * sizeof(float)); + data += channel_length; + } + + return result; + } + + bool next() + { + if (current_batch_ == max_batches_) { + return false; + } + + for (int i = 0; i < batch_size_; ++i) { + auto image = + cv::imread(calibration_images_[batch_size_ * current_batch_ + i].c_str(), cv::IMREAD_COLOR); + auto input = preprocess(image, input_dims_.d[1], input_dims_.d[3], input_dims_.d[2]); + batch_.insert( + batch_.begin() + i * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3], input.begin(), + input.end()); + } + + ++current_batch_; + return true; + } + + void reset() { current_batch_ = 0; } + +private: + int batch_size_; + std::vector calibration_images_; + int current_batch_; + int max_batches_; + nvinfer1::Dims input_dims_; + + std::vector batch_; +}; + +class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 +{ +public: + Int8EntropyCalibrator( + ImageStream & stream, const std::string calibration_cache_file, bool read_cache = true) + : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + } + + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8EntropyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next()) { + return false; + } + + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + return length ? &calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; +}; +} // namespace yolo + +#endif // CALIBRATOR_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/cuda_utils.hpp b/perception/tensorrt_yolo/lib/include/cuda_utils.hpp new file mode 100644 index 0000000000000..a3b53a73720a1 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/cuda_utils.hpp @@ -0,0 +1,120 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * This code is licensed under CC0 1.0 Universal (Public Domain). + * You can use this without any limitation. + * https://creativecommons.org/publicdomain/zero/1.0/deed.en + */ + +#ifndef CUDA_UTILS_HPP_ +#define CUDA_UTILS_HPP_ + +#include + +#include +#include +#include +#include + +#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__)) + +namespace cuda +{ +void check_error(const ::cudaError_t e, const char * f, int n) +{ + if (e != ::cudaSuccess) { + std::stringstream s; + s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": " + << ::cudaGetErrorString(e); + throw std::runtime_error{s.str()}; + } +} + +struct deleter +{ + void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); } +}; +template +using unique_ptr = std::unique_ptr; + +template +typename std::enable_if::value, cuda::unique_ptr>::type make_unique( + const std::size_t n) +{ + using U = typename std::remove_extent::type; + U * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(U) * n)); + return cuda::unique_ptr{p}; +} + +template +cuda::unique_ptr make_unique() +{ + T * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(T))); + return cuda::unique_ptr{p}; +} + +constexpr size_t CUDA_ALIGN = 256; + +template +inline size_t get_size_aligned(size_t num_elem) +{ + size_t size = num_elem * sizeof(T); + size_t extra_align = 0; + if (size % CUDA_ALIGN != 0) { + extra_align = CUDA_ALIGN - size % CUDA_ALIGN; + } + return size + extra_align; +} + +template +inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size) +{ + size_t size = get_size_aligned(num_elem); + if (size > workspace_size) { + throw std::runtime_error("Workspace is too small!"); + } + workspace_size -= size; + T * ptr = reinterpret_cast(workspace); + workspace = reinterpret_cast(reinterpret_cast(workspace) + size); + return ptr; +} + +} // namespace cuda + +#endif // CUDA_UTILS_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/mish.hpp b/perception/tensorrt_yolo/lib/include/mish.hpp new file mode 100644 index 0000000000000..566cd0203ff41 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/mish.hpp @@ -0,0 +1,51 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MISH_HPP_ +#define MISH_HPP_ + +#include + +#include + +namespace yolo +{ +int mish(cudaStream_t stream, const float * input, float * output, int n); +} // namespace yolo + +#endif // MISH_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/mish_plugin.hpp b/perception/tensorrt_yolo/lib/include/mish_plugin.hpp new file mode 100644 index 0000000000000..e49d2d2ba5ac9 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/mish_plugin.hpp @@ -0,0 +1,131 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MISH_PLUGIN_HPP_ +#define MISH_PLUGIN_HPP_ + +#include + +#include +#include +#include + +namespace yolo +{ +class MishPlugin : public nvinfer1::IPluginV2DynamicExt +{ +public: + MishPlugin(); + MishPlugin(const void * data, size_t length); + + // IPluginV2 methods + const char * getPluginType() const noexcept override; + const char * getPluginVersion() const noexcept override; + int getNbOutputs() const noexcept override; + int initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void * buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(const char * libNamespace) noexcept override; + const char * getPluginNamespace() const noexcept override; + + // IPluginV2Ext methods + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override; + + // IPluginV2DynamicExt methods + nvinfer1::IPluginV2DynamicExt * clone() const noexcept override; + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) noexcept override; + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs, + int nbOutputs) noexcept override; + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override; + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inputs, int nbInputs, + const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override; + int enqueue( + const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + +private: + const char * mPluginNamespace; +}; + +class MishPluginCreator : public nvinfer1::IPluginCreator +{ +public: + MishPluginCreator(); + + ~MishPluginCreator() override = default; + + const char * getPluginName() const noexcept override; + + const char * getPluginVersion() const noexcept override; + + const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(const char * libNamespace) noexcept override + { + mNamespace = libNamespace; + } + + const char * getPluginNamespace() const noexcept override { return mNamespace.c_str(); } + +private: + std::string mNamespace; + static nvinfer1::PluginFieldCollection mFC; + static std::vector mPluginAttributes; +}; + +REGISTER_TENSORRT_PLUGIN(MishPluginCreator); + +} // namespace yolo + +#endif // MISH_PLUGIN_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/nms.hpp b/perception/tensorrt_yolo/lib/include/nms.hpp new file mode 100644 index 0000000000000..1070c50f35807 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/nms.hpp @@ -0,0 +1,52 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NMS_HPP_ +#define NMS_HPP_ + +#include + +#include + +namespace yolo +{ +int nms( + int batchSize, const void * const * inputs, void * const * outputs, size_t count, + int detections_per_im, float nms_thresh, void * workspace, size_t workspace_size, + cudaStream_t stream); +} + +#endif // NMS_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/nms_plugin.hpp b/perception/tensorrt_yolo/lib/include/nms_plugin.hpp new file mode 100644 index 0000000000000..58bc49e57adf4 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/nms_plugin.hpp @@ -0,0 +1,123 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NMS_PLUGIN_HPP_ +#define NMS_PLUGIN_HPP_ + +#include + +#include +#include + +namespace yolo +{ +class NMSPlugin : public nvinfer1::IPluginV2DynamicExt +{ +public: + NMSPlugin(float nms_thresh, int detections_per_im); + NMSPlugin(float nms_thresh, int detections_per_im, size_t count); + NMSPlugin(const void * data, size_t length); + + // IPluginV2 methods + const char * getPluginType() const noexcept override; + const char * getPluginVersion() const noexcept override; + int getNbOutputs() const noexcept override; + int initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void * buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(const char * libNamespace) noexcept override; + const char * getPluginNamespace() const noexcept override; + + // IPluginV2Ext methods + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override; + + // IPluginV2DynamicExt methods + nvinfer1::IPluginV2DynamicExt * clone() const noexcept override; + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) noexcept override; + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs, + int nbOutputs) noexcept override; + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override; + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inputs, int nbInputs, + const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override; + int enqueue( + const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + +private: + float nms_thresh_; + int detections_per_im_; + + size_t count_; + mutable int size = -1; +}; + +class NMSPluginCreator : public nvinfer1::IPluginCreator +{ +public: + NMSPluginCreator(); + + const char * getPluginName() const noexcept override; + + const char * getPluginVersion() const noexcept override; + + const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(const char * libNamespace) noexcept override; + + const char * getPluginNamespace() const noexcept override; +}; + +REGISTER_TENSORRT_PLUGIN(NMSPluginCreator); + +} // namespace yolo + +#endif // NMS_PLUGIN_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/trt_yolo.hpp b/perception/tensorrt_yolo/lib/include/trt_yolo.hpp new file mode 100644 index 0000000000000..813811226a0a6 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/trt_yolo.hpp @@ -0,0 +1,151 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef TRT_YOLO_HPP_ +#define TRT_YOLO_HPP_ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace yolo +{ +struct Deleter +{ + template + void operator()(T * obj) const + { + if (obj) { + obj->destroy(); + } + } +}; + +template +using unique_ptr = std::unique_ptr; + +class Logger : public nvinfer1::ILogger +{ +public: + explicit Logger(bool verbose) : verbose_(verbose) {} + + void log(Severity severity, const char * msg) noexcept override + { + if (verbose_ || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE))) { + std::cout << msg << std::endl; + } + } + +private: + bool verbose_{false}; +}; + +struct Config +{ + int num_anchors; + std::vector anchors; + std::vector scale_x_y; + float score_thresh; + float iou_thresh; + int detections_per_im; + bool use_darknet_layer; + float ignore_thresh; +}; + +class Net +{ +public: + // Create engine from engine path + explicit Net(const std::string & engine_path, bool verbose = false); + + // Create engine from serialized onnx model + Net( + const std::string & onnx_file_path, const std::string & precision, const int max_batch_size, + const Config & yolo_config, const std::vector & calibration_images, + const std::string & calibration_table, bool verbose = false, + size_t workspace_size = (1ULL << 30)); + + ~Net(); + + // Save model to path + void save(const std::string & path) const; + + bool detect(const cv::Mat & in_img, float * out_scores, float * out_boxes, float * out_classes); + + // Get (c, h, w) size of the fixed input + std::vector getInputDims() const; + + std::vector getOutputScoreSize() const; + + // Get max allowed batch size + int getMaxBatchSize() const; + + // Get max number of detections + int getMaxDetections() const; + + int getInputSize() const; + +private: + unique_ptr runtime_ = nullptr; + unique_ptr engine_ = nullptr; + unique_ptr context_ = nullptr; + cudaStream_t stream_ = nullptr; + cuda::unique_ptr input_d_ = nullptr; + cuda::unique_ptr out_scores_d_ = nullptr; + cuda::unique_ptr out_boxes_d_ = nullptr; + cuda::unique_ptr out_classes_d_ = nullptr; + + void load(const std::string & path); + bool prepare(); + std::vector preprocess( + const cv::Mat & in_img, const int c, const int h, const int w) const; + // Infer using pre-allocated GPU buffers {data, scores, boxes} + void infer(std::vector & buffers, const int batch_size); +}; + +} // namespace yolo + +#endif // TRT_YOLO_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/yolo_layer.hpp b/perception/tensorrt_yolo/lib/include/yolo_layer.hpp new file mode 100644 index 0000000000000..62dc601185140 --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/yolo_layer.hpp @@ -0,0 +1,77 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef YOLO_LAYER_HPP_ +#define YOLO_LAYER_HPP_ + +#include + +#include + +namespace yolo +{ +int yoloLayer( + int batch_size, const void * const * inputs, void * const * outputs, int grid_width, + int grid_height, int num_classes, int num_anchors, const std::vector & anchors, + int input_width, int input_height, float scale_xy, float score_thresh, bool use_darknet_layer, + void * workspace, size_t workspace_size, cudaStream_t stream); +} // namespace yolo + +#endif // YOLO_LAYER_HPP_ diff --git a/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp b/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp new file mode 100644 index 0000000000000..a7e10e8a6351d --- /dev/null +++ b/perception/tensorrt_yolo/lib/include/yolo_layer_plugin.hpp @@ -0,0 +1,155 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef YOLO_LAYER_PLUGIN_HPP_ +#define YOLO_LAYER_PLUGIN_HPP_ + +#include + +#include + +#include +#include +#include +#include + +namespace yolo +{ +class YoloLayerPlugin : public nvinfer1::IPluginV2DynamicExt +{ +public: + explicit YoloLayerPlugin( + int width, int height, int num_classes, std::vector & anchors, float scale_xy, + float score_thresh, bool use_darknet_layer); + YoloLayerPlugin(const void * data, size_t length); + + // IPluginV2 methods + const char * getPluginType() const noexcept override; + const char * getPluginVersion() const noexcept override; + int getNbOutputs() const noexcept override; + int initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void * buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(const char * libNamespace) noexcept override; + const char * getPluginNamespace() const noexcept override; + + // IPluginV2Ext methods + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inputType, int nbInputs) const noexcept override; + + // IPluginV2DynamicExt methods + nvinfer1::IPluginV2DynamicExt * clone() const noexcept override; + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) noexcept override; + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * inOut, int nbInputs, + int nbOutputs) noexcept override; + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * out, int nbOutputs) noexcept override; + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inputs, int nbInputs, + const nvinfer1::PluginTensorDesc * outputs, int nbOutputs) const noexcept override; + int enqueue( + const nvinfer1::PluginTensorDesc * inputDesc, const nvinfer1::PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept override; + +private: + int width_; + int height_; + int num_anchors_; + std::vector anchors_; + float scale_x_y_; + float score_thresh_; + bool use_darknet_layer_; + mutable int size = -1; +}; + +class YoloLayerPluginCreator : public nvinfer1::IPluginCreator +{ +public: + YoloLayerPluginCreator(); + + const char * getPluginName() const noexcept override; + + const char * getPluginVersion() const noexcept override; + + const nvinfer1::PluginFieldCollection * getFieldNames() noexcept override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) noexcept override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(const char * libNamespace) noexcept override; + + const char * getPluginNamespace() const noexcept override; +}; + +REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); + +} // namespace yolo + +#endif // YOLO_LAYER_PLUGIN_HPP_ diff --git a/perception/tensorrt_yolo/lib/src/plugins/mish.cu b/perception/tensorrt_yolo/lib/src/plugins/mish.cu new file mode 100755 index 0000000000000..341d036cf53d0 --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/mish.cu @@ -0,0 +1,71 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include + +#include + +namespace yolo +{ +__device__ float mish(float x) +{ + float e = __expf(x); + float n = e * e + 2 * e; + if (x <= -0.6f) return x * __fdividef(n, n + 2); + + return x - 2 * __fdividef(x, n + 2); +} +template +__global__ void mishKernel(const T * input, T * output, int num_elem) +{ + int idx = threadIdx.x + TPB * blockIdx.x; + if (idx >= num_elem) return; + output[idx] = mish(input[idx]); +} + +int mish(cudaStream_t stream, const float * input, float * output, int n) +{ + constexpr int blockSize = 256; + const int gridSize = (n + blockSize - 1) / blockSize; + mishKernel<<>>(input, output, n); + return 0; +} + +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp new file mode 100644 index 0000000000000..efdf8540ee922 --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/mish_plugin.cpp @@ -0,0 +1,235 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include + +#include +#include +#include +#include + +using nvinfer1::DataType; +using nvinfer1::Dims; +using nvinfer1::DimsExprs; +using nvinfer1::DynamicPluginTensorDesc; +using nvinfer1::IExprBuilder; +using nvinfer1::IPluginV2DynamicExt; +using nvinfer1::PluginField; +using nvinfer1::PluginFieldCollection; +using nvinfer1::PluginFormat; +using nvinfer1::PluginTensorDesc; + +namespace +{ +const char * MISH_PLUGIN_VERSION{"1"}; +const char * MISH_PLUGIN_NAME{"Mish_TRT"}; + +inline int64_t volume(const Dims & d) +{ + int64_t v = 1; + for (int64_t i = 0; i < d.nbDims; i++) { + v *= d.d[i]; + } + return v; +} +} // namespace + +namespace yolo +{ +MishPlugin::MishPlugin() {} + +// create the plugin at runtime from a byte stream +MishPlugin::MishPlugin(const void * data, size_t length) +{ + (void)data; + (void)length; +} + +// IPluginV2 Methods + +const char * MishPlugin::getPluginType() const noexcept { return MISH_PLUGIN_NAME; } + +const char * MishPlugin::getPluginVersion() const noexcept { return MISH_PLUGIN_VERSION; } + +int MishPlugin::getNbOutputs() const noexcept { return 1; } + +int MishPlugin::initialize() noexcept { return 0; } + +void MishPlugin::terminate() noexcept {} + +size_t MishPlugin::getSerializationSize() const noexcept { return 0; } + +void MishPlugin::serialize(void * buffer) const noexcept { (void)buffer; } + +void MishPlugin::destroy() noexcept { delete this; } + +void MishPlugin::setPluginNamespace(const char * pluginNamespace) noexcept +{ + mPluginNamespace = pluginNamespace; +} + +const char * MishPlugin::getPluginNamespace() const noexcept { return mPluginNamespace; } + +// IPluginV2Ext Methods + +DataType MishPlugin::getOutputDataType( + int index, const DataType * inputTypes, int nbInputs) const noexcept +{ + (void)index; + (void)inputTypes; + (void)nbInputs; + + assert(inputTypes[0] == DataType::kFLOAT); + return inputTypes[0]; +} + +// IPluginV2DynamicExt Methods + +IPluginV2DynamicExt * MishPlugin::clone() const noexcept +{ + auto plugin = new MishPlugin(*this); + plugin->setPluginNamespace(mPluginNamespace); + return plugin; +} + +DimsExprs MishPlugin::getOutputDimensions( + int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept +{ + (void)outputIndex; + (void)nbInputs; + (void)exprBuilder; + + return inputs[0]; +} + +bool MishPlugin::supportsFormatCombination( + int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept +{ + (void)nbInputs; + (void)nbOutputs; + + return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR; +} + +void MishPlugin::configurePlugin( + const DynamicPluginTensorDesc * in, int nbInput, const DynamicPluginTensorDesc * out, + int nbOutput) noexcept +{ + (void)in; + (void)nbInput; + (void)out; + (void)nbOutput; + + assert(nbInput == 1); + assert(nbOutput == 1); +} + +size_t MishPlugin::getWorkspaceSize( + const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs, + int nbOutputs) const noexcept +{ + (void)inputs; + (void)nbInputs; + (void)outputs; + (void)nbOutputs; + + return 0; +} + +int MishPlugin::enqueue( + const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept +{ + (void)inputDesc; + (void)outputDesc; + (void)workspace; + + const int input_volume = volume(inputDesc[0].dims); + + int status = -1; + + const float * input = static_cast(inputs[0]); + float * output = static_cast(outputs[0]); + status = mish(stream, input, output, input_volume); + return status; +} + +PluginFieldCollection MishPluginCreator::mFC{}; +std::vector MishPluginCreator::mPluginAttributes; + +MishPluginCreator::MishPluginCreator() +{ + mPluginAttributes.clear(); + + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char * MishPluginCreator::getPluginName() const noexcept { return MISH_PLUGIN_NAME; } + +const char * MishPluginCreator::getPluginVersion() const noexcept { return MISH_PLUGIN_VERSION; } + +const PluginFieldCollection * MishPluginCreator::getFieldNames() noexcept { return &mFC; } + +IPluginV2DynamicExt * MishPluginCreator::createPlugin( + const char * name, const PluginFieldCollection * fc) noexcept +{ + (void)name; + (void)fc; + + MishPlugin * obj = new MishPlugin(); + obj->setPluginNamespace(mNamespace.c_str()); + return obj; +} + +IPluginV2DynamicExt * MishPluginCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept +{ + (void)name; + + // This object will be deleted when the network is destroyed, which will + // call MishPlugin::destroy() + MishPlugin * obj = new MishPlugin(serialData, serialLength); + obj->setPluginNamespace(mNamespace.c_str()); + return obj; +} +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/plugins/nms.cu b/perception/tensorrt_yolo/lib/src/plugins/nms.cu new file mode 100755 index 0000000000000..a7c735b5ae0ce --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/nms.cu @@ -0,0 +1,188 @@ +/// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if CUDART_VERSION >= 11000 +#include +#include +#else +#include +#include +using namespace thrust::cuda_cub; +#endif + +#include +#include + +namespace yolo +{ +__global__ void nms_kernel( + const int num_per_thread, const float threshold, const int num_detections, const int * indices, + float * scores, const float * classes, const float4 * boxes) +{ + // Go through detections by descending score + for (int m = 0; m < num_detections; m++) { + for (int n = 0; n < num_per_thread; n++) { + int i = threadIdx.x * num_per_thread + n; + if (i < num_detections && m < i && scores[m] > 0.0f) { + int idx = indices[i]; + int max_idx = indices[m]; + int icls = classes[idx]; + int mcls = classes[max_idx]; + if (mcls == icls) { + float4 ibox = boxes[idx]; + float4 mbox = boxes[max_idx]; + float x1 = max(ibox.x, mbox.x); + float y1 = max(ibox.y, mbox.y); + float x2 = min(ibox.x + ibox.z, mbox.x + mbox.z); + float y2 = min(ibox.y + ibox.w, ibox.y + mbox.w); + float w = max(0.0f, x2 - x1); + float h = max(0.0f, y2 - y1); + float iarea = ibox.z * ibox.w; + float marea = mbox.z * mbox.w; + float inter = w * h; + float overlap = inter / (iarea + marea - inter); + if (overlap > threshold) { + scores[i] = 0.0f; + } + } + } + } + + // Sync discarded detections + __syncthreads(); + } +} + +int nms( + int batch_size, const void * const * inputs, void * const * outputs, size_t count, + int detections_per_im, float nms_thresh, void * workspace, size_t workspace_size, + cudaStream_t stream) +{ + if (!workspace || !workspace_size) { + // Return required scratch space size cub style + workspace_size = cuda::get_size_aligned(count); // flags + workspace_size += cuda::get_size_aligned(count); // indices + workspace_size += cuda::get_size_aligned(count); // indices_sorted + workspace_size += cuda::get_size_aligned(count); // scores + workspace_size += cuda::get_size_aligned(count); // scores_sorted + + size_t temp_size_flag = 0; + cub::DeviceSelect::Flagged( + (void *)nullptr, temp_size_flag, cub::CountingInputIterator(count), (bool *)nullptr, + (int *)nullptr, (int *)nullptr, count); + size_t temp_size_sort = 0; + cub::DeviceRadixSort::SortPairsDescending( + (void *)nullptr, temp_size_sort, (float *)nullptr, (float *)nullptr, (int *)nullptr, + (int *)nullptr, count); + workspace_size += std::max(temp_size_flag, temp_size_sort); + + return workspace_size; + } + + auto on_stream = thrust::cuda::par.on(stream); + + auto flags = cuda::get_next_ptr(count, workspace, workspace_size); + auto indices = cuda::get_next_ptr(count, workspace, workspace_size); + auto indices_sorted = cuda::get_next_ptr(count, workspace, workspace_size); + auto scores = cuda::get_next_ptr(count, workspace, workspace_size); + auto scores_sorted = cuda::get_next_ptr(count, workspace, workspace_size); + + for (int batch = 0; batch < batch_size; batch++) { + auto in_scores = static_cast(inputs[0]) + batch * count; + auto in_boxes = static_cast(inputs[1]) + batch * count; + auto in_classes = static_cast(inputs[2]) + batch * count; + + auto out_scores = static_cast(outputs[0]) + batch * detections_per_im; + auto out_boxes = static_cast(outputs[1]) + batch * detections_per_im; + auto out_classes = static_cast(outputs[2]) + batch * detections_per_im; + + // Discard null scores + thrust::transform( + on_stream, in_scores, in_scores + count, flags, thrust::placeholders::_1 > 0.0f); + + int * num_selected = reinterpret_cast(indices_sorted); + cub::DeviceSelect::Flagged( + workspace, workspace_size, cub::CountingInputIterator(0), flags, indices, num_selected, + count, stream); + cudaStreamSynchronize(stream); + int num_detections = *thrust::device_pointer_cast(num_selected); + + // Sort scores and corresponding indices + thrust::gather(on_stream, indices, indices + num_detections, in_scores, scores); + cub::DeviceRadixSort::SortPairsDescending( + workspace, workspace_size, scores, scores_sorted, indices, indices_sorted, num_detections, 0, + sizeof(*scores) * 8, stream); + + // Launch actual NMS kernel - 1 block with each thread handling n detections + const int max_threads = 1024; + int num_per_thread = ceil((float)num_detections / max_threads); + nms_kernel<<<1, max_threads, 0, stream>>>( + num_per_thread, nms_thresh, num_detections, indices_sorted, scores_sorted, in_classes, + in_boxes); + + // Re-sort with updated scores + cub::DeviceRadixSort::SortPairsDescending( + workspace, workspace_size, scores_sorted, scores, indices_sorted, indices, num_detections, 0, + sizeof(*scores) * 8, stream); + + // Gather filtered scores, boxes, classes + num_detections = min(detections_per_im, num_detections); + cudaMemcpyAsync( + out_scores, scores, num_detections * sizeof *scores, cudaMemcpyDeviceToDevice, stream); + if (num_detections < detections_per_im) { + thrust::fill_n(on_stream, out_scores + num_detections, detections_per_im - num_detections, 0); + } + thrust::gather(on_stream, indices, indices + num_detections, in_boxes, out_boxes); + thrust::gather(on_stream, indices, indices + num_detections, in_classes, out_classes); + } + + return 0; +} + +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp new file mode 100644 index 0000000000000..783e8d83f7a01 --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/nms_plugin.cpp @@ -0,0 +1,245 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include +#include + +using nvinfer1::DataType; +using nvinfer1::DimsExprs; +using nvinfer1::DynamicPluginTensorDesc; +using nvinfer1::IExprBuilder; +using nvinfer1::IPluginV2DynamicExt; +using nvinfer1::PluginFieldCollection; +using nvinfer1::PluginFormat; +using nvinfer1::PluginTensorDesc; + +namespace +{ +const char * NMS_PLUGIN_VERSION{"1"}; +const char * NMS_PLUGIN_NAME{"YOLO_NMS_TRT"}; +const char * NMS_PLUGIN_NAMESPACE{""}; + +template +void write(char *& buffer, const T & val) +{ + *reinterpret_cast(buffer) = val; + buffer += sizeof(T); +} + +template +void read(const char *& buffer, T & val) +{ + val = *reinterpret_cast(buffer); + buffer += sizeof(T); +} + +} // namespace + +namespace yolo +{ +NMSPlugin::NMSPlugin(float nms_thresh, int detections_per_im) +: nms_thresh_(nms_thresh), detections_per_im_(detections_per_im) +{ + assert(nms_thresh > 0); + assert(detections_per_im > 0); +} + +NMSPlugin::NMSPlugin(float nms_thresh, int detections_per_im, size_t count) +: nms_thresh_(nms_thresh), detections_per_im_(detections_per_im), count_(count) +{ + assert(nms_thresh > 0); + assert(detections_per_im > 0); + assert(count > 0); +} + +NMSPlugin::NMSPlugin(void const * data, size_t length) +{ + (void)length; + + const char * d = static_cast(data); + read(d, nms_thresh_); + read(d, detections_per_im_); + read(d, count_); +} + +const char * NMSPlugin::getPluginType() const noexcept { return NMS_PLUGIN_NAME; } + +const char * NMSPlugin::getPluginVersion() const noexcept { return NMS_PLUGIN_VERSION; } + +int NMSPlugin::getNbOutputs() const noexcept { return 3; } + +int NMSPlugin::initialize() noexcept { return 0; } + +void NMSPlugin::terminate() noexcept {} + +size_t NMSPlugin::getSerializationSize() const noexcept +{ + return sizeof(nms_thresh_) + sizeof(detections_per_im_) + sizeof(count_); +} + +void NMSPlugin::serialize(void * buffer) const noexcept +{ + char * d = static_cast(buffer); + write(d, nms_thresh_); + write(d, detections_per_im_); + write(d, count_); +} + +void NMSPlugin::destroy() noexcept { delete this; } + +void NMSPlugin::setPluginNamespace(const char * N) noexcept { (void)N; } + +const char * NMSPlugin::getPluginNamespace() const noexcept { return NMS_PLUGIN_NAMESPACE; } + +// IPluginV2Ext Methods + +DataType NMSPlugin::getOutputDataType( + int index, const DataType * inputTypes, int nbInputs) const noexcept +{ + (void)index; + (void)inputTypes; + (void)nbInputs; + + assert(index < 3); + return DataType::kFLOAT; +} + +// IPluginV2DynamicExt Methods + +IPluginV2DynamicExt * NMSPlugin::clone() const noexcept +{ + return new NMSPlugin(nms_thresh_, detections_per_im_, count_); +} + +DimsExprs NMSPlugin::getOutputDimensions( + int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept +{ + (void)nbInputs; + + DimsExprs output(inputs[0]); + output.d[1] = exprBuilder.constant(detections_per_im_ * (outputIndex == 1 ? 4 : 1)); + output.d[2] = exprBuilder.constant(1); + output.d[3] = exprBuilder.constant(1); + return output; +} + +bool NMSPlugin::supportsFormatCombination( + int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept +{ + (void)nbInputs; + (void)nbOutputs; + + assert(nbInputs == 3); + assert(nbOutputs == 3); + assert(pos < 6); + return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR; +} + +void NMSPlugin::configurePlugin( + const DynamicPluginTensorDesc * in, int nbInputs, const DynamicPluginTensorDesc * out, + int nbOutputs) noexcept +{ + (void)nbInputs; + (void)nbOutputs; + (void)out; + (void)nbOutputs; + + assert(nbInputs == 3); + assert(in[0].desc.dims.d[1] == in[2].desc.dims.d[1]); + assert(in[1].desc.dims.d[1] == in[2].desc.dims.d[1] * 4); + count_ = in[0].desc.dims.d[1]; +} + +size_t NMSPlugin::getWorkspaceSize( + const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs, + int nbOutputs) const noexcept +{ + (void)nbInputs; + (void)outputs; + (void)nbOutputs; + + if (size < 0) { + size = nms( + inputs->dims.d[0], nullptr, nullptr, count_, detections_per_im_, nms_thresh_, nullptr, 0, + nullptr); + } + return size; +} + +int NMSPlugin::enqueue( + const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept +{ + return nms( + inputDesc->dims.d[0], inputs, outputs, count_, detections_per_im_, nms_thresh_, workspace, + getWorkspaceSize(inputDesc, 3, outputDesc, 3), stream); +} + +NMSPluginCreator::NMSPluginCreator() {} + +const char * NMSPluginCreator::getPluginName() const noexcept { return NMS_PLUGIN_NAME; } + +const char * NMSPluginCreator::getPluginVersion() const noexcept { return NMS_PLUGIN_VERSION; } + +const char * NMSPluginCreator::getPluginNamespace() const noexcept { return NMS_PLUGIN_NAMESPACE; } + +void NMSPluginCreator::setPluginNamespace(const char * N) noexcept { (void)N; } +const PluginFieldCollection * NMSPluginCreator::getFieldNames() noexcept { return nullptr; } +IPluginV2DynamicExt * NMSPluginCreator::createPlugin( + const char * name, const PluginFieldCollection * fc) noexcept +{ + (void)name; + (void)fc; + return nullptr; +} + +IPluginV2DynamicExt * NMSPluginCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept +{ + (void)name; + + return new NMSPlugin(serialData, serialLength); +} + +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu new file mode 100755 index 0000000000000..492efd8293a30 --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer.cu @@ -0,0 +1,166 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include + +#include +#include + +namespace yolo +{ +inline __device__ float sigmoid(float x) { return 1.0f / (1.0f + __expf(-x)); } + +inline __device__ float scaleSigmoid(float x, float scale) +{ + return scale * sigmoid(x) - (scale - 1.0f) * 0.5f; +} + +template +__global__ void yoloLayerKernel( + const float * input, float * out_scores, float4 * out_boxes, float * out_classes, int grid_width, + int grid_height, int num_classes, int num_anchors, const float * anchors, int input_width, + int input_height, float scale_x_y, float score_thresh, bool use_darknet_layer) +{ + int idx = threadIdx.x + TPB * blockIdx.x; + int total_grids = grid_width * grid_height; + if (idx >= total_grids * num_anchors) return; + auto out_score = (out_scores) + idx; + auto out_box = (out_boxes) + idx; + auto out_class = (out_classes) + idx; + + int anchor_idx = idx / total_grids; + idx = idx - total_grids * anchor_idx; + int info_len = 5 + num_classes; + auto cur_input = static_cast(input) + anchor_idx * (info_len * total_grids); + + int class_id; + float max_cls_logit = -CUDART_INF_F; // minus infinity + for (int i = 5; i < info_len; ++i) { + float l = cur_input[idx + i * total_grids]; + if (l > max_cls_logit) { + max_cls_logit = l; + class_id = i - 5; + } + } + float max_cls_prob = sigmoid(max_cls_logit); + float objectness = sigmoid(cur_input[idx + 4 * total_grids]); + + int row = idx / grid_width; + int col = idx % grid_width; + float x = 0, y = 0, w = 0, h = 0; + + if (use_darknet_layer) { + x = (col + scaleSigmoid(cur_input[idx + 0 * total_grids], scale_x_y)) / grid_width; // [0, 1] + y = (row + scaleSigmoid(cur_input[idx + 1 * total_grids], scale_x_y)) / grid_height; // [0, 1] + w = __expf(cur_input[idx + 2 * total_grids]) * anchors[2 * anchor_idx] / input_width; // [0, 1] + h = __expf(cur_input[idx + 3 * total_grids]) * anchors[2 * anchor_idx + 1] / + input_height; // [0, 1] + } else { + x = (col + sigmoid(cur_input[idx + 0 * total_grids]) * 2 - 0.5) / grid_width; // [0, 1] + y = (row + sigmoid(cur_input[idx + 1 * total_grids]) * 2 - 0.5) / grid_height; // [0, 1] + w = (sigmoid(cur_input[idx + 2 * total_grids]) * 2) * + (sigmoid(cur_input[idx + 2 * total_grids]) * 2) * anchors[2 * anchor_idx] / + input_width; // [0, 1] + h = (sigmoid(cur_input[idx + 3 * total_grids]) * 2) * + (sigmoid(cur_input[idx + 3 * total_grids]) * 2) * anchors[2 * anchor_idx + 1] / + input_height; // [0, 1] + } + x -= w / 2; // shift from center to top-left + y -= h / 2; + *out_box = make_float4(x, y, w, h); + *out_class = class_id; + *out_score = objectness < score_thresh ? 0.0 : max_cls_prob * objectness; +} + +int yoloLayer( + int batch_size, const void * const * inputs, void * const * outputs, int grid_width, + int grid_height, int num_classes, int num_anchors, const std::vector & anchors, + int input_width, int input_height, float scale_x_y, float score_thresh, bool use_darknet_layer, + void * workspace, size_t workspace_size, cudaStream_t stream) +{ + if (!workspace || !workspace_size) { + workspace_size = cuda::get_size_aligned(anchors.size()); + return workspace_size; + } + + auto anchors_d = cuda::get_next_ptr(anchors.size(), workspace, workspace_size); + cudaMemcpyAsync( + anchors_d, anchors.data(), anchors.size() * sizeof *anchors_d, cudaMemcpyHostToDevice, stream); + + int num_elements = num_anchors * grid_width * grid_height; + constexpr int block_size = 256; + const int grid_size = (num_elements + block_size - 1) / block_size; + for (int batch = 0; batch < batch_size; ++batch) { + auto input = static_cast(inputs[0]) + + batch * num_anchors * (num_classes + 5) * grid_width * grid_height; + auto out_scores = static_cast(outputs[0]) + batch * num_elements; + auto out_boxes = static_cast(outputs[1]) + batch * num_elements; + auto out_classes = static_cast(outputs[2]) + batch * num_elements; + yoloLayerKernel<<>>( + input, out_scores, out_boxes, out_classes, grid_width, grid_height, num_classes, num_anchors, + anchors_d, input_width, input_height, scale_x_y, score_thresh, use_darknet_layer); + } + return 0; +} + +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp new file mode 100644 index 0000000000000..5e73b989089c7 --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/plugins/yolo_layer_plugin.cpp @@ -0,0 +1,311 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * MIT License + + * Copyright (c) 2019-2020 Wang Xinyu + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +using nvinfer1::DataType; +using nvinfer1::DimsExprs; +using nvinfer1::DynamicPluginTensorDesc; +using nvinfer1::IExprBuilder; +using nvinfer1::IPluginV2DynamicExt; +using nvinfer1::PluginFieldCollection; +using nvinfer1::PluginFormat; +using nvinfer1::PluginTensorDesc; + +namespace +{ +const char * YOLO_LAYER_PLUGIN_VERSION{"1"}; +const char * YOLO_LAYER_PLUGIN_NAME{"YOLO_Layer_TRT"}; +const char * YOLO_LAYER_PLUGIN_NAMESPACE{""}; + +template +void write(char *& buffer, const T & val) +{ + *reinterpret_cast(buffer) = val; + buffer += sizeof(T); +} + +template +void read(const char *& buffer, T & val) +{ + val = *reinterpret_cast(buffer); + buffer += sizeof(T); +} +} // namespace + +namespace yolo +{ +YoloLayerPlugin::YoloLayerPlugin( + int width, int height, int num_anchors, std::vector & anchors, float scale_x_y, + float score_thresh, bool use_darknet_layer) +: width_(width), + height_(height), + num_anchors_(num_anchors), + anchors_(anchors), + scale_x_y_(scale_x_y), + score_thresh_(score_thresh), + use_darknet_layer_(use_darknet_layer) +{ +} + +// create the plugin at runtime from a byte stream +YoloLayerPlugin::YoloLayerPlugin(const void * data, size_t length) +{ + (void)length; + + const char * d = static_cast(data); + read(d, width_); + read(d, height_); + read(d, num_anchors_); + int anchor_size = num_anchors_ * 2; + while (anchor_size--) { + float val; + read(d, val); + anchors_.push_back(val); + } + read(d, scale_x_y_); + read(d, score_thresh_); + read(d, use_darknet_layer_); +} +// IPluginV2 Methods + +const char * YoloLayerPlugin::getPluginType() const noexcept { return YOLO_LAYER_PLUGIN_NAME; } + +const char * YoloLayerPlugin::getPluginVersion() const noexcept +{ + return YOLO_LAYER_PLUGIN_VERSION; +} + +int YoloLayerPlugin::getNbOutputs() const noexcept { return 3; } + +int YoloLayerPlugin::initialize() noexcept { return 0; } + +void YoloLayerPlugin::terminate() noexcept {} + +size_t YoloLayerPlugin::getSerializationSize() const noexcept +{ + return sizeof(width_) + sizeof(height_) + sizeof(num_anchors_) + + sizeof(float) * num_anchors_ * 2 + sizeof(scale_x_y_) + sizeof(score_thresh_) + + sizeof(use_darknet_layer_); +} + +void YoloLayerPlugin::serialize(void * buffer) const noexcept +{ + char * d = reinterpret_cast(buffer); + write(d, width_); + write(d, height_); + write(d, num_anchors_); + for (int i = 0; i < num_anchors_ * 2; ++i) { + write(d, anchors_[i]); + } + write(d, scale_x_y_); + write(d, score_thresh_); + write(d, use_darknet_layer_); +} + +void YoloLayerPlugin::destroy() noexcept { delete this; } + +void YoloLayerPlugin::setPluginNamespace(const char * N) noexcept { (void)N; } + +const char * YoloLayerPlugin::getPluginNamespace() const noexcept +{ + return YOLO_LAYER_PLUGIN_NAMESPACE; +} + +// IPluginV2Ext Methods + +DataType YoloLayerPlugin::getOutputDataType( + int index, const DataType * inputTypes, int nbInputs) const noexcept +{ + (void)index; + (void)inputTypes; + (void)nbInputs; + + return DataType::kFLOAT; +} + +// IPluginV2DynamicExt Methods + +IPluginV2DynamicExt * YoloLayerPlugin::clone() const noexcept { return new YoloLayerPlugin(*this); } + +DimsExprs YoloLayerPlugin::getOutputDimensions( + int outputIndex, const DimsExprs * inputs, int nbInputs, IExprBuilder & exprBuilder) noexcept +{ + (void)nbInputs; + + DimsExprs ret = inputs[0]; + ret.nbDims = 3; + const auto total_count = + ret.d[2]->getConstantValue() * ret.d[3]->getConstantValue() * num_anchors_; + ret.d[1] = exprBuilder.constant(total_count * (outputIndex == 1 ? 4 : 1)); + ret.d[2] = exprBuilder.constant(1); + ret.d[3] = exprBuilder.constant(1); + return ret; +} + +bool YoloLayerPlugin::supportsFormatCombination( + int pos, const PluginTensorDesc * inOut, int nbInputs, int nbOutputs) noexcept +{ + (void)nbInputs; + (void)nbOutputs; + + assert(nbInputs == 1); + assert(nbOutputs == 3); + assert(pos < 4); + return inOut[pos].type == DataType::kFLOAT && inOut[pos].format == PluginFormat::kLINEAR; +} + +void YoloLayerPlugin::configurePlugin( + const DynamicPluginTensorDesc * in, int nbInput, const DynamicPluginTensorDesc * out, + int nbOutput) noexcept +{ + (void)in; + (void)nbInput; + (void)out; + (void)nbOutput; + + assert(nbInput == 1); + assert(nbOutput == 3); +} + +size_t YoloLayerPlugin::getWorkspaceSize( + const PluginTensorDesc * inputs, int nbInputs, const PluginTensorDesc * outputs, + int nbOutputs) const noexcept +{ + (void)nbInputs; + (void)outputs; + (void)nbOutputs; + + if (size < 0) { + const int batch_size = inputs[0].dims.d[0]; + const int grid_width = inputs[0].dims.d[2]; + const int grid_height = inputs[0].dims.d[3]; + const int num_classes = inputs[0].dims.d[1] / num_anchors_ - 5; + size = yoloLayer( + batch_size, nullptr, nullptr, grid_width, grid_height, num_classes, num_anchors_, anchors_, + width_, height_, scale_x_y_, score_thresh_, use_darknet_layer_, nullptr, 0, nullptr); + } + return size; +} + +int YoloLayerPlugin::enqueue( + const PluginTensorDesc * inputDesc, const PluginTensorDesc * outputDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) noexcept +{ + const int batch_size = inputDesc[0].dims.d[0]; + const int grid_width = inputDesc[0].dims.d[2]; + const int grid_height = inputDesc[0].dims.d[3]; + const int num_classes = inputDesc[0].dims.d[1] / num_anchors_ - 5; + + int status = -1; + status = yoloLayer( + batch_size, inputs, outputs, grid_width, grid_height, num_classes, num_anchors_, anchors_, + width_, height_, scale_x_y_, score_thresh_, use_darknet_layer_, workspace, + getWorkspaceSize(inputDesc, 1, outputDesc, 3), stream); + return status; +} + +YoloLayerPluginCreator::YoloLayerPluginCreator() {} + +const char * YoloLayerPluginCreator::getPluginName() const noexcept +{ + return YOLO_LAYER_PLUGIN_NAME; +} + +const char * YoloLayerPluginCreator::getPluginVersion() const noexcept +{ + return YOLO_LAYER_PLUGIN_VERSION; +} + +const char * YoloLayerPluginCreator::getPluginNamespace() const noexcept +{ + return YOLO_LAYER_PLUGIN_NAMESPACE; +} + +void YoloLayerPluginCreator::setPluginNamespace(const char * N) noexcept { (void)N; } +const PluginFieldCollection * YoloLayerPluginCreator::getFieldNames() noexcept { return nullptr; } + +IPluginV2DynamicExt * YoloLayerPluginCreator::createPlugin( + const char * name, const PluginFieldCollection * fc) noexcept +{ + (void)name; + (void)fc; + + return nullptr; +} + +IPluginV2DynamicExt * YoloLayerPluginCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) noexcept +{ + (void)name; + + return new YoloLayerPlugin(serialData, serialLength); +} +} // namespace yolo diff --git a/perception/tensorrt_yolo/lib/src/trt_yolo.cpp b/perception/tensorrt_yolo/lib/src/trt_yolo.cpp new file mode 100644 index 0000000000000..1be5f279714ba --- /dev/null +++ b/perception/tensorrt_yolo/lib/src/trt_yolo.cpp @@ -0,0 +1,330 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace yolo +{ +void Net::load(const std::string & path) +{ + std::ifstream file(path, std::ios::in | std::ios::binary); + file.seekg(0, file.end); + size_t size = file.tellg(); + file.seekg(0, file.beg); + + char * buffer = new char[size]; + file.read(buffer, size); + file.close(); + if (runtime_) { + engine_ = + unique_ptr(runtime_->deserializeCudaEngine(buffer, size, nullptr)); + } + delete[] buffer; +} + +bool Net::prepare() +{ + if (!engine_) { + return false; + } + context_ = unique_ptr(engine_->createExecutionContext()); + if (!context_) { + return false; + } + input_d_ = cuda::make_unique(getMaxBatchSize() * getInputSize()); + out_scores_d_ = cuda::make_unique(getMaxBatchSize() * getMaxDetections()); + out_boxes_d_ = cuda::make_unique(getMaxBatchSize() * getMaxDetections() * 4); + out_classes_d_ = cuda::make_unique(getMaxBatchSize() * getMaxDetections()); + cudaStreamCreate(&stream_); + return true; +} + +std::vector Net::preprocess( + const cv::Mat & in_img, const int c, const int w, const int h) const +{ + cv::Mat rgb; + cv::cvtColor(in_img, rgb, cv::COLOR_BGR2RGB); + + cv::resize(rgb, rgb, cv::Size(w, h)); + + cv::Mat img_float; + rgb.convertTo(img_float, CV_32FC3, 1 / 255.0); + + // HWC TO CHW + std::vector input_channels(c); + cv::split(img_float, input_channels); + + std::vector result(h * w * c); + auto data = result.data(); + int channel_length = h * w; + for (int i = 0; i < c; ++i) { + memcpy(data, input_channels[i].data, channel_length * sizeof(float)); + data += channel_length; + } + + return result; +} + +Net::Net(const std::string & path, bool verbose) +{ + Logger logger(verbose); + runtime_ = unique_ptr(nvinfer1::createInferRuntime(logger)); + load(path); + if (!prepare()) { + std::cout << "Fail to prepare engine" << std::endl; + return; + } +} + +Net::~Net() +{ + if (stream_) { + cudaStreamSynchronize(stream_); + cudaStreamDestroy(stream_); + } +} + +Net::Net( + const std::string & onnx_file_path, const std::string & precision, const int max_batch_size, + const Config & yolo_config, const std::vector & calibration_images, + const std::string & calibration_table, bool verbose, size_t workspace_size) +{ + Logger logger(verbose); + runtime_ = unique_ptr(nvinfer1::createInferRuntime(logger)); + if (!runtime_) { + std::cout << "Fail to create runtime" << std::endl; + return; + } + bool fp16 = precision.compare("FP16") == 0; + bool int8 = precision.compare("INT8") == 0; + + // Create builder + auto builder = unique_ptr(nvinfer1::createInferBuilder(logger)); + if (!builder) { + std::cout << "Fail to create builder" << std::endl; + return; + } + auto config = unique_ptr(builder->createBuilderConfig()); + if (!config) { + std::cout << "Fail to create builder config" << std::endl; + return; + } + // Allow use of FP16 layers when running in INT8 + if (fp16 || int8) { + config->setFlag(nvinfer1::BuilderFlag::kFP16); + } + config->setMaxWorkspaceSize(workspace_size); + + // Parse ONNX FCN + std::cout << "Building " << precision << " core model..." << std::endl; + const auto flag = + 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + auto network = unique_ptr(builder->createNetworkV2(flag)); + if (!network) { + std::cout << "Fail to create network" << std::endl; + return; + } + auto parser = unique_ptr(nvonnxparser::createParser(*network, logger)); + if (!parser) { + std::cout << "Fail to create parser" << std::endl; + return; + } + + parser->parseFromFile( + onnx_file_path.c_str(), static_cast(nvinfer1::ILogger::Severity::kERROR)); + std::vector scores, boxes, classes; + const auto input = network->getInput(0); + const auto num_outputs = network->getNbOutputs(); + const auto input_dims = input->getDimensions(); + const auto input_channel = input_dims.d[1]; + const auto input_height = input_dims.d[2]; + const auto input_width = input_dims.d[3]; + for (int i = 0; i < num_outputs; ++i) { + auto output = network->getOutput(i); + std::vector anchor( + yolo_config.anchors.begin() + i * yolo_config.num_anchors * 2, + yolo_config.anchors.begin() + (i + 1) * yolo_config.num_anchors * 2); + auto yoloLayerPlugin = yolo::YoloLayerPlugin( + input_width, input_height, 3, anchor, yolo_config.scale_x_y[i], yolo_config.score_thresh, + yolo_config.use_darknet_layer); + std::vector inputs = {output}; + auto layer = network->addPluginV2(inputs.data(), inputs.size(), yoloLayerPlugin); + scores.push_back(layer->getOutput(0)); + boxes.push_back(layer->getOutput(1)); + classes.push_back(layer->getOutput(2)); + } + + // Cleanup outputs + for (int i = 0; i < num_outputs; i++) { + auto output = network->getOutput(0); + network->unmarkOutput(*output); + } + + // Concat tensors from each feature map + std::vector concat; + for (auto tensors : {scores, boxes, classes}) { + auto layer = network->addConcatenation(tensors.data(), tensors.size()); + layer->setAxis(1); + auto output = layer->getOutput(0); + concat.push_back(output); + } + + // Add NMS plugin + auto nmsPlugin = yolo::NMSPlugin(yolo_config.iou_thresh, yolo_config.detections_per_im); + auto layer = network->addPluginV2(concat.data(), concat.size(), nmsPlugin); + for (int i = 0; i < layer->getNbOutputs(); i++) { + auto output = layer->getOutput(i); + network->markOutput(*output); + } + + // create profile + auto profile = builder->createOptimizationProfile(); + profile->setDimensions( + network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width}); + profile->setDimensions( + network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width}); + profile->setDimensions( + network->getInput(0)->getName(), nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims4{max_batch_size, input_channel, input_height, input_width}); + config->addOptimizationProfile(profile); + + std::unique_ptr calib{nullptr}; + if (int8) { + if (calibration_images.size() >= static_cast(max_batch_size)) { + config->setFlag(nvinfer1::BuilderFlag::kINT8); + yolo::ImageStream stream(max_batch_size, input_dims, calibration_images); + calib = std::make_unique(stream, calibration_table); + config->setInt8Calibrator(calib.get()); + } else { + std::cout << "Fail to find enough images for INT8 calibration. Build FP mode..." << std::endl; + } + } + + // Build engine + std::cout << "Applying optimizations and building TRT CUDA engine..." << std::endl; + engine_ = unique_ptr(builder->buildEngineWithConfig(*network, *config)); + if (!prepare()) { + std::cout << "Fail to prepare engine" << std::endl; + return; + } +} + +void Net::save(const std::string & path) const +{ + std::cout << "Writing to " << path << "..." << std::endl; + auto serialized = unique_ptr(engine_->serialize()); + std::ofstream file(path, std::ios::out | std::ios::binary); + file.write(reinterpret_cast(serialized->data()), serialized->size()); +} + +void Net::infer(std::vector & buffers, const int batch_size) +{ + if (!context_) { + throw std::runtime_error("Fail to create context"); + } + auto input_dims = engine_->getBindingDimensions(0); + context_->setBindingDimensions( + 0, nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3])); + context_->enqueueV2(buffers.data(), stream_, nullptr); + cudaStreamSynchronize(stream_); +} + +bool Net::detect(const cv::Mat & in_img, float * out_scores, float * out_boxes, float * out_classes) +{ + const auto input_dims = getInputDims(); + const auto input = preprocess(in_img, input_dims.at(0), input_dims.at(2), input_dims.at(1)); + CHECK_CUDA_ERROR( + cudaMemcpy(input_d_.get(), input.data(), input.size() * sizeof(float), cudaMemcpyHostToDevice)); + std::vector buffers = { + input_d_.get(), out_scores_d_.get(), out_boxes_d_.get(), out_classes_d_.get()}; + try { + infer(buffers, 1); + } catch (const std::runtime_error & e) { + return false; + } + CHECK_CUDA_ERROR(cudaMemcpyAsync( + out_scores, out_scores_d_.get(), sizeof(float) * getMaxDetections(), cudaMemcpyDeviceToHost, + stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + out_boxes, out_boxes_d_.get(), sizeof(float) * 4 * getMaxDetections(), cudaMemcpyDeviceToHost, + stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + out_classes, out_classes_d_.get(), sizeof(float) * getMaxDetections(), cudaMemcpyDeviceToHost, + stream_)); + cudaStreamSynchronize(stream_); + return true; +} + +std::vector Net::getInputDims() const +{ + auto dims = engine_->getBindingDimensions(0); + return {dims.d[1], dims.d[2], dims.d[3]}; +} + +int Net::getMaxBatchSize() const +{ + return engine_->getProfileDimensions(0, 0, nvinfer1::OptProfileSelector::kMAX).d[0]; +} + +int Net::getInputSize() const +{ + const auto input_dims = getInputDims(); + const auto input_size = + std::accumulate(std::begin(input_dims), std::end(input_dims), 1, std::multiplies()); + return input_size; +} + +int Net::getMaxDetections() const { return engine_->getBindingDimensions(1).d[1]; } + +} // namespace yolo diff --git a/perception/tensorrt_yolo/package.xml b/perception/tensorrt_yolo/package.xml new file mode 100755 index 0000000000000..6a5d4c486ab10 --- /dev/null +++ b/perception/tensorrt_yolo/package.xml @@ -0,0 +1,28 @@ + + + tensorrt_yolo + 0.1.0 + The tensorrt_yolo package + + Daisuke Nishimatsu + + Apache License 2.0 + + ament_cmake_auto + + autoware_auto_perception_msgs + autoware_perception_msgs + cv_bridge + image_transport + rclcpp + rclcpp_components + sensor_msgs + + ament_lint_auto + autoware_lint_common + + + ament_cmake + + + diff --git a/perception/tensorrt_yolo/src/nodelet.cpp b/perception/tensorrt_yolo/src/nodelet.cpp new file mode 100644 index 0000000000000..fa04bf29c8d94 --- /dev/null +++ b/perception/tensorrt_yolo/src/nodelet.cpp @@ -0,0 +1,212 @@ +// Copyright 2020 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorrt_yolo/nodelet.hpp" + +#include + +#include + +#include +#include +#include +#include +#include + +namespace +{ +std::vector getFilePath(const std::string & input_dir) +{ + glob_t globbuf; + std::vector files; + glob((input_dir + "*").c_str(), 0, NULL, &globbuf); + for (size_t i = 0; i < globbuf.gl_pathc; i++) { + files.push_back(globbuf.gl_pathv[i]); + } + globfree(&globbuf); + return files; +} +} // namespace +namespace object_recognition +{ +TensorrtYoloNodelet::TensorrtYoloNodelet(const rclcpp::NodeOptions & options) +: Node("tensorrt_yolo", options) +{ + using std::placeholders::_1; + + std::string onnx_file = declare_parameter("onnx_file", ""); + std::string engine_file = declare_parameter("engine_file", ""); + std::string label_file = declare_parameter("label_file", ""); + std::string calib_image_directory = declare_parameter("calib_image_directory", ""); + std::string calib_cache_file = declare_parameter("calib_cache_file", ""); + std::string mode = declare_parameter("mode", "FP32"); + yolo_config_.num_anchors = declare_parameter("num_anchors", 3); + auto anchors = declare_parameter( + "anchors", std::vector{ + 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326}); + std::vector anchors_float(anchors.begin(), anchors.end()); + yolo_config_.anchors = anchors_float; + auto scale_x_y = declare_parameter("scale_x_y", std::vector{1.0, 1.0, 1.0}); + std::vector scale_x_y_float(scale_x_y.begin(), scale_x_y.end()); + yolo_config_.scale_x_y = scale_x_y_float; + yolo_config_.score_thresh = declare_parameter("score_thresh", 0.1); + yolo_config_.iou_thresh = declare_parameter("iou_thresh", 0.45); + yolo_config_.detections_per_im = declare_parameter("detections_per_im", 100); + yolo_config_.use_darknet_layer = declare_parameter("use_darknet_layer", true); + yolo_config_.ignore_thresh = declare_parameter("ignore_thresh", 0.5); + + if (!readLabelFile(label_file, &labels_)) { + RCLCPP_ERROR(this->get_logger(), "Could not find label file"); + } + std::ifstream fs(engine_file); + const auto calibration_images = getFilePath(calib_image_directory); + if (fs.is_open()) { + RCLCPP_INFO(this->get_logger(), "Found %s", engine_file.c_str()); + net_ptr_.reset(new yolo::Net(engine_file, false)); + if (net_ptr_->getMaxBatchSize() != 1) { + RCLCPP_INFO( + this->get_logger(), "Max batch size %d should be 1. Rebuild engine from file", + net_ptr_->getMaxBatchSize()); + net_ptr_.reset( + new yolo::Net(onnx_file, mode, 1, yolo_config_, calibration_images, calib_cache_file)); + net_ptr_->save(engine_file); + } + } else { + RCLCPP_INFO( + this->get_logger(), "Could not find %s, try making TensorRT engine from onnx", + engine_file.c_str()); + net_ptr_.reset( + new yolo::Net(onnx_file, mode, 1, yolo_config_, calibration_images, calib_cache_file)); + net_ptr_->save(engine_file); + } + auto timer_callback = std::bind(&TensorrtYoloNodelet::connectCb, this); + const auto period_s = 0.1; + const auto period_ns = + std::chrono::duration_cast(std::chrono::duration(period_s)); + timer_ = std::make_shared>( + this->get_clock(), period_ns, std::move(timer_callback), + this->get_node_base_interface()->get_context()); + this->get_node_timers_interface()->add_timer(timer_, nullptr); + + std::lock_guard lock(connect_mutex_); + + objects_pub_ = this->create_publisher( + "out/objects", 1); + image_pub_ = image_transport::create_publisher(this, "out/image"); + + out_scores_ = + std::make_unique(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections()); + out_boxes_ = + std::make_unique(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections() * 4); + out_classes_ = + std::make_unique(net_ptr_->getMaxBatchSize() * net_ptr_->getMaxDetections()); +} + +void TensorrtYoloNodelet::connectCb() +{ + using std::placeholders::_1; + std::lock_guard lock(connect_mutex_); + if (objects_pub_->get_subscription_count() == 0 && image_pub_.getNumSubscribers() == 0) { + image_sub_.shutdown(); + } else if (!image_sub_) { + image_sub_ = image_transport::create_subscription( + this, "in/image", std::bind(&TensorrtYoloNodelet::callback, this, _1), "raw", + rmw_qos_profile_sensor_data); + } +} + +void TensorrtYoloNodelet::callback(const sensor_msgs::msg::Image::ConstSharedPtr in_image_msg) +{ + using Label = autoware_auto_perception_msgs::msg::ObjectClassification; + + autoware_perception_msgs::msg::DetectedObjectsWithFeature out_objects; + + cv_bridge::CvImagePtr in_image_ptr; + try { + in_image_ptr = cv_bridge::toCvCopy(in_image_msg, sensor_msgs::image_encodings::BGR8); + } catch (cv_bridge::Exception & e) { + RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what()); + return; + } + if (!net_ptr_->detect( + in_image_ptr->image, out_scores_.get(), out_boxes_.get(), out_classes_.get())) { + RCLCPP_WARN(this->get_logger(), "Fail to inference"); + return; + } + const auto width = in_image_ptr->image.cols; + const auto height = in_image_ptr->image.rows; + for (int i = 0; i < yolo_config_.detections_per_im; ++i) { + if (out_scores_[i] < yolo_config_.ignore_thresh) { + break; + } + autoware_perception_msgs::msg::DetectedObjectWithFeature object; + object.feature.roi.x_offset = out_boxes_[4 * i] * width; + object.feature.roi.y_offset = out_boxes_[4 * i + 1] * height; + object.feature.roi.width = out_boxes_[4 * i + 2] * width; + object.feature.roi.height = out_boxes_[4 * i + 3] * height; + object.object.classification.emplace_back(autoware_auto_perception_msgs::build