From 98aaacc78db9041f5026ff85e1e01d465f8fa1f2 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Wed, 24 Jul 2024 17:09:41 +0300 Subject: [PATCH 01/11] init CUDA device option Signed-off-by: ismetatabay --- .../tensorrt_yolox/tensorrt_yolox_node.hpp | 2 ++ .../launch/yolox_s_plus_opt.launch.xml | 2 ++ .../launch/yolox_tiny.launch.xml | 2 ++ .../schema/yolox_s_plus_opt.schema.json | 8 ++++++- .../schema/yolox_tiny.schema.json | 8 ++++++- .../src/tensorrt_yolox_node.cpp | 21 +++++++++++++++++++ 6 files changed, 41 insertions(+), 2 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp index 2e317139f27c0..d45eec6afef8f 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp @@ -80,6 +80,7 @@ class TrtYoloXNode : public rclcpp::Node void overlapSegmentByRoi( const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height); int mapRoiLabel2SegLabel(const int32_t roi_label_index); + bool setCudaDeviceId(const int gpu_id); image_transport::Publisher image_pub_; image_transport::Publisher mask_pub_; @@ -96,6 +97,7 @@ class TrtYoloXNode : public rclcpp::Node bool is_roi_overlap_segment_; bool is_publish_color_mask_; float overlap_roi_score_threshold_; + int gpu_id_; // TODO(badai-nguyen): change to function std::map remap_roi_to_semantic_ = { {"UNKNOWN", 3}, // other diff --git a/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml index 0096a219c8573..dfc23ea25e5c4 100644 --- a/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml +++ b/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml @@ -4,6 +4,7 @@ + + diff --git a/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml b/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml index a3708d14b5392..a7075c115aa2b 100644 --- a/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml +++ b/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml @@ -7,6 +7,7 @@ + @@ -20,5 +21,6 @@ + diff --git a/perception/autoware_tensorrt_yolox/schema/yolox_s_plus_opt.schema.json b/perception/autoware_tensorrt_yolox/schema/yolox_s_plus_opt.schema.json index ce1ad6c2d0caf..a55158be80b7c 100644 --- a/perception/autoware_tensorrt_yolox/schema/yolox_s_plus_opt.schema.json +++ b/perception/autoware_tensorrt_yolox/schema/yolox_s_plus_opt.schema.json @@ -70,6 +70,11 @@ "default": true, "description": "If true, pre-processing is performed on GPU." }, + "gpu_id": { + "type": "integer", + "default": 0, + "description": "GPU ID for selecting CUDA device" + }, "calibration_image_list_path": { "type": "string", "default": "", @@ -88,7 +93,8 @@ "quantize_last_layer", "profile_per_layer", "clip_value", - "preprocess_on_gpu" + "preprocess_on_gpu", + "gpu_id" ] } }, diff --git a/perception/autoware_tensorrt_yolox/schema/yolox_tiny.schema.json b/perception/autoware_tensorrt_yolox/schema/yolox_tiny.schema.json index f47b28e47a3f8..707cd393d6828 100644 --- a/perception/autoware_tensorrt_yolox/schema/yolox_tiny.schema.json +++ b/perception/autoware_tensorrt_yolox/schema/yolox_tiny.schema.json @@ -70,6 +70,11 @@ "default": true, "description": "If true, pre-processing is performed on GPU." }, + "gpu_id": { + "type": "integer", + "default": 0, + "description": "GPU ID for selecting CUDA device" + }, "calibration_image_list_path": { "type": "string", "default": "", @@ -88,7 +93,8 @@ "quantize_last_layer", "profile_per_layer", "clip_value", - "preprocess_on_gpu" + "preprocess_on_gpu", + "gpu_id" ] } }, diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index c613e7d1df52f..a2b434075ab33 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -68,6 +68,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) is_roi_overlap_segment_ = declare_parameter("is_roi_overlap_segment"); is_publish_color_mask_ = declare_parameter("is_publish_color_mask"); overlap_roi_score_threshold_ = declare_parameter("overlap_roi_score_threshold"); + gpu_id_ = declare_parameter("gpu_id"); roi_overlay_segment_labels_.UNKNOWN = declare_parameter("roi_overlay_segment_label.UNKNOWN"); roi_overlay_segment_labels_.CAR = declare_parameter("roi_overlay_segment_label.CAR"); @@ -91,6 +92,10 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const tensorrt_common::BatchConfig batch_config{1, 1, 1}; const size_t max_workspace_size = (1 << 30); + if (!setCudaDeviceId(gpu_id_)) { + RCLCPP_ERROR(this->get_logger(), "Given GPU doesn't exist or suitable"); + } + trt_yolox_ = std::make_unique( model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config, preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config, @@ -129,6 +134,11 @@ void TrtYoloXNode::onConnect() void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { + if (!setCudaDeviceId(gpu_id_)) { + RCLCPP_ERROR(this->get_logger(), "Given GPU doesn't exist or suitable"); + return; + } + stop_watch_ptr_->toc("processing_time", true); tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects; @@ -224,6 +234,17 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } +bool TrtYoloXNode::setCudaDeviceId(const int gpu_id) +{ + cudaError_t cuda_err = cudaSetDevice(gpu_id); + if (cuda_err != cudaSuccess) { + RCLCPP_ERROR(this->get_logger(), "Failed to set gpu device with id: %d ", gpu_id); + return false; + } else { + return true; + } +} + bool TrtYoloXNode::readLabelFile(const std::string & label_path) { std::ifstream label_file(label_path); From a93f5c304852bc8f678b3213a665b14bbe88fb9a Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Mon, 29 Jul 2024 16:19:47 +0300 Subject: [PATCH 02/11] move GPU ID parameter to param file Signed-off-by: ismetatabay --- .../config/yolox_s_plus_opt.param.yaml | 1 + .../config/yolox_tiny.param.yaml | 1 + .../launch/multiple_yolox.launch.xml | 24 ------------------- .../launch/yolox_s_plus_opt.launch.xml | 2 -- .../launch/yolox_tiny.launch.xml | 2 -- 5 files changed, 2 insertions(+), 28 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/autoware_tensorrt_yolox/config/yolox_s_plus_opt.param.yaml index d963074e51840..df01e2cb6eec6 100644 --- a/perception/autoware_tensorrt_yolox/config/yolox_s_plus_opt.param.yaml +++ b/perception/autoware_tensorrt_yolox/config/yolox_s_plus_opt.param.yaml @@ -36,4 +36,5 @@ profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose. clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration. preprocess_on_gpu: true # If true, pre-processing is performed on GPU. + gpu_id: 0 # GPU ID to select CUDA Device calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization. diff --git a/perception/autoware_tensorrt_yolox/config/yolox_tiny.param.yaml b/perception/autoware_tensorrt_yolox/config/yolox_tiny.param.yaml index e1ece63a4940f..e8b5b38503c9b 100644 --- a/perception/autoware_tensorrt_yolox/config/yolox_tiny.param.yaml +++ b/perception/autoware_tensorrt_yolox/config/yolox_tiny.param.yaml @@ -36,4 +36,5 @@ profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose. clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration. preprocess_on_gpu: true # If true, pre-processing is performed on GPU. + gpu_id: 0 # GPU ID to select CUDA Device calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization. diff --git a/perception/autoware_tensorrt_yolox/launch/multiple_yolox.launch.xml b/perception/autoware_tensorrt_yolox/launch/multiple_yolox.launch.xml index a039d25222490..a7952b12414b1 100644 --- a/perception/autoware_tensorrt_yolox/launch/multiple_yolox.launch.xml +++ b/perception/autoware_tensorrt_yolox/launch/multiple_yolox.launch.xml @@ -1,69 +1,45 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml index dfc23ea25e5c4..0096a219c8573 100644 --- a/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml +++ b/perception/autoware_tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml @@ -4,7 +4,6 @@ - - diff --git a/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml b/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml index a7075c115aa2b..a3708d14b5392 100644 --- a/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml +++ b/perception/autoware_tensorrt_yolox/launch/yolox_tiny.launch.xml @@ -7,7 +7,6 @@ - @@ -21,6 +20,5 @@ - From 48db535615dc8db5800ac757c19796b92d4141fe Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Tue, 30 Jul 2024 10:04:57 +0300 Subject: [PATCH 03/11] fix the grammar in the error log Signed-off-by: ismetatabay --- .../autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index a2b434075ab33..47ee6b7b5bb4b 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -93,7 +93,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const size_t max_workspace_size = (1 << 30); if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR(this->get_logger(), "Given GPU doesn't exist or suitable"); + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_); } trt_yolox_ = std::make_unique( @@ -135,7 +135,7 @@ void TrtYoloXNode::onConnect() void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR(this->get_logger(), "Given GPU doesn't exist or suitable"); + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_); return; } From 24f11bab26538ddd3333871872bb63e45a16dd80 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Fri, 2 Aug 2024 12:22:03 +0300 Subject: [PATCH 04/11] fix member variable type and error logs Signed-off-by: ismetatabay --- .../autoware/tensorrt_yolox/tensorrt_yolox_node.hpp | 4 ++-- .../autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp index d45eec6afef8f..86c7414995922 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp @@ -80,7 +80,7 @@ class TrtYoloXNode : public rclcpp::Node void overlapSegmentByRoi( const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height); int mapRoiLabel2SegLabel(const int32_t roi_label_index); - bool setCudaDeviceId(const int gpu_id); + bool setCudaDeviceId(const uint8_t gpu_id); image_transport::Publisher image_pub_; image_transport::Publisher mask_pub_; @@ -97,7 +97,7 @@ class TrtYoloXNode : public rclcpp::Node bool is_roi_overlap_segment_; bool is_publish_color_mask_; float overlap_roi_score_threshold_; - int gpu_id_; + uint8_t gpu_id_; // TODO(badai-nguyen): change to function std::map remap_roi_to_semantic_ = { {"UNKNOWN", 3}, // other diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index 47ee6b7b5bb4b..a5120fa56f51c 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -135,7 +135,8 @@ void TrtYoloXNode::onConnect() void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_); + RCLCPP_ERROR_THROTTLE( + get_logger(), *get_clock(), 2000, "GPU %d does not exist or is not suitable.", gpu_id_); return; } @@ -234,11 +235,12 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } -bool TrtYoloXNode::setCudaDeviceId(const int gpu_id) +bool TrtYoloXNode::setCudaDeviceId(const uint8_t gpu_id) { cudaError_t cuda_err = cudaSetDevice(gpu_id); if (cuda_err != cudaSuccess) { - RCLCPP_ERROR(this->get_logger(), "Failed to set gpu device with id: %d ", gpu_id); + RCLCPP_ERROR_THROTTLE( + get_logger(), *get_clock(), 2000, "Failed to set gpu device with id: %d ", gpu_id); return false; } else { return true; From a85d239fb556c740d9db6f7c409af547429fdbb6 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Wed, 14 Aug 2024 18:58:49 +0300 Subject: [PATCH 05/11] move code to the tensorrt_yolox Signed-off-by: ismetatabay style(pre-commit): autofix --- .../tensorrt_yolox/tensorrt_yolox.hpp | 15 ++++++++-- .../tensorrt_yolox/tensorrt_yolox_node.hpp | 2 -- .../src/tensorrt_yolox.cpp | 30 +++++++++++++++---- .../src/tensorrt_yolox_node.cpp | 26 ++-------------- .../traffic_light_fine_detector.param.yaml | 1 + .../src/traffic_light_fine_detector_node.cpp | 3 +- 6 files changed, 42 insertions(+), 35 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp index 6610476dc33fc..be7e7f6541360 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp @@ -89,8 +89,9 @@ class TrtYoloX const std::string & model_path, const std::string & precision, const int num_class = 8, const float score_threshold = 0.3, const float nms_threshold = 0.7, const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), - const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(), - const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "", + const bool use_gpu_preprocess = false, const int gpu_id = 0, + std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0, + [[maybe_unused]] const std::string & cache_dir = "", const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = ""); /** @@ -98,6 +99,12 @@ class TrtYoloX */ ~TrtYoloX(); + /** + * @brief select cuda device for a inference + * @param[in] GPU id + */ + bool setCudaDeviceId(const uint8_t gpu_id); + /** * @brief run inference including pre-process and post-process * @param[out] objects results for object detection @@ -267,7 +274,7 @@ class TrtYoloX size_t out_elem_num_per_batch_; CudaUniquePtr out_prob_d_; - StreamUniquePtr stream_{makeCudaStream()}; + StreamUniquePtr stream_; int32_t max_detections_; std::vector scales_; @@ -280,6 +287,8 @@ class TrtYoloX // flag whether preprocess are performed on GPU bool use_gpu_preprocess_; + // GPU id for inference + uint8_t gpu_id_; // host buffer for preprocessing on GPU CudaUniquePtrHost image_buf_h_; // device buffer for preprocessing on GPU diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp index 86c7414995922..2e317139f27c0 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox_node.hpp @@ -80,7 +80,6 @@ class TrtYoloXNode : public rclcpp::Node void overlapSegmentByRoi( const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height); int mapRoiLabel2SegLabel(const int32_t roi_label_index); - bool setCudaDeviceId(const uint8_t gpu_id); image_transport::Publisher image_pub_; image_transport::Publisher mask_pub_; @@ -97,7 +96,6 @@ class TrtYoloXNode : public rclcpp::Node bool is_roi_overlap_segment_; bool is_publish_color_mask_; float overlap_roi_score_threshold_; - uint8_t gpu_id_; // TODO(badai-nguyen): change to function std::map remap_roi_to_semantic_ = { {"UNKNOWN", 3}, // other diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index 675fee64812a2..fc37cf04abb9b 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -156,16 +156,25 @@ namespace autoware::tensorrt_yolox TrtYoloX::TrtYoloX( const std::string & model_path, const std::string & precision, const int num_class, const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config, - const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor, - [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config, - const size_t max_workspace_size, const std::string & color_map_path) + const bool use_gpu_preprocess, const int gpu_id, std::string calibration_image_list_path, + const double norm_factor, [[maybe_unused]] const std::string & cache_dir, + const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size, + const std::string & color_map_path) { + gpu_id_ = gpu_id; + std::cout << "GPU" << gpu_id_ << "is selected for the inference !" << std::endl; + if (!setCudaDeviceId(gpu_id_)) { + throw std::runtime_error( + "GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable."); + } src_width_ = -1; src_height_ = -1; norm_factor_ = norm_factor; batch_size_ = batch_config[2]; multitask_ = 0; sematic_color_map_ = get_seg_colormap(color_map_path); + stream_ = makeCudaStream(); + if (precision == "int8") { if (build_config.clip_value <= 0.0) { if (calibration_image_list_path.empty()) { @@ -217,7 +226,6 @@ TrtYoloX::TrtYoloX( calibrator.reset( new tensorrt_yolox::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_)); } - trt_common_ = std::make_unique( model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config); } else { @@ -262,7 +270,6 @@ TrtYoloX::TrtYoloX( throw std::runtime_error{s.str()}; */ } - // GPU memory allocation const auto input_dims = trt_common_->getBindingDimensions(0); const auto input_size = @@ -341,6 +348,16 @@ TrtYoloX::~TrtYoloX() } } +bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id) +{ + cudaError_t cuda_err = cudaSetDevice(gpu_id); + if (cuda_err != cudaSuccess) { + return false; + } else { + return true; + } +} + void TrtYoloX::initPreprocessBuffer(int width, int height) { // if size of source input has been changed... @@ -534,6 +551,9 @@ bool TrtYoloX::doInference( const std::vector & images, ObjectArrays & objects, std::vector & masks, [[maybe_unused]] std::vector & color_masks) { + if (!setCudaDeviceId(gpu_id_)) { + return false; + } if (!trt_common_->isInitialized()) { return false; } diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index a5120fa56f51c..344c5b9a39759 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -57,6 +57,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const bool preprocess_on_gpu = this->declare_parameter("preprocess_on_gpu"); const std::string calibration_image_list_path = this->declare_parameter("calibration_image_list_path"); + const uint8_t gpu_id = this->declare_parameter("gpu_id"); std::string color_map_path = this->declare_parameter("color_map_path"); @@ -68,7 +69,6 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) is_roi_overlap_segment_ = declare_parameter("is_roi_overlap_segment"); is_publish_color_mask_ = declare_parameter("is_publish_color_mask"); overlap_roi_score_threshold_ = declare_parameter("overlap_roi_score_threshold"); - gpu_id_ = declare_parameter("gpu_id"); roi_overlay_segment_labels_.UNKNOWN = declare_parameter("roi_overlay_segment_label.UNKNOWN"); roi_overlay_segment_labels_.CAR = declare_parameter("roi_overlay_segment_label.CAR"); @@ -92,13 +92,9 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) const tensorrt_common::BatchConfig batch_config{1, 1, 1}; const size_t max_workspace_size = (1 << 30); - if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id_); - } - trt_yolox_ = std::make_unique( model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config, - preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config, + preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config, max_workspace_size, color_map_path); timer_ = @@ -134,12 +130,6 @@ void TrtYoloXNode::onConnect() void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { - if (!setCudaDeviceId(gpu_id_)) { - RCLCPP_ERROR_THROTTLE( - get_logger(), *get_clock(), 2000, "GPU %d does not exist or is not suitable.", gpu_id_); - return; - } - stop_watch_ptr_->toc("processing_time", true); tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects; @@ -235,18 +225,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } -bool TrtYoloXNode::setCudaDeviceId(const uint8_t gpu_id) -{ - cudaError_t cuda_err = cudaSetDevice(gpu_id); - if (cuda_err != cudaSuccess) { - RCLCPP_ERROR_THROTTLE( - get_logger(), *get_clock(), 2000, "Failed to set gpu device with id: %d ", gpu_id); - return false; - } else { - return true; - } -} - bool TrtYoloXNode::readLabelFile(const std::string & label_path) { std::ifstream label_file(label_path); diff --git a/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml b/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml index 0265d0c4c7abb..36d4413472a0b 100644 --- a/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml +++ b/perception/autoware_traffic_light_fine_detector/config/traffic_light_fine_detector.param.yaml @@ -5,3 +5,4 @@ fine_detector_precision: fp16 fine_detector_score_thresh: 0.3 fine_detector_nms_thresh: 0.65 + gpu_id: 0 diff --git a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp index fec26b762dfe9..fe21696ace354 100644 --- a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp +++ b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp @@ -60,6 +60,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt std::string model_path = declare_parameter("fine_detector_model_path", ""); std::string label_path = declare_parameter("fine_detector_label_path", ""); std::string precision = declare_parameter("fine_detector_precision", "fp32"); + uint8_t gpu_id = declare_parameter("gpu_id", 0); // Objects with a score lower than this value will be ignored. // This threshold will be ignored if specified model contains EfficientNMS_TRT module in it score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3); @@ -86,7 +87,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt trt_yolox_ = std::make_unique( model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess, - calib_image_list, scale, cache_dir, batch_config); + gpu_id, calib_image_list, scale, cache_dir, batch_config); using std::chrono_literals::operator""ms; timer_ = rclcpp::create_timer( From f3e2c1c4a4cfd301991ec47381c1e9c1900e1ea5 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Thu, 15 Aug 2024 19:00:34 +0300 Subject: [PATCH 06/11] improve code performance Signed-off-by: ismetatabay --- .../include/autoware/tensorrt_yolox/tensorrt_yolox.hpp | 4 ++-- perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp | 8 ++++---- .../src/traffic_light_fine_detector_node.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp index be7e7f6541360..c6c9327c29c46 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp @@ -89,7 +89,7 @@ class TrtYoloX const std::string & model_path, const std::string & precision, const int num_class = 8, const float score_threshold = 0.3, const float nms_threshold = 0.7, const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), - const bool use_gpu_preprocess = false, const int gpu_id = 0, + const bool use_gpu_preprocess = false, const uint8_t gpu_id = 0, std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "", const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, @@ -288,7 +288,7 @@ class TrtYoloX // flag whether preprocess are performed on GPU bool use_gpu_preprocess_; // GPU id for inference - uint8_t gpu_id_; + const uint8_t gpu_id_; // host buffer for preprocessing on GPU CudaUniquePtrHost image_buf_h_; // device buffer for preprocessing on GPU diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index fc37cf04abb9b..89d3bdd438fed 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -156,13 +156,12 @@ namespace autoware::tensorrt_yolox TrtYoloX::TrtYoloX( const std::string & model_path, const std::string & precision, const int num_class, const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config, - const bool use_gpu_preprocess, const int gpu_id, std::string calibration_image_list_path, + const bool use_gpu_preprocess, const uint8_t gpu_id, std::string calibration_image_list_path, const double norm_factor, [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size, - const std::string & color_map_path) + const std::string & color_map_path) : gpu_id_(gpu_id) { - gpu_id_ = gpu_id; - std::cout << "GPU" << gpu_id_ << "is selected for the inference !" << std::endl; + std::cout << "GPU " << gpu_id_ << " is selected for the inference!" << std::endl; if (!setCudaDeviceId(gpu_id_)) { throw std::runtime_error( "GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable."); @@ -352,6 +351,7 @@ bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id) { cudaError_t cuda_err = cudaSetDevice(gpu_id); if (cuda_err != cudaSuccess) { + std::cerr << "Failed to set CUDA device: " << cudaGetErrorString(cuda_err) << std::endl; return false; } else { return true; diff --git a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp index fe21696ace354..2e80ebe4fd73b 100644 --- a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp +++ b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp @@ -60,7 +60,7 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt std::string model_path = declare_parameter("fine_detector_model_path", ""); std::string label_path = declare_parameter("fine_detector_label_path", ""); std::string precision = declare_parameter("fine_detector_precision", "fp32"); - uint8_t gpu_id = declare_parameter("gpu_id", 0); + const uint8_t gpu_id = declare_parameter("gpu_id", 0); // Objects with a score lower than this value will be ignored. // This threshold will be ignored if specified model contains EfficientNMS_TRT module in it score_thresh_ = declare_parameter("fine_detector_score_thresh", 0.3); From 3d811f55320a7593fe452dcfd76539b939e583f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:02:42 +0000 Subject: [PATCH 07/11] style(pre-commit): autofix --- perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index 89d3bdd438fed..73752bbdc5d78 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -159,7 +159,8 @@ TrtYoloX::TrtYoloX( const bool use_gpu_preprocess, const uint8_t gpu_id, std::string calibration_image_list_path, const double norm_factor, [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size, - const std::string & color_map_path) : gpu_id_(gpu_id) + const std::string & color_map_path) +: gpu_id_(gpu_id) { std::cout << "GPU " << gpu_id_ << " is selected for the inference!" << std::endl; if (!setCudaDeviceId(gpu_id_)) { From 1f02967d231c9fa6c6dc731671f632064cf5ef52 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Fri, 16 Aug 2024 11:07:14 +0300 Subject: [PATCH 08/11] fix print type Signed-off-by: ismetatabay --- perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index 73752bbdc5d78..ceeb5eefc8882 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -162,7 +162,7 @@ TrtYoloX::TrtYoloX( const std::string & color_map_path) : gpu_id_(gpu_id) { - std::cout << "GPU " << gpu_id_ << " is selected for the inference!" << std::endl; + std::cout << "GPU " << std::to_string(gpu_id_) << " is selected for the inference!" << std::endl; if (!setCudaDeviceId(gpu_id_)) { throw std::runtime_error( "GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable."); From dfc2aa3881cb1a4b4f2fe1508111eb5709232bf0 Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Tue, 20 Aug 2024 16:35:15 +0300 Subject: [PATCH 09/11] update log mechanism Signed-off-by: ismetatabay --- .../include/autoware/tensorrt_yolox/tensorrt_yolox.hpp | 7 +++++++ perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp | 8 ++++---- .../autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp | 7 +++++++ .../src/traffic_light_fine_detector_node.cpp | 7 +++++++ 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp index c6c9327c29c46..6fba801d1072b 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp @@ -105,6 +105,11 @@ class TrtYoloX */ bool setCudaDeviceId(const uint8_t gpu_id); + /** + * @brief return a flag for gpu initialization + */ + bool isGPUInitialized() const { return is_gpu_initialized_; } + /** * @brief run inference including pre-process and post-process * @param[out] objects results for object detection @@ -289,6 +294,8 @@ class TrtYoloX bool use_gpu_preprocess_; // GPU id for inference const uint8_t gpu_id_; + // flag for gpu initialization + bool is_gpu_initialized_; // host buffer for preprocessing on GPU CudaUniquePtrHost image_buf_h_; // device buffer for preprocessing on GPU diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index ceeb5eefc8882..fd8aaee657048 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -160,13 +160,13 @@ TrtYoloX::TrtYoloX( const double norm_factor, [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size, const std::string & color_map_path) -: gpu_id_(gpu_id) +: gpu_id_(gpu_id), is_gpu_initialized_(false) { - std::cout << "GPU " << std::to_string(gpu_id_) << " is selected for the inference!" << std::endl; if (!setCudaDeviceId(gpu_id_)) { - throw std::runtime_error( - "GPU" + std::to_string(gpu_id_) + " does not exist or is not suitable."); + return; } + is_gpu_initialized_ = true; + src_width_ = -1; src_height_ = -1; norm_factor_ = norm_factor; diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp index 344c5b9a39759..2455411618ceb 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox_node.cpp @@ -97,6 +97,13 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options) preprocess_on_gpu, gpu_id, calibration_image_list_path, norm_factor, cache_dir, batch_config, max_workspace_size, color_map_path); + if (!trt_yolox_->isGPUInitialized()) { + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); + rclcpp::shutdown(); + return; + } + RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id); + timer_ = rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this)); diff --git a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp index 2e80ebe4fd73b..5963efe611c00 100644 --- a/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp +++ b/perception/autoware_traffic_light_fine_detector/src/traffic_light_fine_detector_node.cpp @@ -89,6 +89,13 @@ TrafficLightFineDetectorNode::TrafficLightFineDetectorNode(const rclcpp::NodeOpt model_path, precision, num_class, score_thresh_, nms_threshold, build_config, cuda_preprocess, gpu_id, calib_image_list, scale, cache_dir, batch_config); + if (!trt_yolox_->isGPUInitialized()) { + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); + rclcpp::shutdown(); + return; + } + RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id); + using std::chrono_literals::operator""ms; timer_ = rclcpp::create_timer( this, get_clock(), 100ms, std::bind(&TrafficLightFineDetectorNode::connectCb, this)); From c29d6bc3983e37f52fe2102d37c2af7ef049b13e Mon Sep 17 00:00:00 2001 From: ismetatabay Date: Fri, 23 Aug 2024 15:22:02 +0300 Subject: [PATCH 10/11] remove redundant cerr message Signed-off-by: ismetatabay --- .../src/tensorrt_yolox.cpp | 1 - .../README.md | 15 ++++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index fd8aaee657048..b6da363a9a237 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -352,7 +352,6 @@ bool TrtYoloX::setCudaDeviceId(const uint8_t gpu_id) { cudaError_t cuda_err = cudaSetDevice(gpu_id); if (cuda_err != cudaSuccess) { - std::cerr << "Failed to set CUDA device: " << cudaGetErrorString(cuda_err) << std::endl; return false; } else { return true; diff --git a/perception/autoware_traffic_light_fine_detector/README.md b/perception/autoware_traffic_light_fine_detector/README.md index 9e69c22fdc17b..d959ce108946b 100644 --- a/perception/autoware_traffic_light_fine_detector/README.md +++ b/perception/autoware_traffic_light_fine_detector/README.md @@ -51,13 +51,14 @@ Based on the camera image and the global ROI array detected by `map_based_detect ### Node Parameters -| Name | Type | Default Value | Description | -| -------------------------- | ------ | --------------------------- | ------------------------------------------------------------------ | -| `data_path` | string | "$(env HOME)/autoware_data" | packages data and artifacts directory path | -| `fine_detector_model_path` | string | "" | The onnx file name for yolo model | -| `fine_detector_label_path` | string | "" | The label file with label names for detected objects written on it | -| `fine_detector_precision` | string | "fp32" | The inference mode: "fp32", "fp16" | -| `approximate_sync` | bool | false | Flag for whether to ues approximate sync policy | +| Name | Type | Default Value | Description | +|----------------------------|---------|-----------------------------|--------------------------------------------------------------------| +| `data_path` | string | "$(env HOME)/autoware_data" | packages data and artifacts directory path | +| `fine_detector_model_path` | string | "" | The onnx file name for yolo model | +| `fine_detector_label_path` | string | "" | The label file with label names for detected objects written on it | +| `fine_detector_precision` | string | "fp32" | The inference mode: "fp32", "fp16" | +| `approximate_sync` | bool | false | Flag for whether to ues approximate sync policy | +| `gpu_id` | integer | 0 | ID for the selecting CUDA GPU device | ## Assumptions / Known limits From 8f97a9d280626f2f74fcd7be96e044eb2db67c6a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:24:28 +0000 Subject: [PATCH 11/11] style(pre-commit): autofix --- perception/autoware_traffic_light_fine_detector/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_traffic_light_fine_detector/README.md b/perception/autoware_traffic_light_fine_detector/README.md index d959ce108946b..35b55c84aa087 100644 --- a/perception/autoware_traffic_light_fine_detector/README.md +++ b/perception/autoware_traffic_light_fine_detector/README.md @@ -52,7 +52,7 @@ Based on the camera image and the global ROI array detected by `map_based_detect ### Node Parameters | Name | Type | Default Value | Description | -|----------------------------|---------|-----------------------------|--------------------------------------------------------------------| +| -------------------------- | ------- | --------------------------- | ------------------------------------------------------------------ | | `data_path` | string | "$(env HOME)/autoware_data" | packages data and artifacts directory path | | `fine_detector_model_path` | string | "" | The onnx file name for yolo model | | `fine_detector_label_path` | string | "" | The label file with label names for detected objects written on it |