From 534b15e683550100bca6ef14ca251a738d0321c0 Mon Sep 17 00:00:00 2001 From: kminoda Date: Sat, 5 Aug 2023 01:02:05 +0900 Subject: [PATCH] feat: add IPM projection Signed-off-by: kminoda --- CMakeLists.txt | 5 +- launch/lightnet_trt.launch.xml | 25 +++++----- modules/class_detector.cpp | 6 +++ modules/class_detector.h | 1 + modules/class_yolo_detector.hpp | 82 +++++++++++++++++---------------- package.xml | 2 + samples/sample_detector.cpp | 4 +- src/lightnet_trt_core.cpp | 15 +----- src/lightnet_trt_core.hpp | 2 +- src/lightnet_trt_node.cpp | 68 +++++++++++++++++++++------ src/lightnet_trt_node.hpp | 26 +++++++++-- src/utils.cpp | 80 ++++++++++++++++++++++++++++++++ src/utils.hpp | 38 +++++++++++++++ 13 files changed, 270 insertions(+), 84 deletions(-) create mode 100644 src/utils.cpp create mode 100644 src/utils.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 859eada..57e7808 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,8 @@ if(BUILD_ROS2_LIGHTNET_TRT) find_package(autoware_cmake REQUIRED) autoware_package() + find_package(Eigen3 REQUIRED) + # build nodelet # TODO @@ -42,11 +44,12 @@ if(BUILD_ROS2_LIGHTNET_TRT) src/main.cpp src/lightnet_trt_node.cpp src/lightnet_trt_core.cpp + src/utils.cpp ) ament_target_dependencies(lightnet_trt_node) target_include_directories(lightnet_trt_node PRIVATE modules/ extra/) target_link_libraries(lightnet_trt_node detector) - target_link_libraries(lightnet_trt_node ${CUDA_LIBRARIES} ${OpenCV_LIBS} nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++") + target_link_libraries(lightnet_trt_node ${CUDA_LIBRARIES} ${OpenCV_LIBS} Eigen3::Eigen nvinfer nvinfer_plugin nvparsers gflags boost_system boost_filesystem "stdc++fs" "stdc++") ament_auto_package(INSTALL_TO_SHARE launch diff --git a/launch/lightnet_trt.launch.xml b/launch/lightnet_trt.launch.xml index bd8d63f..bec7325 100644 --- a/launch/lightnet_trt.launch.xml +++ b/launch/lightnet_trt.launch.xml @@ -1,23 +1,26 @@ - + + + - + - - - - + + + + - - - - + + + + - + + diff --git a/modules/class_detector.cpp b/modules/class_detector.cpp index 2ba628d..5be03e9 100644 --- a/modules/class_detector.cpp +++ b/modules/class_detector.cpp @@ -50,6 +50,12 @@ void Detector::segment(const std::vector &mat_image, std::string filena _impl->_detector.segment(mat_image, filename); } +void Detector::get_mask(std::vector &mask_results) +{ + _impl->_detector.get_mask(mask_results); +} + + void Detector::draw_BBox(cv::Mat &img,Result result) { _impl->_detector.draw_BBox(img, result); diff --git a/modules/class_detector.h b/modules/class_detector.h index 831005a..ddbf5a1 100644 --- a/modules/class_detector.h +++ b/modules/class_detector.h @@ -59,6 +59,7 @@ class API Detector void init(const Config &config); void save_image(cv::Mat &img, const std::string &dir, const std::string &name); void segment(const std::vector &mat_image, std::string filename); + void get_mask(std::vector &mask_results); void detect(const std::vector &mat_image, std::vector &vec_batch_result, const bool cuda); void dump_profiling(void); void draw_BBox(cv::Mat &img, Result result); diff --git a/modules/class_yolo_detector.hpp b/modules/class_yolo_detector.hpp index 2bb72eb..9c417da 100644 --- a/modules/class_yolo_detector.hpp +++ b/modules/class_yolo_detector.hpp @@ -78,10 +78,8 @@ class YoloDectector cv::putText(img, stream.str(), cv::Point(result.rect.x, result.rect.y - 5), 0, 0.5, cv::Scalar(colormap[3*id+2], colormap[3*id+1], colormap[3*id+0]), 1); } else { cv::putText(img, stream.str(), 
cv::Point(result.rect.x, result.rect.y - 5), 0, 0.5, cv::Scalar(255, 0, 0), 1); - } - - - } + } + } void segment(const std::vector &vec_image, std::string filename) { @@ -96,44 +94,50 @@ class YoloDectector int width = curImage.cols; for (uint32_t j = 0; j < mask.size(); j++) { - cv::Mat resized; - - cv::resize(mask[j], resized, cv::Size(width, height), 0, 0, cv::INTER_NEAREST); - cv::addWeighted(vec_image[i], 1.0, resized, 0.5, 0.0, vec_image[i]); - - cv::namedWindow("mask" + std::to_string(j), cv::WINDOW_NORMAL); - cv::imshow("mask"+std::to_string(j), mask[j]); - if (flg_save) { - fs::path p = save_path; - p.append("segmentation"); - fs::create_directory(p); - p.append(std::to_string(j)); - fs::create_directory(p); - if (0) { - //get filenames; - } else { - std::ostringstream sout; - sout << std::setfill('0') << std::setw(6) << GLOBAL_COUNTER; - //cv::resize(mask[j], resized, cv::Size(width, height), 0, 0, cv::INTER_NEAREST); - if (filename == "") { - std::string filename = "frame_" + sout.str() + ".png"; - } - save_image(resized, p.string(), filename); - } - } - auto depthmap = _p_net->get_depthmap(segmentation); - for (uint32_t j = 0; j < depthmap.size(); j++) { - cv::imshow("depthmap"+std::to_string(j), depthmap[j]); - } - } - if (i > 0) { - GLOBAL_COUNTER++; - } - } + cv::Mat resized; + + cv::resize(mask[j], resized, cv::Size(width, height), 0, 0, cv::INTER_NEAREST); + cv::addWeighted(vec_image[i], 1.0, resized, 0.5, 0.0, vec_image[i]); + + cv::namedWindow("mask" + std::to_string(j), cv::WINDOW_NORMAL); + cv::imshow("mask"+std::to_string(j), mask[j]); + if (flg_save) { + fs::path p = save_path; + p.append("segmentation"); + fs::create_directory(p); + p.append(std::to_string(j)); + fs::create_directory(p); + if (0) { + //get filenames; + } else { + std::ostringstream sout; + sout << std::setfill('0') << std::setw(6) << GLOBAL_COUNTER; + //cv::resize(mask[j], resized, cv::Size(width, height), 0, 0, cv::INTER_NEAREST); + if (filename == "") { + std::string filename = "frame_" + sout.str() + ".png"; + } + save_image(resized, p.string(), filename); + } + } + auto depthmap = _p_net->get_depthmap(segmentation); + for (uint32_t j = 0; j < depthmap.size(); j++) { + cv::imshow("depthmap"+std::to_string(j), depthmap[j]); + } + } + if (i > 0) { + GLOBAL_COUNTER++; + } + } GLOBAL_COUNTER++; - } + void get_mask(std::vector &mask_results) + { + auto segmentation = _p_net->apply_argmax(0); + mask_results = _p_net->get_colorlbl(segmentation); + GLOBAL_COUNTER++; + } + void dump_profiling(){ _p_net->print_profiling(); } diff --git a/package.xml b/package.xml index d7a625b..16761de 100644 --- a/package.xml +++ b/package.xml @@ -16,6 +16,8 @@ cv_bridge image_transport rclcpp_components + tier4_autoware_utils + tf2_eigen ament_cmake_ros ament_lint_auto diff --git a/samples/sample_detector.cpp b/samples/sample_detector.cpp index 5c23cc2..29aa7a5 100644 --- a/samples/sample_detector.cpp +++ b/samples/sample_detector.cpp @@ -168,8 +168,8 @@ int main(int argc, char** argv) // std::cout <<"batch "<draw_BBox(batch_img[i], r); } - cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); - cv::imshow("image"+std::to_string(i), batch_img[i]); + // cv::namedWindow("image" + std::to_string(i), cv::WINDOW_NORMAL); + // cv::imshow("image"+std::to_string(i), batch_img[i]); int k = cv::waitKey(0); if (k == 32) { std::cout << "Save... 
" << name << std::endl; diff --git a/src/lightnet_trt_core.cpp b/src/lightnet_trt_core.cpp index 20c014d..1c28704 100644 --- a/src/lightnet_trt_core.cpp +++ b/src/lightnet_trt_core.cpp @@ -22,20 +22,9 @@ LightNetTensorRT::LightNetTensorRT(const ::Config &config) detector_->init(config); } -bool LightNetTensorRT::doInference(std::vector images) +void LightNetTensorRT::doInference(const std::vector & images, std::vector & masks) { std::vector batch_res; - detector_->detect(images, batch_res, cuda); - detector_->segment(images, ""); - - for (int i = 0; i < images.size(); i++) - { - for (const auto &r : batch_res[i]) - { - detector_->draw_BBox(images[i], r); - } - } - - return true; + detector_->get_mask(masks); } diff --git a/src/lightnet_trt_core.hpp b/src/lightnet_trt_core.hpp index 88961b7..a1b21eb 100644 --- a/src/lightnet_trt_core.hpp +++ b/src/lightnet_trt_core.hpp @@ -27,7 +27,7 @@ class LightNetTensorRT public: LightNetTensorRT(const ::Config &config); - bool doInference(std::vector images); + void doInference(const std::vector & images, std::vector & masks); std::unique_ptr detector_; const bool cuda = false; diff --git a/src/lightnet_trt_node.cpp b/src/lightnet_trt_node.cpp index e990732..386668c 100644 --- a/src/lightnet_trt_node.cpp +++ b/src/lightnet_trt_node.cpp @@ -15,26 +15,31 @@ #include "lightnet_trt_node.hpp" LightNetTensorRTNode::LightNetTensorRTNode(const rclcpp::NodeOptions &node_options) - : Node("tensorrt_lightnet", node_options) +: Node("tensorrt_lightnet", node_options), + ipm_projector_(nullptr), + base_frame_("base_link") { - using std::placeholders::_1; - using std::chrono_literals::operator""ms; + transform_listener_ = std::make_shared(this); // Initialize a detector - ::Config config = loadConfig(); + config_ = loadConfig(); RCLCPP_INFO(this->get_logger(), "Start loading YOLO"); - RCLCPP_INFO(this->get_logger(), "Model Config: %s", config.file_model_cfg.c_str()); - RCLCPP_INFO(this->get_logger(), "Model Weights: %s", config.file_model_weights.c_str()); - RCLCPP_INFO(this->get_logger(), "Input size: (%d, %d)", config.width, config.height); - lightnet_trt_ = std::make_unique(config); + RCLCPP_INFO(this->get_logger(), "Model Config: %s", config_.file_model_cfg.c_str()); + RCLCPP_INFO(this->get_logger(), "Model Weights: %s", config_.file_model_weights.c_str()); + RCLCPP_INFO(this->get_logger(), "Input size: (%d, %d)", config_.width, config_.height); + lightnet_trt_ = std::make_unique(config_); RCLCPP_INFO(this->get_logger(), "Finished loading YOLO"); image_sub_ = image_transport::create_subscription( - this, "~/in/image", std::bind(&LightNetTensorRTNode::onImage, this, _1), "raw", + this, "~/in/image", [this](const sensor_msgs::msg::Image::ConstSharedPtr msg) { onImage(msg); }, "raw", rmw_qos_profile_sensor_data); + camera_info_sub_ = this->create_subscription( + "~/in/camera_info", rclcpp::SensorDataQoS(), + [this](const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg) { onCameraInfo(msg); }); image_pub_ = image_transport::create_publisher(this, "~/out/image"); + image_projected_pub_ = image_transport::create_publisher(this, "~/out/image_projected"); } ::Config LightNetTensorRTNode::loadConfig() @@ -59,19 +64,56 @@ ::Config LightNetTensorRTNode::loadConfig() return config; } +void LightNetTensorRTNode::onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg) +{ + if (ipm_projector_ != nullptr) return; + const Eigen::Matrix3f intrinsic = get_intrinsic_matrix(*msg); + const Eigen::Matrix3f intrinsic_resized = resize_intrinsic_matrix(intrinsic, 
config_.width, config_.height); + + const std::string camera_frame = msg->header.frame_id; + geometry_msgs::msg::TransformStamped::ConstSharedPtr tf_base2cam_ptr = + transform_listener_->getLatestTransform(base_frame_, camera_frame); + if (!tf_base2cam_ptr) return; + + const Eigen::Matrix4f extrinsic = tf2::transformToEigen(tf_base2cam_ptr->transform).matrix().cast(); + ipm_projector_ = std::make_unique(intrinsic_resized, extrinsic); + return; +} + void LightNetTensorRTNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) { RCLCPP_INFO(this->get_logger(), "Image received"); cv_bridge::CvImagePtr in_image_ptr; in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); - if (!lightnet_trt_->doInference({in_image_ptr->image})) + std::vector masks; + lightnet_trt_->doInference({in_image_ptr->image}, masks); + RCLCPP_INFO(this->get_logger(), "Inference succeeded"); + + // Publish result + cv_bridge::CvImage out_image; + out_image.header = msg->header; + out_image.encoding = sensor_msgs::image_encodings::BGR8; + out_image.image = masks[0]; + image_pub_.publish(out_image.toImageMsg()); + + // Post processing + if (!ipm_projector_) { - RCLCPP_WARN(this->get_logger(), "Inference failed."); + RCLCPP_WARN(this->get_logger(), "IPM projector is not initialized."); return; } - image_pub_.publish(in_image_ptr->toImageMsg()); - RCLCPP_INFO(this->get_logger(), "Inference succeeded"); + RCLCPP_WARN(this->get_logger(), "IPM projector running."); + + cv::Mat projected_mask; + ipm_projector_->run(masks[0], projected_mask); + + // Publish result + cv_bridge::CvImage out_projected_image; + out_projected_image.header = msg->header; + out_projected_image.encoding = sensor_msgs::image_encodings::BGR8; + out_projected_image.image = projected_mask; + image_projected_pub_.publish(out_projected_image.toImageMsg()); } #include "rclcpp_components/register_node_macro.hpp" diff --git a/src/lightnet_trt_node.hpp b/src/lightnet_trt_node.hpp index b3fb05b..cac7a3a 100644 --- a/src/lightnet_trt_node.hpp +++ b/src/lightnet_trt_node.hpp @@ -15,13 +15,18 @@ #ifndef LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ #define LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ -#include -#include #include "lightnet_trt_core.hpp" +#include "utils.hpp" -#include +#include #include +#include +#include +#include +#include + +#include class LightNetTensorRTNode : public rclcpp::Node { @@ -30,10 +35,23 @@ class LightNetTensorRTNode : public rclcpp::Node private: void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); + void onCameraInfo(const sensor_msgs::msg::CameraInfo::ConstSharedPtr msg); ::Config loadConfig(); - image_transport::Publisher image_pub_; + // ROS related variables image_transport::Subscriber image_sub_; + rclcpp::Subscription::SharedPtr camera_info_sub_; + image_transport::Publisher image_pub_; + image_transport::Publisher image_projected_pub_; + std::shared_ptr transform_listener_; + + // Tools std::unique_ptr lightnet_trt_; + std::unique_ptr ipm_projector_; + + // Misc + bool received_camera_info_; + const std::string base_frame_; + ::Config config_; }; #endif // LIGHTNET_TRT__LIGHTNET_TRT_NODE_HPP_ diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..7e87de4 --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,80 @@ +// Copyright 2023 TIER IV +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "utils.hpp"
+
+Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info)
+{
+  Eigen::Matrix3f intrinsic_matrix;
+  intrinsic_matrix << camera_info.k[0], camera_info.k[1], camera_info.k[2],
+    camera_info.k[3], camera_info.k[4], camera_info.k[5],
+    camera_info.k[6], camera_info.k[7], camera_info.k[8];
+  return intrinsic_matrix;
+}
+
+Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height)
+{
+  Eigen::Matrix3f resized_intrinsic;
+  const float width_ratio = resized_width / 2.0 / intrinsic(0, 2);
+  const float height_ratio = resized_height / 2.0 / intrinsic(1, 2);
+  resized_intrinsic << intrinsic(0, 0) * width_ratio, 0, resized_width / 2,
+    0, intrinsic(1, 1) * height_ratio, resized_height / 2,
+    0, 0, 1;
+  return resized_intrinsic;
+}
+
+// TODO: Implement this in a smarter way
+IPM::IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic)
+{
+  // Define points on the ground plane in the world coordinate system
+  // 50 x 20 m rectangle
+  const Eigen::Vector4f dst_p1{55, -10, 0, 1};
+  const Eigen::Vector4f dst_p2{55, 10, 0, 1};
+  const Eigen::Vector4f dst_p3{5, 10, 0, 1};
+  const Eigen::Vector4f dst_p4{5, -10, 0, 1};
+
+  // Convert the points from the world coordinate system to the camera coordinate system
+  const Eigen::Matrix4f extrinsic_inv = extrinsic.inverse();
+  Eigen::Vector3f src_p1 = (extrinsic_inv * dst_p1).head<3>();
+  Eigen::Vector3f src_p2 = (extrinsic_inv * dst_p2).head<3>();
+  Eigen::Vector3f src_p3 = (extrinsic_inv * dst_p3).head<3>();
+  Eigen::Vector3f src_p4 = (extrinsic_inv * dst_p4).head<3>();
+
+  // Project the points from 3D camera coordinate system to 2D image plane
+  src_p1 = intrinsic * src_p1; src_p1 /= src_p1(2);
+  src_p2 = intrinsic * src_p2; src_p2 /= src_p2(2);
+  src_p3 = intrinsic * src_p3; src_p3 /= src_p3(2);
+  src_p4 = intrinsic * src_p4; src_p4 /= src_p4(2);
+
+  // Create source and destination points for cv::getPerspectiveTransform
+  const std::vector<cv::Point2f> src_pts = {
+    cv::Point2f(src_p1(0), src_p1(1)),
+    cv::Point2f(src_p2(0), src_p2(1)),
+    cv::Point2f(src_p3(0), src_p3(1)),
+    cv::Point2f(src_p4(0), src_p4(1))
+  };
+  const std::vector<cv::Point2f> dst_pts = {
+    cv::Point2f(dst_p3(1) * 12 + 320, dst_p3(0) * 12),
+    cv::Point2f(dst_p4(1) * 12 + 320, dst_p4(0) * 12),
+    cv::Point2f(dst_p1(1) * 12 + 320, dst_p1(0) * 12),
+    cv::Point2f(dst_p2(1) * 12 + 320, dst_p2(0) * 12)
+  };
+
+  perspective_transform_ = cv::getPerspectiveTransform(src_pts, dst_pts);
+}
+
+void IPM::run(const cv::Mat & img, cv::Mat & output_img)
+{
+  cv::warpPerspective(img, output_img, perspective_transform_, cv::Size(640, 640));
+}
\ No newline at end of file
diff --git a/src/utils.hpp b/src/utils.hpp
new file mode 100644
index 0000000..631f277
--- /dev/null
+++ b/src/utils.hpp
@@ -0,0 +1,38 @@
+// Copyright 2023 TIER IV
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIGHTNET_TRT__UTILS_HPP_
+#define LIGHTNET_TRT__UTILS_HPP_
+
+#include <opencv2/opencv.hpp>
+
+#include <Eigen/Core>
+#include <Eigen/Geometry>
+
+#include "sensor_msgs/msg/camera_info.hpp"
+#include "geometry_msgs/msg/transform.hpp"
+
+Eigen::Matrix3f get_intrinsic_matrix(const sensor_msgs::msg::CameraInfo & camera_info);
+Eigen::Matrix3f resize_intrinsic_matrix(const Eigen::Matrix3f & intrinsic, const int resized_width, const int resized_height);
+
+class IPM
+{
+public:
+  IPM(const Eigen::Matrix3f & intrinsic, const Eigen::Matrix4f & extrinsic);
+  void run(const cv::Mat & img, cv::Mat & output_img);
+private:
+  cv::Mat perspective_transform_;
+};
+
+#endif  // LIGHTNET_TRT__UTILS_HPP_
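
Reviewer notes below are illustrative only and not part of the patch.

resize_intrinsic_matrix() scales fx and fy by resized_width / 2 / cx and resized_height / 2 / cy and pins the new principal point to the centre of the network input, which matches a plain image resize only when the original principal point (cx, cy) already sits at the image centre. A minimal sketch with made-up numbers (a 1920x1080 camera, fx = fy = 1000, principal point exactly centred, resized to a 640x480 network input) shows the intended result:

#include <Eigen/Core>
#include <iostream>

#include "utils.hpp"

int main()
{
  Eigen::Matrix3f K;
  K << 1000, 0, 960,
       0, 1000, 540,
       0, 0, 1;
  // width_ratio  = 640 / 2 / 960 = 1/3  -> fx' ~ 333.3, cx' = 320
  // height_ratio = 480 / 2 / 540 = 4/9  -> fy' ~ 444.4, cy' = 240
  // These are the same factors as 640/1920 and 480/1080, i.e. the plain
  // resize ratios, because the principal point is assumed centred.
  const Eigen::Matrix3f K_resized = resize_intrinsic_matrix(K, 640, 480);
  std::cout << K_resized << std::endl;
  return 0;
}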
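
In IPM::IPM(), the destination points define the bird's-eye-view raster that IPM::run() fills: a ground point (x forward, y left), in metres in the base frame, lands at pixel (y * 12 + 320, x * 12) of the 640x640 output, i.e. 12 px per metre, lateral origin at column 320, with distance ahead increasing down the image. A hypothetical helper (not part of the patch) restates that mapping with the same constants:

#include <opencv2/core.hpp>

// Ground point in base_link (x forward, y left, metres) -> pixel in the
// 640x640 BEV image produced by IPM::run(). Constants mirror utils.cpp.
cv::Point2f ground_to_bev_pixel(const float x_m, const float y_m)
{
  return cv::Point2f(y_m * 12.0f + 320.0f, x_m * 12.0f);
}

// ground_to_bev_pixel(5.0f, -10.0f) -> (200, 60)
// ground_to_bev_pixel(55.0f, 10.0f) -> (440, 660): the far edge of the
// 5-55 m calibration rectangle falls just outside the 640 px canvas.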
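
The extrinsic handed to IPM is the 4x4 pose of the camera expressed in the base frame: the constructor inverts it to move the four ground-plane points into the camera frame before projecting them with the intrinsic, and the node obtains it from the TF lookup in onCameraInfo(). A self-contained sketch under assumed numbers (camera 1.5 m above base_link, optical axis along +x, standard optical-frame orientation, intrinsics already at the network resolution) exercises the same pipeline outside ROS:

#include <Eigen/Core>
#include <Eigen/Geometry>
#include <opencv2/opencv.hpp>

#include "utils.hpp"

int main()
{
  // Hypothetical intrinsics, already matched to the network input size.
  Eigen::Matrix3f intrinsic;
  intrinsic << 500, 0, 320,
               0, 500, 240,
               0, 0, 1;

  // Camera pose in base_link: 1.5 m up, looking along +x of base_link.
  // Columns of rot are the optical axes in base coordinates:
  //   camera x (right) = -y_base, camera y (down) = -z_base, camera z (forward) = +x_base.
  Eigen::Matrix3f rot;
  rot << 0, 0, 1,
        -1, 0, 0,
         0, -1, 0;
  Eigen::Matrix4f extrinsic = Eigen::Matrix4f::Identity();
  extrinsic.block<3, 3>(0, 0) = rot;
  extrinsic(2, 3) = 1.5f;

  IPM ipm(intrinsic, extrinsic);

  cv::Mat mask = cv::Mat::zeros(480, 640, CV_8UC3);  // stand-in for the colorized segmentation mask
  cv::Mat bev;
  ipm.run(mask, bev);  // bev is the 640x640 ground-plane projection
  return 0;
}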