From 81e03ae7775428808fffef25ddb08a46dac2c96b Mon Sep 17 00:00:00 2001 From: Takahiro Ishikawa Date: Tue, 9 May 2023 21:57:50 +0900 Subject: [PATCH] perf(ring_outlier_filter): performance tuning (#3014) * Perforance tuning Signed-off-by: Takahiro Ishikawa * style(pre-commit): autofix * parameterize Signed-off-by: Takahiro Ishikawa * fix bug Signed-off-by: Takahiro Ishikawa * add transform computation Signed-off-by: Takahiro Ishikawa * style(pre-commit): autofix * fix Signed-off-by: Takahiro Ishikawa * fix Signed-off-by: Takahiro Ishikawa * fix Co-authored-by: Shunsuke Miura <37187849+miursh@users.noreply.github.com> * fix magic number Signed-off-by: Takahiro Ishikawa * style(pre-commit): autofix * change param default value Signed-off-by: Takahiro Ishikawa * Revert "fix" This reverts commit 1463b7d141e61bb835f9c99bba651c51eb94fac4. --------- Signed-off-by: Takahiro Ishikawa --- .../docs/ring-outlier-filter.md | 12 +- .../ring_outlier_filter_nodelet.hpp | 31 ++-- .../pointcloud_preprocessor/src/filter.cpp | 7 +- .../ring_outlier_filter_nodelet.cpp | 175 +++++++++++++----- 4 files changed, 157 insertions(+), 68 deletions(-) diff --git a/sensing/pointcloud_preprocessor/docs/ring-outlier-filter.md b/sensing/pointcloud_preprocessor/docs/ring-outlier-filter.md index 1cd4a6f1245c9..42ca694235a00 100644 --- a/sensing/pointcloud_preprocessor/docs/ring-outlier-filter.md +++ b/sensing/pointcloud_preprocessor/docs/ring-outlier-filter.md @@ -22,11 +22,13 @@ This implementation inherits `pointcloud_preprocessor::Filter` class, please ref ### Core Parameters -| Name | Type | Default Value | Description | -| ------------------------- | ------ | ------------- | ----------- | -| `distance_ratio` | double | 1.03 | | -| `object_length_threshold` | double | 0.1 | | -| `num_points_threshold` | int | 4 | | +| Name | Type | Default Value | Description | +| ------------------------- | ------- | ------------- | ----------------------------------------------------------------------------------- | +| `distance_ratio` | double | 1.03 | | +| `object_length_threshold` | double | 0.1 | | +| `num_points_threshold` | int | 4 | | +| `max_rings_num` | uint_16 | 128 | | +| `max_points_num_per_ring` | size_t | 4000 | Set this value large enough such that `HFoV / resolution < max_points_num_per_ring` | ## Assumptions / Known limits diff --git a/sensing/pointcloud_preprocessor/include/pointcloud_preprocessor/outlier_filter/ring_outlier_filter_nodelet.hpp b/sensing/pointcloud_preprocessor/include/pointcloud_preprocessor/outlier_filter/ring_outlier_filter_nodelet.hpp index b517b2be78b47..a906eb913d863 100644 --- a/sensing/pointcloud_preprocessor/include/pointcloud_preprocessor/outlier_filter/ring_outlier_filter_nodelet.hpp +++ b/sensing/pointcloud_preprocessor/include/pointcloud_preprocessor/outlier_filter/ring_outlier_filter_nodelet.hpp @@ -20,6 +20,7 @@ #include +#include #include namespace pointcloud_preprocessor @@ -33,10 +34,18 @@ class RingOutlierFilterComponent : public pointcloud_preprocessor::Filter virtual void filter( const PointCloud2ConstPtr & input, const IndicesPtr & indices, PointCloud2 & output); + // TODO(sykwer): Temporary Implementation: Remove this interface when all the filter nodes conform + // to new API + virtual void faster_filter( + const PointCloud2ConstPtr & input, const IndicesPtr & indices, PointCloud2 & output, + const TransformInfo & transform_info); + private: double distance_ratio_; double object_length_threshold_; int num_points_threshold_; + uint16_t max_rings_num_; + size_t max_points_num_per_ring_; /** \brief Parameter service callback result : needed to be hold */ OnSetParametersCallbackHandle::SharedPtr set_param_res_; @@ -45,18 +54,18 @@ class RingOutlierFilterComponent : public pointcloud_preprocessor::Filter rcl_interfaces::msg::SetParametersResult paramCallback(const std::vector & p); bool isCluster( - sensor_msgs::msg::PointCloud2::SharedPtr & input_ptr, - const std::vector & tmp_indices) + const PointCloud2ConstPtr & input, std::pair data_idx_both_ends, int walk_size) { - PointXYZI * front_pt = reinterpret_cast(&input_ptr->data[tmp_indices.front()]); - PointXYZI * back_pt = reinterpret_cast(&input_ptr->data[tmp_indices.back()]); - - const auto x_diff = front_pt->x - back_pt->x; - const auto y_diff = front_pt->y - back_pt->y; - const auto z_diff = front_pt->z - back_pt->z; - return static_cast(tmp_indices.size()) > num_points_threshold_ || - (x_diff * x_diff) + (y_diff * y_diff) + (z_diff * z_diff) >= - object_length_threshold_ * object_length_threshold_; + if (walk_size > num_points_threshold_) return true; + + auto first_point = reinterpret_cast(&input->data[data_idx_both_ends.first]); + auto last_point = reinterpret_cast(&input->data[data_idx_both_ends.second]); + + const auto x = first_point->x - last_point->x; + const auto y = first_point->y - last_point->y; + const auto z = first_point->z - last_point->z; + + return x * x + y * y + z * z >= object_length_threshold_ * object_length_threshold_; } public: diff --git a/sensing/pointcloud_preprocessor/src/filter.cpp b/sensing/pointcloud_preprocessor/src/filter.cpp index 8864b90023dc1..f0b716c73b5f1 100644 --- a/sensing/pointcloud_preprocessor/src/filter.cpp +++ b/sensing/pointcloud_preprocessor/src/filter.cpp @@ -56,6 +56,7 @@ #include #include +#include #include #include #include @@ -123,8 +124,10 @@ void pointcloud_preprocessor::Filter::subscribe(const std::string & filter_name) // TODO(sykwer): Change the corresponding node to subscribe to `faster_input_indices_callback` // each time a child class supports the faster version. // When all the child classes support the faster version, this workaround is deleted. - auto callback = filter_name == "CropBoxFilter" ? &Filter::faster_input_indices_callback - : &Filter::input_indices_callback; + std::set supported_nodes = {"CropBoxFilter", "RingOutlierFilter"}; + auto callback = supported_nodes.find(filter_name) != supported_nodes.end() + ? &Filter::faster_input_indices_callback + : &Filter::input_indices_callback; if (use_indices_) { // Subscribe to the input using a filter diff --git a/sensing/pointcloud_preprocessor/src/outlier_filter/ring_outlier_filter_nodelet.cpp b/sensing/pointcloud_preprocessor/src/outlier_filter/ring_outlier_filter_nodelet.cpp index 65581f0c31d19..501583caed7f7 100644 --- a/sensing/pointcloud_preprocessor/src/outlier_filter/ring_outlier_filter_nodelet.cpp +++ b/sensing/pointcloud_preprocessor/src/outlier_filter/ring_outlier_filter_nodelet.cpp @@ -37,6 +37,9 @@ RingOutlierFilterComponent::RingOutlierFilterComponent(const rclcpp::NodeOptions object_length_threshold_ = static_cast(declare_parameter("object_length_threshold", 0.1)); num_points_threshold_ = static_cast(declare_parameter("num_points_threshold", 4)); + max_rings_num_ = static_cast(declare_parameter("max_rings_num", 128)); + max_points_num_per_ring_ = + static_cast(declare_parameter("max_points_num_per_ring", 4000)); } using std::placeholders::_1; @@ -44,82 +47,143 @@ RingOutlierFilterComponent::RingOutlierFilterComponent(const rclcpp::NodeOptions std::bind(&RingOutlierFilterComponent::paramCallback, this, _1)); } -void RingOutlierFilterComponent::filter( +// TODO(sykwer): Temporary Implementation: Rename this function to `filter()` when all the filter +// nodes conform to new API. Then delete the old `filter()` defined below. +void RingOutlierFilterComponent::faster_filter( const PointCloud2ConstPtr & input, [[maybe_unused]] const IndicesPtr & indices, - PointCloud2 & output) + PointCloud2 & output, const TransformInfo & transform_info) { std::scoped_lock lock(mutex_); stop_watch_ptr_->toc("processing_time", true); - std::unordered_map> input_ring_map; - input_ring_map.reserve(128); - sensor_msgs::msg::PointCloud2::SharedPtr input_ptr = - std::make_shared(*input); + + output.point_step = sizeof(PointXYZI); + output.data.resize(output.point_step * input->width); + size_t output_size = 0; const auto ring_offset = input->fields.at(static_cast(autoware_point_types::PointIndex::Ring)).offset; - for (std::size_t idx = 0U; idx < input_ptr->data.size(); idx += input_ptr->point_step) { - input_ring_map[*reinterpret_cast(&input_ptr->data[idx + ring_offset])].push_back( - idx); - } - - PointCloud2Modifier output_modifier{output, input->header.frame_id}; - output_modifier.reserve(input->width); - - std::vector tmp_indices; - tmp_indices.reserve(input->width); - const auto azimuth_offset = input->fields.at(static_cast(autoware_point_types::PointIndex::Azimuth)).offset; const auto distance_offset = input->fields.at(static_cast(autoware_point_types::PointIndex::Distance)).offset; - for (const auto & ring_indices : input_ring_map) { - if (ring_indices.second.size() < 2) { - continue; - } - for (size_t idx = 0U; idx < ring_indices.second.size() - 1; ++idx) { - const auto & current_idx = ring_indices.second.at(idx); - const auto & next_idx = ring_indices.second.at(idx + 1); - tmp_indices.emplace_back(current_idx); + std::vector> ring2indices; + ring2indices.reserve(max_rings_num_); + + for (uint16_t i = 0; i < max_rings_num_; i++) { + ring2indices.push_back(std::vector()); + ring2indices.back().reserve(max_points_num_per_ring_); + } + + for (size_t data_idx = 0; data_idx < input->data.size(); data_idx += input->point_step) { + const uint16_t ring = *reinterpret_cast(&input->data[data_idx + ring_offset]); + ring2indices[ring].push_back(data_idx); + } + + // walk range: [walk_first_idx, walk_last_idx] + int walk_first_idx = 0; + int walk_last_idx = -1; + + for (const auto & indices : ring2indices) { + if (indices.size() < 2) continue; + + walk_first_idx = 0; + walk_last_idx = -1; + + for (size_t idx = 0U; idx < indices.size() - 1; ++idx) { + const size_t & current_data_idx = indices[idx]; + const size_t & next_data_idx = indices[idx + 1]; + walk_last_idx = idx; // if(std::abs(iter->distance - (iter+1)->distance) <= std::sqrt(iter->distance) * 0.08) - const auto current_pt_azimuth = - *reinterpret_cast(&input_ptr->data[current_idx + azimuth_offset]); - const auto next_pt_azimuth = - *reinterpret_cast(&input_ptr->data[next_idx + azimuth_offset]); - float azimuth_diff = next_pt_azimuth - current_pt_azimuth; + + const float & current_azimuth = + *reinterpret_cast(&input->data[current_data_idx + azimuth_offset]); + const float & next_azimuth = + *reinterpret_cast(&input->data[next_data_idx + azimuth_offset]); + float azimuth_diff = next_azimuth - current_azimuth; azimuth_diff = azimuth_diff < 0.f ? azimuth_diff + 36000.f : azimuth_diff; - const auto current_pt_distance = - *reinterpret_cast(&input_ptr->data[current_idx + distance_offset]); - const auto next_pt_distance = - *reinterpret_cast(&input_ptr->data[next_idx + distance_offset]); + const float & current_distance = + *reinterpret_cast(&input->data[current_data_idx + distance_offset]); + const float & next_distance = + *reinterpret_cast(&input->data[next_data_idx + distance_offset]); if ( - std::max(current_pt_distance, next_pt_distance) < - std::min(current_pt_distance, next_pt_distance) * distance_ratio_ && + std::max(current_distance, next_distance) < + std::min(current_distance, next_distance) * distance_ratio_ && azimuth_diff < 100.f) { - continue; + continue; // Determined to be included in the same walk } - if (isCluster(input_ptr, tmp_indices)) { - for (const auto & tmp_idx : tmp_indices) { - output_modifier.push_back( - std::move(*reinterpret_cast(&input_ptr->data[tmp_idx]))); + + if (isCluster( + input, std::make_pair(indices[walk_first_idx], indices[walk_last_idx]), + walk_last_idx - walk_first_idx + 1)) { + for (int i = walk_first_idx; i <= walk_last_idx; i++) { + auto output_ptr = reinterpret_cast(&output.data[output_size]); + auto input_ptr = reinterpret_cast(&input->data[indices[i]]); + + if (transform_info.need_transform) { + Eigen::Vector4f p(input_ptr->x, input_ptr->y, input_ptr->z, 1); + p = transform_info.eigen_transform * p; + output_ptr->x = p[0]; + output_ptr->y = p[1]; + output_ptr->z = p[2]; + output_ptr->intensity = input_ptr->intensity; + } else { + *output_ptr = *input_ptr; + } + + output_size += output.point_step; } } - tmp_indices.clear(); - } - if (tmp_indices.empty()) { - continue; + + walk_first_idx = idx + 1; } - if (isCluster(input_ptr, tmp_indices)) { - for (const auto & tmp_idx : tmp_indices) { - output_modifier.push_back( - std::move(*reinterpret_cast(&input_ptr->data[tmp_idx]))); + + if (walk_first_idx > walk_last_idx) continue; + + if (isCluster( + input, std::make_pair(indices[walk_first_idx], indices[walk_last_idx]), + walk_last_idx - walk_first_idx + 1)) { + for (int i = walk_first_idx; i <= walk_last_idx; i++) { + auto output_ptr = reinterpret_cast(&output.data[output_size]); + auto input_ptr = reinterpret_cast(&input->data[indices[i]]); + + if (transform_info.need_transform) { + Eigen::Vector4f p(input_ptr->x, input_ptr->y, input_ptr->z, 1); + p = transform_info.eigen_transform * p; + output_ptr->x = p[0]; + output_ptr->y = p[1]; + output_ptr->z = p[2]; + output_ptr->intensity = input_ptr->intensity; + } else { + *output_ptr = *input_ptr; + } + + output_size += output.point_step; } } - tmp_indices.clear(); } + + output.data.resize(output_size); + + // Note that `input->header.frame_id` is data before converted when `transform_info.need_transform + // == true` + output.header.frame_id = !tf_input_frame_.empty() ? tf_input_frame_ : tf_input_orig_frame_; + + output.fields.resize(4); // x, y, z, intensity + std::copy( + input->fields.begin(), + input->fields.begin() + static_cast(autoware_point_types::PointIndex::Intensity) + 1, + output.fields.begin()); + + output.height = 1; + output.is_bigendian = input->is_bigendian; + output.is_dense = input->is_dense; + output.width = static_cast(output.data.size() / output.height / output.point_step); + output.row_step = static_cast(output.data.size() / output.height); + // add processing time for debug if (debug_publisher_) { const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic_time", true); @@ -131,6 +195,17 @@ void RingOutlierFilterComponent::filter( } } +// TODO(sykwer): Temporary Implementation: Delete this function definition when all the filter nodes +// conform to new API +void RingOutlierFilterComponent::filter( + const PointCloud2ConstPtr & input, [[maybe_unused]] const IndicesPtr & indices, + PointCloud2 & output) +{ + (void)input; + (void)indices; + (void)output; +} + rcl_interfaces::msg::SetParametersResult RingOutlierFilterComponent::paramCallback( const std::vector & p) {