Skip to content

Commit

Permalink
perf(ring_outlier_filter): performance tuning (autowarefoundation#3014)
Browse files Browse the repository at this point in the history
* Perforance tuning

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* style(pre-commit): autofix

* parameterize

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* fix bug

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* add transform computation

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* style(pre-commit): autofix

* fix

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* fix

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* fix

Co-authored-by: Shunsuke Miura <37187849+miursh@users.noreply.github.com>

* fix magic number

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* style(pre-commit): autofix

* change param default value

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>

* Revert "fix"

This reverts commit 1463b7d.

---------

Signed-off-by: Takahiro Ishikawa <sykwer@gmail.com>
  • Loading branch information
sykwer authored May 9, 2023
1 parent f85c90b commit 81e03ae
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 68 deletions.
12 changes: 7 additions & 5 deletions sensing/pointcloud_preprocessor/docs/ring-outlier-filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ This implementation inherits `pointcloud_preprocessor::Filter` class, please ref

### Core Parameters

| Name | Type | Default Value | Description |
| ------------------------- | ------ | ------------- | ----------- |
| `distance_ratio` | double | 1.03 | |
| `object_length_threshold` | double | 0.1 | |
| `num_points_threshold` | int | 4 | |
| Name | Type | Default Value | Description |
| ------------------------- | ------- | ------------- | ----------------------------------------------------------------------------------- |
| `distance_ratio` | double | 1.03 | |
| `object_length_threshold` | double | 0.1 | |
| `num_points_threshold` | int | 4 | |
| `max_rings_num` | uint_16 | 128 | |
| `max_points_num_per_ring` | size_t | 4000 | Set this value large enough such that `HFoV / resolution < max_points_num_per_ring` |

## Assumptions / Known limits

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <point_cloud_msg_wrapper/point_cloud_msg_wrapper.hpp>

#include <utility>
#include <vector>

namespace pointcloud_preprocessor
Expand All @@ -33,10 +34,18 @@ class RingOutlierFilterComponent : public pointcloud_preprocessor::Filter
virtual void filter(
const PointCloud2ConstPtr & input, const IndicesPtr & indices, PointCloud2 & output);

// TODO(sykwer): Temporary Implementation: Remove this interface when all the filter nodes conform
// to new API
virtual void faster_filter(
const PointCloud2ConstPtr & input, const IndicesPtr & indices, PointCloud2 & output,
const TransformInfo & transform_info);

private:
double distance_ratio_;
double object_length_threshold_;
int num_points_threshold_;
uint16_t max_rings_num_;
size_t max_points_num_per_ring_;

/** \brief Parameter service callback result : needed to be hold */
OnSetParametersCallbackHandle::SharedPtr set_param_res_;
Expand All @@ -45,18 +54,18 @@ class RingOutlierFilterComponent : public pointcloud_preprocessor::Filter
rcl_interfaces::msg::SetParametersResult paramCallback(const std::vector<rclcpp::Parameter> & p);

bool isCluster(
sensor_msgs::msg::PointCloud2::SharedPtr & input_ptr,
const std::vector<std::size_t> & tmp_indices)
const PointCloud2ConstPtr & input, std::pair<int, int> data_idx_both_ends, int walk_size)
{
PointXYZI * front_pt = reinterpret_cast<PointXYZI *>(&input_ptr->data[tmp_indices.front()]);
PointXYZI * back_pt = reinterpret_cast<PointXYZI *>(&input_ptr->data[tmp_indices.back()]);

const auto x_diff = front_pt->x - back_pt->x;
const auto y_diff = front_pt->y - back_pt->y;
const auto z_diff = front_pt->z - back_pt->z;
return static_cast<int>(tmp_indices.size()) > num_points_threshold_ ||
(x_diff * x_diff) + (y_diff * y_diff) + (z_diff * z_diff) >=
object_length_threshold_ * object_length_threshold_;
if (walk_size > num_points_threshold_) return true;

auto first_point = reinterpret_cast<const PointXYZI *>(&input->data[data_idx_both_ends.first]);
auto last_point = reinterpret_cast<const PointXYZI *>(&input->data[data_idx_both_ends.second]);

const auto x = first_point->x - last_point->x;
const auto y = first_point->y - last_point->y;
const auto z = first_point->z - last_point->z;

return x * x + y * y + z * z >= object_length_threshold_ * object_length_threshold_;
}

public:
Expand Down
7 changes: 5 additions & 2 deletions sensing/pointcloud_preprocessor/src/filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include <pcl/io/io.h>

#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -123,8 +124,10 @@ void pointcloud_preprocessor::Filter::subscribe(const std::string & filter_name)
// TODO(sykwer): Change the corresponding node to subscribe to `faster_input_indices_callback`
// each time a child class supports the faster version.
// When all the child classes support the faster version, this workaround is deleted.
auto callback = filter_name == "CropBoxFilter" ? &Filter::faster_input_indices_callback
: &Filter::input_indices_callback;
std::set<std::string> supported_nodes = {"CropBoxFilter", "RingOutlierFilter"};
auto callback = supported_nodes.find(filter_name) != supported_nodes.end()
? &Filter::faster_input_indices_callback
: &Filter::input_indices_callback;

if (use_indices_) {
// Subscribe to the input using a filter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,89 +37,153 @@ RingOutlierFilterComponent::RingOutlierFilterComponent(const rclcpp::NodeOptions
object_length_threshold_ =
static_cast<double>(declare_parameter("object_length_threshold", 0.1));
num_points_threshold_ = static_cast<int>(declare_parameter("num_points_threshold", 4));
max_rings_num_ = static_cast<uint16_t>(declare_parameter("max_rings_num", 128));
max_points_num_per_ring_ =
static_cast<size_t>(declare_parameter("max_points_num_per_ring", 4000));
}

using std::placeholders::_1;
set_param_res_ = this->add_on_set_parameters_callback(
std::bind(&RingOutlierFilterComponent::paramCallback, this, _1));
}

void RingOutlierFilterComponent::filter(
// TODO(sykwer): Temporary Implementation: Rename this function to `filter()` when all the filter
// nodes conform to new API. Then delete the old `filter()` defined below.
void RingOutlierFilterComponent::faster_filter(
const PointCloud2ConstPtr & input, [[maybe_unused]] const IndicesPtr & indices,
PointCloud2 & output)
PointCloud2 & output, const TransformInfo & transform_info)
{
std::scoped_lock lock(mutex_);
stop_watch_ptr_->toc("processing_time", true);
std::unordered_map<uint16_t, std::vector<std::size_t>> input_ring_map;
input_ring_map.reserve(128);
sensor_msgs::msg::PointCloud2::SharedPtr input_ptr =
std::make_shared<sensor_msgs::msg::PointCloud2>(*input);

output.point_step = sizeof(PointXYZI);
output.data.resize(output.point_step * input->width);
size_t output_size = 0;

const auto ring_offset =
input->fields.at(static_cast<size_t>(autoware_point_types::PointIndex::Ring)).offset;
for (std::size_t idx = 0U; idx < input_ptr->data.size(); idx += input_ptr->point_step) {
input_ring_map[*reinterpret_cast<uint16_t *>(&input_ptr->data[idx + ring_offset])].push_back(
idx);
}

PointCloud2Modifier<PointXYZI> output_modifier{output, input->header.frame_id};
output_modifier.reserve(input->width);

std::vector<std::size_t> tmp_indices;
tmp_indices.reserve(input->width);

const auto azimuth_offset =
input->fields.at(static_cast<size_t>(autoware_point_types::PointIndex::Azimuth)).offset;
const auto distance_offset =
input->fields.at(static_cast<size_t>(autoware_point_types::PointIndex::Distance)).offset;
for (const auto & ring_indices : input_ring_map) {
if (ring_indices.second.size() < 2) {
continue;
}

for (size_t idx = 0U; idx < ring_indices.second.size() - 1; ++idx) {
const auto & current_idx = ring_indices.second.at(idx);
const auto & next_idx = ring_indices.second.at(idx + 1);
tmp_indices.emplace_back(current_idx);
std::vector<std::vector<size_t>> ring2indices;
ring2indices.reserve(max_rings_num_);

for (uint16_t i = 0; i < max_rings_num_; i++) {
ring2indices.push_back(std::vector<size_t>());
ring2indices.back().reserve(max_points_num_per_ring_);
}

for (size_t data_idx = 0; data_idx < input->data.size(); data_idx += input->point_step) {
const uint16_t ring = *reinterpret_cast<const uint16_t *>(&input->data[data_idx + ring_offset]);
ring2indices[ring].push_back(data_idx);
}

// walk range: [walk_first_idx, walk_last_idx]
int walk_first_idx = 0;
int walk_last_idx = -1;

for (const auto & indices : ring2indices) {
if (indices.size() < 2) continue;

walk_first_idx = 0;
walk_last_idx = -1;

for (size_t idx = 0U; idx < indices.size() - 1; ++idx) {
const size_t & current_data_idx = indices[idx];
const size_t & next_data_idx = indices[idx + 1];
walk_last_idx = idx;

// if(std::abs(iter->distance - (iter+1)->distance) <= std::sqrt(iter->distance) * 0.08)
const auto current_pt_azimuth =
*reinterpret_cast<float *>(&input_ptr->data[current_idx + azimuth_offset]);
const auto next_pt_azimuth =
*reinterpret_cast<float *>(&input_ptr->data[next_idx + azimuth_offset]);
float azimuth_diff = next_pt_azimuth - current_pt_azimuth;

const float & current_azimuth =
*reinterpret_cast<const float *>(&input->data[current_data_idx + azimuth_offset]);
const float & next_azimuth =
*reinterpret_cast<const float *>(&input->data[next_data_idx + azimuth_offset]);
float azimuth_diff = next_azimuth - current_azimuth;
azimuth_diff = azimuth_diff < 0.f ? azimuth_diff + 36000.f : azimuth_diff;

const auto current_pt_distance =
*reinterpret_cast<float *>(&input_ptr->data[current_idx + distance_offset]);
const auto next_pt_distance =
*reinterpret_cast<float *>(&input_ptr->data[next_idx + distance_offset]);
const float & current_distance =
*reinterpret_cast<const float *>(&input->data[current_data_idx + distance_offset]);
const float & next_distance =
*reinterpret_cast<const float *>(&input->data[next_data_idx + distance_offset]);

if (
std::max(current_pt_distance, next_pt_distance) <
std::min(current_pt_distance, next_pt_distance) * distance_ratio_ &&
std::max(current_distance, next_distance) <
std::min(current_distance, next_distance) * distance_ratio_ &&
azimuth_diff < 100.f) {
continue;
continue; // Determined to be included in the same walk
}
if (isCluster(input_ptr, tmp_indices)) {
for (const auto & tmp_idx : tmp_indices) {
output_modifier.push_back(
std::move(*reinterpret_cast<PointXYZI *>(&input_ptr->data[tmp_idx])));

if (isCluster(
input, std::make_pair(indices[walk_first_idx], indices[walk_last_idx]),
walk_last_idx - walk_first_idx + 1)) {
for (int i = walk_first_idx; i <= walk_last_idx; i++) {
auto output_ptr = reinterpret_cast<PointXYZI *>(&output.data[output_size]);
auto input_ptr = reinterpret_cast<const PointXYZI *>(&input->data[indices[i]]);

if (transform_info.need_transform) {
Eigen::Vector4f p(input_ptr->x, input_ptr->y, input_ptr->z, 1);
p = transform_info.eigen_transform * p;
output_ptr->x = p[0];
output_ptr->y = p[1];
output_ptr->z = p[2];
output_ptr->intensity = input_ptr->intensity;
} else {
*output_ptr = *input_ptr;
}

output_size += output.point_step;
}
}
tmp_indices.clear();
}
if (tmp_indices.empty()) {
continue;

walk_first_idx = idx + 1;
}
if (isCluster(input_ptr, tmp_indices)) {
for (const auto & tmp_idx : tmp_indices) {
output_modifier.push_back(
std::move(*reinterpret_cast<PointXYZI *>(&input_ptr->data[tmp_idx])));

if (walk_first_idx > walk_last_idx) continue;

if (isCluster(
input, std::make_pair(indices[walk_first_idx], indices[walk_last_idx]),
walk_last_idx - walk_first_idx + 1)) {
for (int i = walk_first_idx; i <= walk_last_idx; i++) {
auto output_ptr = reinterpret_cast<PointXYZI *>(&output.data[output_size]);
auto input_ptr = reinterpret_cast<const PointXYZI *>(&input->data[indices[i]]);

if (transform_info.need_transform) {
Eigen::Vector4f p(input_ptr->x, input_ptr->y, input_ptr->z, 1);
p = transform_info.eigen_transform * p;
output_ptr->x = p[0];
output_ptr->y = p[1];
output_ptr->z = p[2];
output_ptr->intensity = input_ptr->intensity;
} else {
*output_ptr = *input_ptr;
}

output_size += output.point_step;
}
}
tmp_indices.clear();
}

output.data.resize(output_size);

// Note that `input->header.frame_id` is data before converted when `transform_info.need_transform
// == true`
output.header.frame_id = !tf_input_frame_.empty() ? tf_input_frame_ : tf_input_orig_frame_;

output.fields.resize(4); // x, y, z, intensity
std::copy(
input->fields.begin(),
input->fields.begin() + static_cast<size_t>(autoware_point_types::PointIndex::Intensity) + 1,
output.fields.begin());

output.height = 1;
output.is_bigendian = input->is_bigendian;
output.is_dense = input->is_dense;
output.width = static_cast<uint32_t>(output.data.size() / output.height / output.point_step);
output.row_step = static_cast<uint32_t>(output.data.size() / output.height);

// add processing time for debug
if (debug_publisher_) {
const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic_time", true);
Expand All @@ -131,6 +195,17 @@ void RingOutlierFilterComponent::filter(
}
}

// TODO(sykwer): Temporary Implementation: Delete this function definition when all the filter nodes
// conform to new API
void RingOutlierFilterComponent::filter(
const PointCloud2ConstPtr & input, [[maybe_unused]] const IndicesPtr & indices,
PointCloud2 & output)
{
(void)input;
(void)indices;
(void)output;
}

rcl_interfaces::msg::SetParametersResult RingOutlierFilterComponent::paramCallback(
const std::vector<rclcpp::Parameter> & p)
{
Expand Down

0 comments on commit 81e03ae

Please sign in to comment.