From 1b5a90bac811cf27df9cea9611195f2776a5b055 Mon Sep 17 00:00:00 2001 From: cirpue49 Date: Tue, 26 Feb 2019 15:42:25 +0900 Subject: [PATCH 01/37] Add PointPillars --- .../lidar_point_pillars/CMakeLists.txt | 119 ++++ .../include/anchor_mask_cuda.h | 84 +++ .../lidar_point_pillars/include/common.h | 45 ++ .../lidar_point_pillars/include/nms_cuda.h | 60 ++ .../include/point_pillars.h | 290 ++++++++ .../include/point_pillars_ros.h | 141 ++++ .../include/postprocess_cuda.h | 94 +++ .../include/preprocess_points.h | 118 ++++ .../include/preprocess_points_cuda.h | 116 ++++ .../include/scatter_cuda.h | 58 ++ .../launch/lidar_point_pillars.launch | 24 + .../nodes/anchor_mask_cuda.cu | 203 ++++++ .../nodes/lidar_point_pillars_node.cpp | 31 + .../lidar_point_pillars/nodes/nms_cuda.cu | 110 ++++ .../nodes/point_pillars.cpp | 621 ++++++++++++++++++ .../nodes/point_pillars_ros.cpp | 181 +++++ .../nodes/postprocess_cuda.cu | 219 ++++++ .../nodes/preprocess_points.cpp | 165 +++++ .../nodes/preprocess_points_cuda.cu | 313 +++++++++ .../lidar_point_pillars/nodes/scatter_cuda.cu | 43 ++ .../packages/lidar_point_pillars/package.xml | 24 + .../test/src/test_point_pillars.cpp | 229 +++++++ .../runtime_manager/scripts/computing.yaml | 68 +- 23 files changed, 3355 insertions(+), 1 deletion(-) create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt new file mode 100755 index 00000000000..fad7f1a04c6 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -0,0 +1,119 @@ +cmake_minimum_required(VERSION 2.8.3) +project(lidar_point_pillars) + +# set flags for CUDA and TensorRT availability +option(CUDA_AVAIL "CUDA available" OFF) +find_package(CUDA) +if (CUDA_FOUND) + message("CUDA is available!") + message("CUDA Libs: ${CUDA_LIBRARIES}") + message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") + set(CUDA_AVAIL ON) +else() + message("CUDA NOT FOUND") + set(CUDA_AVAIL OFF) +endif (CUDA_FOUND) + +# try to find the tensorRT modules +option(TRT_AVAIL "TensorRT available" OFF) +find_library(NVINFER NAMES nvinfer) +find_library(NVPARSERS NAMES nvparsers) +if(NVINFER AND NVPARSERS) + message("TensorRT is available!") + message("NVINFER: ${NVINFER}") + message("NVPARSERS: ${NVPARSERS}") + set(TRT_AVAIL ON) + add_definitions(-DTRT_AVAIL) +else() + message("TensorRT is NOT Available") + set(TRT_AVAIL OFF) +endif() + +if(TRT_AVAIL AND CUDA_AVAIL) + + find_package(autoware_build_flags REQUIRED) + find_package(catkin REQUIRED COMPONENTS + roscpp + pcl_ros + autoware_msgs + ) + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + + catkin_package( + CATKIN_DEPENDS + roscpp + pcl_ros + autoware_msgs + ) + + include_directories( + include + ${catkin_INCLUDE_DIRS} + ) + set(SOURCE_FILES + nodes/lidar_point_pillars_node.cpp + nodes/point_pillars_ros.cpp + ) + + add_executable(lidar_point_pillars + ${SOURCE_FILES}) + + add_dependencies(lidar_point_pillars + ${catkin_EXPORTED_TARGETS} + ) + + + cuda_add_library(gpu_point_pillars_lib + nodes/preprocess_points_cuda.cu + nodes/anchor_mask_cuda.cu + nodes/scatter_cuda.cu + nodes/postprocess_cuda.cu + nodes/nms_cuda.cu + ) + + target_link_libraries(gpu_point_pillars_lib + ${CUDA_LIBRARIES} + ) + + add_library(point_pillars_lib + nodes/point_pillars.cpp + nodes/preprocess_points.cpp + ) + target_link_libraries(point_pillars_lib + ${catkin_LIBRARIES} + nvinfer + nvonnxparser + gpu_point_pillars_lib + ) + + target_link_libraries(lidar_point_pillars + ${catkin_LIBRARIES} + point_pillars_lib + ) + + + install(TARGETS + lidar_point_pillars + ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} + ) + + install(DIRECTORY launch/ + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch + PATTERN ".svn" EXCLUDE) + + install(DIRECTORY include/ + DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} + PATTERN ".svn" EXCLUDE + ) + + if (CATKIN_ENABLE_TESTING) + find_package(rostest REQUIRED) + catkin_add_gtest(test-point_pillars test/src/test_point_pillars.cpp) + target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib) + endif() +ELSE() + message("PointPillars won't be built, CUDA and/or TensorRT were not found.") +ENDIF () diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h new file mode 100644 index 00000000000..ccca5cb6b6d --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h @@ -0,0 +1,84 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file anchor_mask_cuda.h + * @brief Make anchor mask for filtering output + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef ANCHOR_MASK_CUDA_H +#define ANCHOR_MASK_CUDA_H + +class AnchorMaskCuda +{ +private: + const int NUM_INDS_FOR_SCAN_; + const int NUM_ANCHOR_X_INDS_; + const int NUM_ANCHOR_Y_INDS_; + const int NUM_ANCHOR_R_INDS_; + const float MIN_X_RANGE_; + const float MIN_Y_RANGE_; + const float PILLAR_X_SIZE_; + const float PILLAR_Y_SIZE_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; +public: + /** + * @brief Constructor + * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) + * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors + * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors + * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors + * @param[in] MIN_X_RANGE Minimum x value for pointcloud + * @param[in] MIN_Y_RANGE Minimum y value for pointcloud + * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar + * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar + * @param[in] GRID_X_SIZE Number of pillars in x-coordinate + * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate + * @details Captital variables never change after the compile + */ + AnchorMaskCuda(const int NUM_INDS_FOR_SCAN, + const int NUM_ANCHOR_X_INDS, + const int NUM_ANCHOR_Y_INDS, + const int NUM_ANCHOR_R_INDS, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const int GRID_X_SIZE, + const int GRID_Y_SIZE); + + /** + * @brief call cuda code for making anchor mask + * @param[in] dev_sparse_pillar_map Grid map representation for pillar occupancy + * @param[in] dev_cumsum_along_x Cumsum dev_sparse_pillar_map along x axis + * @param[in] dev_cumsum_along_y Cumsum dev_cumsum_along_x along y axis + * @param[in] dev_box_anchors_min_x Array for storng min x value for each anchor + * @param[in] dev_box_anchors_min_y Array for storng min y value for each anchor + * @param[in] dev_box_anchors_max_x Array for storng max x value for each anchor + * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor + * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor + * @param[out] dev_anchor_mask Anchor mask for filtering output + * @details dev_* means device memory. Make a mask for activating pillar occupancy area + */ + void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y, + const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, + const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask); +}; + +#endif // ANCHOR_MASK_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h new file mode 100644 index 00000000000..7cf02333b4d --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h @@ -0,0 +1,45 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file common.h + * @brief MACRO for CUDA codes + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef COMMON_H +#define COMMON_H + +//headers in STL +#include + +//headers in CUDA +#include + +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +#define GPU_CHECK(ans) { GPUAssert((ans), __FILE__, __LINE__); } +inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort=true) +{ + if (code != cudaSuccess) + { + fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } +} + +#endif // COMMON_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h new file mode 100644 index 00000000000..7a34abf498b --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h @@ -0,0 +1,60 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +* @file nms_cuda.h +* @brief Non-maximum suppresion for network output +* @author Modified by Kosuke Murakami +* @date 2019/02/26 +*/ + +#ifndef NMS_CUDA_H +#define NMS_CUDA_H + +//heders in STL +#include +#include + +//headers in local files +#include "common.h" + +class NMSCuda +{ +private: + const int NUM_THREADS_; + const float nms_overlap_threshold_; + +public: + /** + * @brief Constructor + * @param[in] NUM_THRESD Number of threads for launching cuda kernel + * @param[in] nms_overlap_threshold IOU threshold for NMS + * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam + */ + NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold); + + /** + * @brief GPU Non-Maximum Suppresion for network output + * @param[in] host_filter_count Number of filtered output + * @param[in] dev_sorted_box_for_nms Bounding box output sorted by score + * @param[out] out_keep_inds Indexes for selected bounding box + * @param[out] out_num_to_keep Number of keep bounding box + * @details Includes CUDA NMS and postprocessing for selecting box in CPU + */ + void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep); +}; + +#endif // NMS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h new file mode 100755 index 00000000000..c58fc5e58d7 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -0,0 +1,290 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file point_pillars.h + * @brief Algorithm for PointPillars + * @author Kosuke Murakami + * @date 2019/02/26 + */ + + +#ifndef POINTS_PILLAR_H +#define POINTS_PILLAR_H + +//headers in STL +#include +#include +#include +#include +#include +#include +#include + +//headers in TensorRT +#include "NvInfer.h" +#include "NvOnnxParser.h" + +//headers in local files +#include "common.h" +#include "preprocess_points.h" +#include "preprocess_points_cuda.h" +#include "anchor_mask_cuda.h" +#include "scatter_cuda.h" +#include "postprocess_cuda.h" + + +// Logger for TensorRT info/warning/errors +class Logger : public nvinfer1::ILogger +{ +public: + Logger(Severity severity = Severity::kWARNING) + : reportableSeverity(severity) + { + } + + void log(Severity severity, const char* msg) override + { + // suppress messages with severity enum value greater than the reportable + if (severity > reportableSeverity) + return; + + switch (severity) + { + case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; + case Severity::kERROR: std::cerr << "ERROR: "; break; + case Severity::kWARNING: std::cerr << "WARNING: "; break; + case Severity::kINFO: std::cerr << "INFO: "; break; + default: std::cerr << "UNKNOWN: "; break; + } + std::cerr << msg << std::endl; + } + + Severity reportableSeverity; +}; + +class PointPillars +{ +private: + //initize in initializer list + const bool reproduce_result_mode_; + const float score_threshold_; + const float nms_overlap_threshold_; + const std::string pfe_onnx_file_; + const std::string rpn_onnx_file_; + const int MAX_NUM_PILLARS_; + const int MAX_NUM_POINTS_PER_PILLAR_; + const int PFE_OUTPUT_SIZE_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; + const int GRID_Z_SIZE_; + const int RPN_INPUT_SIZE_; + const int NUM_ANCHOR_X_INDS_; + const int NUM_ANCHOR_Y_INDS_; + const int NUM_ANCHOR_R_INDS_; + const int NUM_ANCHOR_; + const int RPN_BOX_OUTPUT_SIZE_; + const int RPN_CLS_OUTPUT_SIZE_; + const int RPN_DIR_OUTPUT_SIZE_; + const float PILLAR_X_SIZE_; + const float PILLAR_Y_SIZE_; + const float PILLAR_Z_SIZE_; + const float MIN_X_RANGE_; + const float MIN_Y_RANGE_; + const float MIN_Z_RANGE_; + const float MAX_X_RANGE_; + const float MAX_Y_RANGE_; + const float MAX_Z_RANGE_; + const int BATCH_SIZE_; + const int NUM_INDS_FOR_SCAN_; + const int NUM_THREADS_; + const float SENSOR_HEIGHT_; + const float ANCHOR_DX_SIZE_; + const float ANCHOR_DY_SIZE_; + const float ANCHOR_DZ_SIZE_; + //end initializer list + + + int host_pillar_count_[1]; + + float* anchors_px_; + float* anchors_py_; + float* anchors_pz_; + float* anchors_dx_; + float* anchors_dy_; + float* anchors_dz_; + float* anchors_ro_; + + float* box_anchors_min_x_; + float* box_anchors_min_y_; + float* box_anchors_max_x_; + float* box_anchors_max_y_; + + // cuda malloc + float* dev_pillar_x_in_coors_; + float* dev_pillar_y_in_coors_; + float* dev_pillar_z_in_coors_; + float* dev_pillar_i_in_coors_; + int* dev_pillar_count_histo_; + + int* dev_x_coors_; + int* dev_y_coors_; + float* dev_num_points_per_pillar_; + int* dev_sparse_pillar_map_; + int* dev_cumsum_along_x_; + int* dev_cumsum_along_y_; + + float* dev_pillar_x_; + float* dev_pillar_y_; + float* dev_pillar_z_; + float* dev_pillar_i_; + + float* dev_x_coors_for_sub_shaped_; + float* dev_y_coors_for_sub_shaped_; + float* dev_pillar_feature_mask_; + + float* dev_box_anchors_min_x_; + float* dev_box_anchors_min_y_; + float* dev_box_anchors_max_x_; + float* dev_box_anchors_max_y_; + int* dev_anchor_mask_; + + void* pfe_buffers_[9]; + void* rpn_buffers_[4]; + + float* dev_scattered_feature_; + + float* dev_anchors_px_; + float* dev_anchors_py_; + float* dev_anchors_pz_; + float* dev_anchors_dx_; + float* dev_anchors_dy_; + float* dev_anchors_dz_; + float* dev_anchors_ro_; + float* dev_filtered_box_; + float* dev_filtered_score_; + int* dev_filtered_dir_; + float* dev_box_for_nms_; + int* dev_filter_count_; + + + std::unique_ptr preprocess_points_ptr_; + std::unique_ptr preprocess_points_cuda_ptr_; + std::unique_ptr anchor_mask_cuda_ptr_; + std::unique_ptr scatter_cuda_ptr_; + std::unique_ptr postprocess_cuda_ptr_; + + Logger g_logger_; + nvinfer1::IExecutionContext* pfe_context_; + nvinfer1::IExecutionContext* rpn_context_; + nvinfer1::IRuntime* pfe_runtime_; + nvinfer1::IRuntime* rpn_runtime_; + nvinfer1::ICudaEngine* pfe_engine_; + nvinfer1::ICudaEngine* rpn_engine_; + + /** + * @brief Memory allocation for device memory + * @details Call in the constructor + */ + void deviceMemoryMalloc(); + + /** + * @brief Initialize anchor + * @details Call in the constructor + */ + void initAnchors(); + + /** + * @brief Initialize TensorRT isntances + * @details Call in the constructor + */ + void initTRT(); + + /** + * @brief Convert ONNX to TensorRT model + * @param[in] model_file ONNX model file path + * @param[out] trt_model_stream TensorRT model made out of ONNX model + * @details Load ONNX model, and convert it to TensorRT model + */ + void onnxToTRTModel(const std::string& model_file, nvinfer1::IHostMemory*& trt_model_stream); + + /** + * @brief Preproces points + * @param[in] in_points_array pointcloud array + * @param[in] in_num_points Number of points + * @details Call CPU or GPU preprocess + */ + void preprocess(const float* in_points_array, const int in_num_points); + + /** + * @brief Preproces by CPU + * @param[in] in_points_array pointcloud array + * @param[in] in_num_points Number of points + * @details The output from CPU preprocess is reproducible + */ + void preprocessCPU(const float* in_points_array, const int in_num_points); + + /** + * @brief Preproces by GPU + * @param[in] in_points_array pointcloud array + * @param[in] in_num_points Number of points + * @details Fast preprocess comapared with CPU preprocess + */ + void preprocessGPU(const float* in_points_array, const int in_num_points); + + /** + * @brief Convert anchors to box form like min_x, min_y, max_x, max_y anchors + * @param[in] anchors_px Represents x-coordinate value for corresponding anchor + * @param[in] anchors_py Represents y-coordinate value for corresponding anchor + * @param[in] anchors_dx Represents x-dimension value for corresponding anchor + * @param[in] anchors_dy Represents y-dimension value for corresponding anchor + * @details Make box anchors for nms + */ + void convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy); + + /** + * @brief Memory allocation for anchors + * @details Memory allocation for anchors + */ + void putAnchorsInDeviceMemory(); + +public: + /** + * @brief Constructor + * @param[in] reproduce_result_mode Boolean, if true, the output is reproducible for the same input + * @param[in] score_threshold Score threshold for filtering output + * @param[in] nms_overlap_threshold IOU threshold for NMS + * @param[in] pfe_onnx_file Pillar Feature Extractor ONNX file path + * @param[in] rpn_onnx_file Region Proposal Network ONNX file path + * @details Variables could be chaned through rosparam + */ + PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, + const std::string pfe_onnx_file, const std::string rpn_onnx_file); + ~PointPillars(); + + /** + * @brief Call PointPillars for the inference + * @param[in] in_points_array pointcloud array + * @param[in] in_num_points Number of points + * @param[in] out_detections Network output bounding box + * @param[in] out_num_objects Number of output bounding box + * @details This is interface for the algorithm + */ + void doInference(const float* in_points_array, const int in_num_points, + std::vector& out_detection, int& out_num_objects); +}; + +#endif // POINTS_PILLAR_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h new file mode 100755 index 00000000000..e0b1e1bd53c --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -0,0 +1,141 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file point_pillars_ros.h + * @brief ROS interface for PointPillars + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef POINTS_PILLAR_ROS_H +#define POINTS_PILLAR_ROS_H + +//headers in STL +#include +#include + +//headers in ROS +#include +#include +#include + +//headers in PCL +#include + +//headers in local files +#include "point_pillars.h" + + +class PointPillarsROS +{ +private: + friend class TestClass; + + //initializer list + ros::NodeHandle private_nh_; + bool has_subscribed_baselink_; + const int NUM_POINT_FEATURE_; + const int OUTPUT_NUM_BOX_FEATURE_; + const float TRAINED_SENSOR_HEIGHT_; + const float NORMALIZING_INTENSITY_VALUE_; + const std::string BASELINK_FRAME_; + //end initializer list + + //rosparam + bool baselink_support_; + bool reproduce_result_mode_; + float score_threshold_; + float nms_overlap_threshold_; + std::string pfe_onnx_file_; + std::string rpn_onnx_file_; + //end rosparam + + ros::NodeHandle nh_; + ros::Subscriber sub_points_; + ros::Publisher pub_objects_; + + tf::TransformListener tf_listener_; + tf::StampedTransform baselink2lidar_; + tf::Transform angle_transform_; + tf::Transform angle_transform_inversed_; + + + + float offset_z_from_trained_data_; + + std::unique_ptr point_pillars_ptr_; + + /** + * @brief Get base_link to lidar transformation + * @param[in] target_frameid target lidar frame_id + * @details Get transformation info + */ + void getBaselinkToLidarTF(const std::string& target_frameid); + + /** + * @brief Analyze tf information + * @param[in] lidar2baselink transofomation info + * @details Calculate z offset compared with trained sensor height and get rotation matrix + */ + void getTFInfo(tf::StampedTransform lidar2baselink); + + /** + * @brief Transform pose based on tf stamp info + * @param[in] in_pose Target pose to be transformed + * @param[in] tf TF stamp contains rotation matrix and translation matrix + * @return geometry_msgs::Pose Transformed pose + * @details Calculate transformed pose + */ + geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose, + const tf::Transform& tf); + + /** + * @brief callback for pointcloud + * @param[in] input pointcloud from lidar sensor + * @details Call point_pillars to get 3D bounding box + */ + void pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& input); + + /** + * @brief convert pcl point to c++ array + * @param[in] in_pcl_pc_ptr pointcloud in pcl format + * @param[out] out_points_array converted pointcloud in c++ array format + * @param[in] offset_z (default: 1.0) when using baselink_support, offset height based on current sensor configuration + * @details convert pcl points to c++ array, plus offset z if it is necessary + */ + void pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z = 0); + + /** + * @brief publish DetectedObject + * @param[in] detections Network output bounding box + * @param[in] num_objects Number of objects + * @param[in] pc_header Header from pointcloud + * @details Convert std::vector to DetectedObject, and publish them + */ + void pubDetectedObject(const std::vector& detections, const int num_objects, const std_msgs::Header& pc_header); + +public: + PointPillarsROS(); + + /** + * @brief Create ROS pub/sub obejct + * @details Create/Initializing ros pub/sub object + */ + void createROSPubSub(); +}; + +#endif // POINTS_PILLAR_ROS_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h new file mode 100644 index 00000000000..261cdd19ed1 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h @@ -0,0 +1,94 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file postprocess_cuda.h + * @brief Postprocess for network output + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef POSTPROCESS_CUDA_H +#define POSTPROCESS_CUDA_H + +//headers in STL +#include +#include + +//headers in local files +#include "nms_cuda.h" + +class PostprocessCuda +{ +private: + const float FLOAT_MIN_; + const float FLOAT_MAX_; + const int NUM_ANCHOR_X_INDS_; + const int NUM_ANCHOR_Y_INDS_; + const int NUM_ANCHOR_R_INDS_; + const float score_threshold_; + const int NUM_THREADS_; + const float nms_overlap_threshold_; + + std::unique_ptr nms_cuda_ptr_; + +public: + /** + * @brief Constructor + * @param[in] FLOAT_MIN the lowest float value + * @param[in] FLOAT_MAX the maximum float value + * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors + * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors + * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors + * @param[in] score_threshold Score threshold for filtering output + * @param[in] NUM_THREDS Number of threads for launching cuda kernel + * @param[in] nms_overlap_threshold IOU threshold for NMS + * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam + */ + PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, + const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, + const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold); + + /** + * @brief Postprocessing for the network output + * @param[in] rpn_box_output Box prediction from the network output + * @param[in] rpn_cls_output Class prediction from the network output + * @param[in] rpn_dir_output Direction prediction from the network output + * @param[in] dev_anchor_mask Anchor mask for filtering + * @param[in] dev_anchors_px X-coordinate value for corresponding anchor + * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor + * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor + * @param[in] dev_anchors_dx X-dimension value for corresponding anchor + * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor + * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor + * @param[in] dev_anchors_ro Rotation value for corresponding anchor + * @param[in] dev_filtered_box Filtered box prediction + * @param[in] dev_filtered_score Filtered score prediction + * @param[in] dev_filtered_dir Filtered direction prediction + * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms + * @param[in] dev_filter_count The number of filtered output + * @param[out] out_detection Output bounding boxes + * @param[out] out_num_objects The number of output bounding boxes + * @details dev_* represents device memory allocated variables + */ + void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output, + int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, + const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, + float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count, + std::vector& out_detection, int& out_num_objects); +}; + +#endif // POSTPROCESS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h new file mode 100644 index 00000000000..bd0b54720f2 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h @@ -0,0 +1,118 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file preprocess_points.h + * @brief CPU version of preprocess points + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef PREPROCESS_POINTS_H +#define PREPROCESS_POINTS_H + + + +class PreprocessPoints +{ +private: + friend class TestClass; + const int MAX_NUM_PILLARS_; + const int MAX_NUM_POINTS_PER_PILLAR_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; + const int GRID_Z_SIZE_; + const float PILLAR_X_SIZE_; + const float PILLAR_Y_SIZE_; + const float PILLAR_Z_SIZE_; + const float MIN_X_RANGE_; + const float MIN_Y_RANGE_; + const float MIN_Z_RANGE_; + const int NUM_INDS_FOR_SCAN_; + + +public: + /** + * @brief Constructor + * @param[in] MAX_NUM_PILLARS Maximum number of pillars + * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar + * @param[in] GRID_X_SIZE Number of pillars in x-coordinate + * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate + * @param[in] GRID_Z_SIZE Number of pillars in z-coordinate + * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar + * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar + * @param[in] PILLAR_Z_SIZE Size of z-dimension for a pillar + * @param[in] MIN_X_RANGE Minimum x value for pointcloud + * @param[in] MIN_Y_RANGE Minimum y value for pointcloud + * @param[in] MIN_Z_RANGE Minimum z value for pointcloud + * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) + * @details Captital variables never change after the compile + */ + PreprocessPoints(const int MAX_NUM_PILLARS, + const int MAX_POINTS_PER_PILLAR, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE, + const int NUM_INDS_FOR_SCAN); + + /** + * @brief CPU preprocessing for input pointcloud + * @param[in] in_points_array Pointcloud array + * @param[in] in_num_points The number of points + * @param[in] x_coors X-coordinate indexes for corresponding pillars + * @param[in] y_coors Y-coordinate indexes for corresponding pillars + * @param[in] num_points_per_pillar Number of points in corresponding pillars + * @param[in] pillar_x X-coordinate value for points in each pillar + * @param[in] pillar_y Y-coordinate value for points in each pillar + * @param[in] pillar_z Z-coordinate value for points in each pillar + * @param[in] pillar_i Intensity value for points in each pillar + * @param[in] x_coors_for_sub_shaped Used for x substraction in the network + * @param[in] y_coors_for_sub_shaped Used for y substraction in the network + * @param[in] pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars + * @param[in] sparse_pillar_map Grid map represents occupancy pillars + * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud + * @details Convert pointcloud to pillar representation + */ + void preprocess(const float* in_points_array, int in_num_points, + int* x_coors, int* y_coors, float* num_points_per_pillar, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); + + /** + * @brief Initializing variables for preprocessing + * @param[in] coor_to_pillaridx Map for converting from one set of coordinate to a pillar + * @param[in] sparse_pillar_map Grid map represents occupancy pillars + * @param[in] pillar_x X-coordinate value for points in each pillar + * @param[in] pillar_y Y-coordinate value for points in each pillar + * @param[in] pillar_z Z-coordinate value for points in each pillar + * @param[in] pillar_i Intensity value for points in each pillar + * @param[in] x_coors_for_sub_shaped Used for x substraction in the network + * @param[in] y_coors_for_sub_shaped Used for y substraction in the network + * @details Initializeing input arguments with certain values + */ + void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped); +}; + +#endif // PREPROCESS_POINTS_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h new file mode 100644 index 00000000000..7a4dd8420d2 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h @@ -0,0 +1,116 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file preprocess_points_cuda.h + * @brief GPU version of preprocess points + * @author Kosuke Murakami + * @date 2019/02/26 + */ + + +#ifndef PREPROCESS_POINTS_CUDA_H +#define PREPROCESS_POINTS_CUDA_H + + +class PreprocessPointsCuda +{ +private: + //initialzer list + const int NUM_THREADS_; + const int MAX_NUM_PILLARS_; + const int MAX_NUM_POINTS_PER_PILLAR_; + const int NUM_INDS_FOR_SCAN_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; + const int GRID_Z_SIZE_; + const float PILLAR_X_SIZE_; + const float PILLAR_Y_SIZE_; + const float PILLAR_Z_SIZE_; + const float MIN_X_RANGE_; + const float MIN_Y_RANGE_; + const float MIN_Z_RANGE_; + //end initalizer list + + float* dev_pillar_x_in_coors_; + float* dev_pillar_y_in_coors_; + float* dev_pillar_z_in_coors_; + float* dev_pillar_i_in_coors_; + int* dev_pillar_count_histo_; + + int* dev_counter_; + int* dev_pillar_count_; + float* dev_x_coors_for_sub_; + float* dev_y_coors_for_sub_; + +public: + /** + * @brief Constructor + * @param[in] NUM_THREDS Number of threads for launching cuda kernel + * @param[in] MAX_NUM_PILLARS Maximum number of pillars + * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar + * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) + * @param[in] GRID_X_SIZE Number of pillars in x-coordinate + * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate + * @param[in] GRID_Z_SIZE Number of pillars in z-coordinate + * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar + * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar + * @param[in] PILLAR_Z_SIZE Size of z-dimension for a pillar + * @param[in] MIN_X_RANGE Minimum x value for pointcloud + * @param[in] MIN_Y_RANGE Minimum y value for pointcloud + * @param[in] MIN_Z_RANGE Minimum z value for pointcloud + * @details Captital variables never change after the compile + */ + PreprocessPointsCuda(const int NUM_THREADS, + const int MAX_NUM_PILLARS, + const int MAX_POINTS_PER_PILLAR, + const int NUM_INDS_FOR_SCAN, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE); + ~PreprocessPointsCuda(); + + /** + * @brief CUDA preprocessing for input pointcloud + * @param[in] dev_points Pointcloud array + * @param[in] in_num_points The number of points + * @param[in] dev_x_coors X-coordinate indexes for corresponding pillars + * @param[in] dev_y_coors Y-coordinate indexes for corresponding pillars + * @param[in] dev_num_points_per_pillar Number of points in corresponding pillars + * @param[in] dev_pillar_x X-coordinate value for points in each pillar + * @param[in] dev_pillar_y Y-coordinate value for points in each pillar + * @param[in] dev_pillar_z Z-coordinate value for points in each pillar + * @param[in] dev_pillar_i Intensity value for points in each pillar + * @param[in] dev_x_coors_for_sub_shaped Used for x substraction in the network + * @param[in] dev_y_coors_for_sub_shaped Used for y substraction in the network + * @param[in] dev_pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars + * @param[in] dev_sparse_pillar_map Grid map represents occupancy pillars + * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud + * @details Convert pointcloud to pillar representation + */ + void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors, + float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i, + float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped, + float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count); +}; + +#endif // PREPROCESS_POINTS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h new file mode 100644 index 00000000000..24e4f0b79dc --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h @@ -0,0 +1,58 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file scatter_cuda.h + * @brief CUDA code for scatter operation + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#ifndef SCATTERCUDA_H +#define SCATTERCUDA_H + +class ScatterCuda +{ +private: + const int NUM_THREADS_; + const int MAX_NUM_PILLARS_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; + +public: + /** + * @brief Constructor + * @param[in] NUM_THREADS The number of threads to launch cuda kernel + * @param[in] MAX_NUM_PILLARS Maximum number of pillars + * @param[in] GRID_X_SIZE Number of pillars in x-coordinate + * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate + * @details Captital variables never change after the compile + */ + ScatterCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int GRID_X_SIZE, const int GRID_Y_SIZE); + + /** + * @brief Call scatter cuda kernel + * @param[in] pillar_count The valid number of pillars + * @param[in] x_coors X-coordinate indexes for corresponding pillars + * @param[in] y_coors Y-coordinate indexes for corresponding pillars + * @param[in] pfe_output Output from Pillar Feature Extractor + * @param[out] scattered_feature Gridmap representation for pillars' feature + * @details Allocate pillars in gridmap based on index(coordinates) information + */ + void doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature); +}; + +#endif // SCATTERCUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch new file mode 100755 index 00000000000..4a7618eacc8 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu new file mode 100644 index 00000000000..4383c1ae63c --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu @@ -0,0 +1,203 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in local files +#include "common.h" +#include "anchor_mask_cuda.h" + +__global__ void scan_x(int *g_odata, int *g_idata, int n) +{ + extern __shared__ int temp[]; // allocated on invocation + int thid = threadIdx.x; + int bid = blockIdx.x; + int bdim = blockDim.x; + int offset = 1; + temp[2*thid] = g_idata[bid*bdim*2 + 2*thid]; // load input into shared memory + temp[2*thid+1] = g_idata[bid*bdim*2 + 2*thid+1]; + for (int d = n>>1; d > 0; d >>= 1) // build sum in place up the tree + { + __syncthreads(); + if (thid < d) + { + int ai = offset*(2*thid+1)-1; + int bi = offset*(2*thid+2)-1; + temp[bi] += temp[ai]; + } + offset *= 2; + } + if (thid == 0) { temp[n - 1] = 0; } // clear the last element + for (int d = 1; d < n; d *= 2) // traverse down tree & build scan + { + offset >>= 1; + __syncthreads(); + if (thid < d) + { + int ai = offset*(2*thid+1)-1; + int bi = offset*(2*thid+2)-1; + int t = temp[ai]; + temp[ai] = temp[bi]; + temp[bi] += t; + } + } + __syncthreads(); + g_odata[bid*bdim*2 + 2*thid] = temp[2*thid+1]; // write results to device memory + int second_ind = 2*thid+2; + if(second_ind == bdim*2 ) + { + g_odata[bid*bdim*2 + 2*thid+1] = temp[2*thid+1] + g_idata[bid*bdim*2 + 2*thid+1]; + } + else + { + g_odata[bid*bdim*2 + 2*thid+1] = temp[2*thid+2]; + } +} + +__global__ void scan_y(int *g_odata, int *g_idata, int n) +{ + extern __shared__ int temp[]; // allocated on invocation + int thid = threadIdx.x; + int bid = blockIdx.x; + int bdim = blockDim.x; + int gdim = gridDim.x; + int offset = 1; + temp[2*thid] = g_idata[bid + 2*thid*gdim]; // load input into shared memory + temp[2*thid+1] = g_idata[bid + 2*thid*gdim+ gdim]; + for (int d = n>>1; d > 0; d >>= 1) // build sum in place up the tree + { + __syncthreads(); + if (thid < d) + { + int ai = offset*(2*thid+1)-1; + int bi = offset*(2*thid+2)-1; + temp[bi] += temp[ai]; + } + offset *= 2; + } + if (thid == 0) { temp[n - 1] = 0; } // clear the last element + for (int d = 1; d < n; d *= 2) // traverse down tree & build scan + { + offset >>= 1; + __syncthreads(); + if (thid < d) + { + int ai = offset*(2*thid+1)-1; + int bi = offset*(2*thid+2)-1; + int t = temp[ai]; + temp[ai] = temp[bi]; + temp[bi] += t; + } + } + __syncthreads(); + g_odata[bid + 2*thid*gdim] = temp[2*thid+1]; // write results to device memory + int second_ind = 2*thid+2; + if(second_ind == bdim*2 ) + { + g_odata[bid + 2*thid*gdim + gdim] = temp[2*thid+1] + g_idata[bid + 2*thid*gdim + gdim]; + } + else + { + g_odata[bid + 2*thid*gdim + gdim] = temp[2*thid+2]; + } +} + +__global__ void make_anchor_mask_kernel(const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, + const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, + int* dev_sparse_pillar_map, int* dev_anchor_mask, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int NUM_INDS_FOR_SCAN) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + int anchor_coor[4] = {0}; + const int GRID_X_SIZE_1 = GRID_X_SIZE - 1;//grid_x_size - 1 + const int GRID_Y_SIZE_1 = GRID_Y_SIZE - 1;//grid_y_size - 1 + + anchor_coor[0] = floor((dev_box_anchors_min_x[tid] - MIN_X_RANGE) / PILLAR_X_SIZE); + anchor_coor[1] = floor((dev_box_anchors_min_y[tid] - MIN_Y_RANGE) / PILLAR_Y_SIZE); + anchor_coor[2] = floor((dev_box_anchors_max_x[tid] - MIN_X_RANGE) / PILLAR_X_SIZE); + anchor_coor[3] = floor((dev_box_anchors_max_y[tid] - MIN_Y_RANGE) / PILLAR_Y_SIZE); + anchor_coor[0] = max(anchor_coor[0], 0); + anchor_coor[1] = max(anchor_coor[1], 0); + anchor_coor[2] = min(anchor_coor[2], GRID_X_SIZE_1); + anchor_coor[3] = min(anchor_coor[3], GRID_Y_SIZE_1); + + int right_top = dev_sparse_pillar_map[anchor_coor[3]*NUM_INDS_FOR_SCAN + anchor_coor[2]]; + int left_bottom = dev_sparse_pillar_map[anchor_coor[1]*NUM_INDS_FOR_SCAN + anchor_coor[0]]; + int left_top = dev_sparse_pillar_map[anchor_coor[3]*NUM_INDS_FOR_SCAN + anchor_coor[0]]; + int right_bottom = dev_sparse_pillar_map[anchor_coor[1]*NUM_INDS_FOR_SCAN + anchor_coor[2]]; + + int area = right_top -left_top - right_bottom + left_bottom; + if(area > 1) + { + dev_anchor_mask[tid] = 1; + } + else + { + dev_anchor_mask[tid] = 0; + } +} + + +AnchorMaskCuda::AnchorMaskCuda(const int NUM_INDS_FOR_SCAN, + const int NUM_ANCHOR_X_INDS, + const int NUM_ANCHOR_Y_INDS, + const int NUM_ANCHOR_R_INDS, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const int GRID_X_SIZE, + const int GRID_Y_SIZE): +NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN), +NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS), +NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS), +NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS), +MIN_X_RANGE_(MIN_X_RANGE), +MIN_Y_RANGE_(MIN_Y_RANGE), +PILLAR_X_SIZE_(PILLAR_X_SIZE), +PILLAR_Y_SIZE_(PILLAR_Y_SIZE), +GRID_X_SIZE_(GRID_X_SIZE), +GRID_Y_SIZE_(GRID_Y_SIZE) +{ + +} + +void AnchorMaskCuda::doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y, + const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, + const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask) +{ + scan_x<<>>(dev_cumsum_along_x, dev_sparse_pillar_map, NUM_INDS_FOR_SCAN_); + scan_y<<>>(dev_cumsum_along_y, dev_cumsum_along_x, NUM_INDS_FOR_SCAN_); + GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map, dev_cumsum_along_y, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int), cudaMemcpyDeviceToDevice ) ); + make_anchor_mask_kernel<<>>( + dev_box_anchors_min_x, + dev_box_anchors_min_y, + dev_box_anchors_max_x, + dev_box_anchors_max_y, + dev_sparse_pillar_map, + dev_anchor_mask, + MIN_X_RANGE_, + MIN_Y_RANGE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + GRID_X_SIZE_, + GRID_Y_SIZE_, + NUM_INDS_FOR_SCAN_); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp new file mode 100755 index 00000000000..36617dd472b --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp @@ -0,0 +1,31 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in STL +#include + +//headers in local files +#include "point_pillars_ros.h" + +int main(int argc, char** argv) +{ + ros::init(argc, argv, "lidar_point_pillars"); + PointPillarsROS app; + app.createROSPubSub(); + ros::spin(); + + return 0; +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu new file mode 100644 index 00000000000..7680a44ad87 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu @@ -0,0 +1,110 @@ +// ------------------------------------------------------------------ +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License +// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) +// ------------------------------------------------------------------ + + +//headers in local files +#include "nms_cuda.h" + + +__device__ inline float devIoU(float const *const a, float const *const b) +{ + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) +{ + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + const int block_threads = blockDim.x; + + const int row_size = + min(n_boxes - row_start * block_threads, block_threads); + const int col_size = + min(n_boxes - col_start * block_threads, block_threads); + __shared__ float block_boxes[64 * 4]; + if (threadIdx.x < col_size) + { + block_boxes[threadIdx.x * 4 + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 0]; + block_boxes[threadIdx.x * 4 + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 1]; + block_boxes[threadIdx.x * 4 + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 2]; + block_boxes[threadIdx.x * 4 + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 3]; + } + __syncthreads(); + + if (threadIdx.x < row_size) + { + const int cur_box_idx = block_threads * row_start + threadIdx.x; + const float cur_box[4] = {dev_boxes[cur_box_idx*4 + 0], + dev_boxes[cur_box_idx*4 + 1], + dev_boxes[cur_box_idx*4 + 2], + dev_boxes[cur_box_idx*4 + 3]}; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) + { + start = threadIdx.x + 1; + } + for (int i = start; i < col_size; i++) + { + if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) + { + t |= 1ULL << i; + } + } + const int col_blocks = DIVUP(n_boxes, block_threads); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +NMSCuda::NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold): +NUM_THREADS_(NUM_THREADS), +nms_overlap_threshold_(nms_overlap_threshold) +{ +} + +void NMSCuda::doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep) +{ + const int col_blocks = DIVUP(host_filter_count, NUM_THREADS_); + dim3 blocks(DIVUP(host_filter_count, NUM_THREADS_),DIVUP(host_filter_count, NUM_THREADS_)); + dim3 threads(NUM_THREADS_); + + unsigned long long *dev_mask = NULL; + GPU_CHECK(cudaMalloc(&dev_mask, host_filter_count * col_blocks * sizeof(unsigned long long))); + + nms_kernel<<>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask); + + // postprocess for nms output + std::vector host_mask(host_filter_count * col_blocks); + GPU_CHECK(cudaMemcpy(&host_mask[0],dev_mask, sizeof(unsigned long long) * host_filter_count * col_blocks,cudaMemcpyDeviceToHost)); + std::vector remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + for (int i = 0; i < host_filter_count; i++) + { + int nblock = i / NUM_THREADS_; + int inblock = i % NUM_THREADS_; + + if (!(remv[nblock] & (1ULL << inblock))) + { + out_keep_inds[out_num_to_keep++] = i; + unsigned long long *p = &host_mask[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) + { + remv[j] |= p[j]; + } + } + } + GPU_CHECK(cudaFree(dev_mask)); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp new file mode 100755 index 00000000000..4035f399ffa --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -0,0 +1,621 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +//headers in STL +#include +#include + +//headers in ROS +#include + +//headers in local files +#include "point_pillars.h" + + +PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, + const std::string pfe_onnx_file, const std::string rpn_onnx_file): +reproduce_result_mode_(reproduce_result_mode), +score_threshold_(score_threshold), +nms_overlap_threshold_(nms_overlap_threshold), +pfe_onnx_file_(pfe_onnx_file), +rpn_onnx_file_(rpn_onnx_file), +MAX_NUM_PILLARS_(12000), +MAX_NUM_POINTS_PER_PILLAR_(100), +PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_*64), +GRID_X_SIZE_(432), +GRID_Y_SIZE_(496), +GRID_Z_SIZE_(1), +RPN_INPUT_SIZE_(64*GRID_X_SIZE_*GRID_Y_SIZE_), +NUM_ANCHOR_X_INDS_(GRID_X_SIZE_*0.5), +NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_*0.5), +NUM_ANCHOR_R_INDS_(2), +NUM_ANCHOR_(NUM_ANCHOR_X_INDS_*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_), +RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_*7) , +RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_), +RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_*2), +PILLAR_X_SIZE_(0.16), +PILLAR_Y_SIZE_(0.16), +PILLAR_Z_SIZE_(4.0), +MIN_X_RANGE_(0.0), +MIN_Y_RANGE_(-39.68), +MIN_Z_RANGE_(-3.0), +MAX_X_RANGE_(69.12), +MAX_Y_RANGE_(39.68), +MAX_Z_RANGE_(1), +BATCH_SIZE_(1), +NUM_INDS_FOR_SCAN_(512), +NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size +SENSOR_HEIGHT_(1.73), +ANCHOR_DX_SIZE_(1.6), +ANCHOR_DY_SIZE_(3.9), +ANCHOR_DZ_SIZE_(1.56) +{ + anchors_px_ = new float[NUM_ANCHOR_]; + anchors_py_ = new float[NUM_ANCHOR_]; + anchors_pz_ = new float[NUM_ANCHOR_]; + anchors_dx_ = new float[NUM_ANCHOR_]; + anchors_dy_ = new float[NUM_ANCHOR_]; + anchors_dz_ = new float[NUM_ANCHOR_]; + anchors_ro_ = new float[NUM_ANCHOR_]; + box_anchors_min_x_ = new float[NUM_ANCHOR_]; + box_anchors_min_y_ = new float[NUM_ANCHOR_]; + box_anchors_max_x_ = new float[NUM_ANCHOR_]; + box_anchors_max_y_ = new float[NUM_ANCHOR_]; + + + if(reproduce_result_mode_) + { + preprocess_points_ptr_.reset(new PreprocessPoints( + MAX_NUM_PILLARS_, + MAX_NUM_POINTS_PER_PILLAR_, + GRID_X_SIZE_, + GRID_Y_SIZE_, + GRID_Z_SIZE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + PILLAR_Z_SIZE_, + MIN_X_RANGE_, + MIN_Y_RANGE_, + MIN_Z_RANGE_, + NUM_INDS_FOR_SCAN_)); + } + else + { + preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda( + NUM_THREADS_, + MAX_NUM_PILLARS_, + MAX_NUM_POINTS_PER_PILLAR_, + NUM_INDS_FOR_SCAN_, + GRID_X_SIZE_, + GRID_Y_SIZE_, + GRID_Z_SIZE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + PILLAR_Z_SIZE_, + MIN_X_RANGE_, + MIN_Y_RANGE_, + MIN_Z_RANGE_)); + } + + anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda( + NUM_INDS_FOR_SCAN_, + NUM_ANCHOR_X_INDS_, + NUM_ANCHOR_Y_INDS_, + NUM_ANCHOR_R_INDS_, + MIN_X_RANGE_, + MIN_Y_RANGE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + GRID_X_SIZE_, + GRID_Y_SIZE_ + )); + + scatter_cuda_ptr_.reset(new ScatterCuda( + NUM_THREADS_, + MAX_NUM_PILLARS_, + GRID_X_SIZE_, + GRID_Y_SIZE_ + )); + + const float FLOAT_MIN = std::numeric_limits::lowest(); + const float FLOAT_MAX = std::numeric_limits::max(); + postprocess_cuda_ptr_.reset(new PostprocessCuda( + FLOAT_MIN, + FLOAT_MAX, + NUM_ANCHOR_X_INDS_, + NUM_ANCHOR_Y_INDS_, + NUM_ANCHOR_R_INDS_, + score_threshold_, + NUM_THREADS_, + nms_overlap_threshold_ + )); + + deviceMemoryMalloc(); + initTRT(); + initAnchors(); +} + +PointPillars::~PointPillars() +{ + + delete[] anchors_px_; + delete[] anchors_py_; + delete[] anchors_pz_; + delete[] anchors_dx_; + delete[] anchors_dy_; + delete[] anchors_dz_; + delete[] anchors_ro_; + delete[] box_anchors_min_x_; + delete[] box_anchors_min_y_; + delete[] box_anchors_max_x_; + delete[] box_anchors_max_y_; + + GPU_CHECK(cudaFree(dev_pillar_x_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_y_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_z_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_i_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_count_histo_)); + + GPU_CHECK(cudaFree(dev_x_coors_)); + GPU_CHECK(cudaFree(dev_y_coors_)); + GPU_CHECK(cudaFree(dev_num_points_per_pillar_)); + GPU_CHECK(cudaFree(dev_sparse_pillar_map_)); + + GPU_CHECK(cudaFree(dev_pillar_x_)); + GPU_CHECK(cudaFree(dev_pillar_y_)); + GPU_CHECK(cudaFree(dev_pillar_z_)); + GPU_CHECK(cudaFree(dev_pillar_i_)); + + GPU_CHECK(cudaFree( dev_x_coors_for_sub_shaped_ )); + GPU_CHECK(cudaFree( dev_y_coors_for_sub_shaped_ )); + GPU_CHECK(cudaFree( dev_pillar_feature_mask_ )); + + GPU_CHECK(cudaFree(dev_cumsum_along_x_)); + GPU_CHECK(cudaFree(dev_cumsum_along_y_)); + + GPU_CHECK(cudaFree(dev_box_anchors_min_x_)); + GPU_CHECK(cudaFree(dev_box_anchors_min_y_)); + GPU_CHECK(cudaFree(dev_box_anchors_max_x_)); + GPU_CHECK(cudaFree(dev_box_anchors_max_y_)); + GPU_CHECK(cudaFree(dev_anchor_mask_)); + + GPU_CHECK(cudaFree(pfe_buffers_[0])); + GPU_CHECK(cudaFree(pfe_buffers_[1])); + GPU_CHECK(cudaFree(pfe_buffers_[2])); + GPU_CHECK(cudaFree(pfe_buffers_[3])); + GPU_CHECK(cudaFree(pfe_buffers_[4])); + GPU_CHECK(cudaFree(pfe_buffers_[5])); + GPU_CHECK(cudaFree(pfe_buffers_[6])); + GPU_CHECK(cudaFree(pfe_buffers_[7])); + GPU_CHECK(cudaFree(pfe_buffers_[8])); + + + GPU_CHECK(cudaFree(rpn_buffers_[0])); + GPU_CHECK(cudaFree(rpn_buffers_[1])); + GPU_CHECK(cudaFree(rpn_buffers_[2])); + GPU_CHECK(cudaFree(rpn_buffers_[3])); + + GPU_CHECK(cudaFree(dev_scattered_feature_)); + + GPU_CHECK(cudaFree(dev_anchors_px_)); + GPU_CHECK(cudaFree(dev_anchors_py_)); + GPU_CHECK(cudaFree(dev_anchors_pz_)); + GPU_CHECK(cudaFree(dev_anchors_dx_)); + GPU_CHECK(cudaFree(dev_anchors_dy_)); + GPU_CHECK(cudaFree(dev_anchors_dz_)); + GPU_CHECK(cudaFree(dev_anchors_ro_)); + GPU_CHECK(cudaFree(dev_filtered_box_)); + GPU_CHECK(cudaFree(dev_filtered_score_)); + GPU_CHECK(cudaFree(dev_filtered_dir_)); + GPU_CHECK(cudaFree(dev_box_for_nms_)); + GPU_CHECK(cudaFree(dev_filter_count_)); + + + + pfe_context_->destroy(); + rpn_context_->destroy(); + + pfe_runtime_->destroy(); + rpn_runtime_->destroy(); + pfe_engine_->destroy(); + rpn_engine_->destroy(); +} + +void PointPillars::deviceMemoryMalloc() +{ + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); + + GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_* sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_* sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_,NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); + + + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + + + GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + + //cumsum kernel + GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); + + + // for make anchor mask kernel + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_*sizeof(int))); + + + // for trt inference + // create GPU buffers and a stream + GPU_CHECK(cudaMalloc(&pfe_buffers_[0], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[1], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[2], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[3], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[4], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[5], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[6], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[7], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&pfe_buffers_[8], PFE_OUTPUT_SIZE_ * sizeof(float))); + + GPU_CHECK(cudaMalloc(&rpn_buffers_[0], RPN_INPUT_SIZE_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&rpn_buffers_[1], RPN_BOX_OUTPUT_SIZE_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&rpn_buffers_[2], RPN_CLS_OUTPUT_SIZE_ * sizeof(float))); + GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float))); + + // for scatter kernel + GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64*GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(float) ) ); + + //for filter + GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_*7*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_*sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_*4*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int))); +} + +void PointPillars::initAnchors() +{ + // zero clear + for(size_t i = 0; i < NUM_ANCHOR_; i++) + { + anchors_px_[i] = 0; + anchors_py_[i] = 0; + anchors_pz_[i] = 0; + anchors_dx_[i] = 0; + anchors_dy_[i] = 0; + anchors_dz_[i] = 0; + anchors_ro_[i] = 0; + box_anchors_min_x_[i] = 0; + box_anchors_min_y_[i] = 0; + box_anchors_max_x_[i] = 0; + box_anchors_max_y_[i] = 0; + } + + float x_stride = PILLAR_X_SIZE_*2; + float y_stride = PILLAR_Y_SIZE_*2; + float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_; + float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_; + + float anchor_x_count[NUM_ANCHOR_X_INDS_] = {0}; + for(size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++) + { + anchor_x_count[i] = float(i)*x_stride + x_offset; + } + float anchor_y_count[NUM_ANCHOR_Y_INDS_] = {0}; + for(size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++) + { + anchor_y_count[i] = float(i)*y_stride + y_offset; + } + + float anchor_r_count[NUM_ANCHOR_R_INDS_] = {0, M_PI/2}; + + // np.meshgrid + for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + { + for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + { + for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++) + { + int ind = y*NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_ + x*NUM_ANCHOR_R_INDS_ + r; + anchors_px_[ind] = anchor_x_count[x]; + anchors_py_[ind] = anchor_y_count[y]; + anchors_ro_[ind] = anchor_r_count[r]; + anchors_pz_[ind] = -1*SENSOR_HEIGHT_; + anchors_dx_[ind] = ANCHOR_DX_SIZE_; + anchors_dy_[ind] = ANCHOR_DY_SIZE_; + anchors_dz_[ind] = ANCHOR_DZ_SIZE_; + } + } + } + convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_); + + putAnchorsInDeviceMemory(); +} + +void PointPillars::putAnchorsInDeviceMemory() +{ + GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); + GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); + GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); + GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); + + GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); + +} + +void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, + float* anchors_dx, float* anchors_dy) +{ + // flip box's dimension when the at the third axis == 1 + float flipped_anchors_dx[NUM_ANCHOR_] = {0}; + float flipped_anchors_dy[NUM_ANCHOR_] = {0}; + for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + { + for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + { + int base_ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_; + flipped_anchors_dx[base_ind + 0] = ANCHOR_DX_SIZE_; + flipped_anchors_dy[base_ind + 0] = ANCHOR_DY_SIZE_; + flipped_anchors_dx[base_ind + 1] = ANCHOR_DY_SIZE_; + flipped_anchors_dy[base_ind + 1] = ANCHOR_DX_SIZE_; + } + } + for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + { + for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + { + for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++) + { + int ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_ + r; + box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind]/2; + box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind]/2; + box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind]/2; + box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind]/2; + } + } + } +} + +void PointPillars::initTRT() +{ + // create a TensorRT model from the onnx model and serialize it to a stream + nvinfer1::IHostMemory* pfe_trt_model_stream{nullptr}; + nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr}; + onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream); + onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream); + if(pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) + { + ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str()); + } + + // deserialize the engine + pfe_runtime_ = nvinfer1::createInferRuntime(g_logger_); + rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_); + if(pfe_runtime_ == nullptr || rpn_runtime_ == nullptr) + { + ROS_ERROR("Failed to create TensorRT Runtime object."); + } + pfe_engine_ = pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr); + rpn_engine_ = rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr); + if(pfe_engine_ == nullptr || rpn_engine_ == nullptr) + { + ROS_ERROR("Failed to create TensorRT Engine."); + } + pfe_trt_model_stream->destroy(); + rpn_trt_model_stream->destroy(); + pfe_context_ = pfe_engine_->createExecutionContext(); + rpn_context_ = rpn_engine_->createExecutionContext(); + if(pfe_context_ == nullptr || rpn_context_ == nullptr) + { + ROS_ERROR("Failed to create TensorRT Execution Context."); + } +} + +void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the onnx model + nvinfer1::IHostMemory*& trt_model_stream) // output buffer for the TensorRT model +{ + int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING; + + // create the builder + nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_); + nvinfer1::INetworkDefinition* network = builder->createNetwork(); + + auto parser = nvonnxparser::createParser(*network, g_logger_); + + if (!parser->parseFromFile(model_file.c_str(), verbosity)) + { + std::string msg("failed to parse onnx file"); + g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str()); + exit(EXIT_FAILURE); + } + + // Build the engine + builder->setMaxBatchSize(BATCH_SIZE_); + builder->setMaxWorkspaceSize(1 << 20); + + nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network); + + parser->destroy(); + + // serialize the engine, then close everything down + trt_model_stream = engine->serialize(); + engine->destroy(); + network->destroy(); + builder->destroy(); +} + +void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_points) +{ + int x_coors[MAX_NUM_PILLARS_]= {0}; + int y_coors[MAX_NUM_PILLARS_]= {0}; + float num_points_per_pillar[MAX_NUM_PILLARS_] = {0}; + float * pillar_x = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_y = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_z = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_i = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + + float * x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + float * y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_feature_mask = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; + + float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_]; + + preprocess_points_ptr_->preprocess(in_points_array, in_num_points, + x_coors, y_coors, num_points_per_pillar, + pillar_x, pillar_y, pillar_z, pillar_i, + x_coors_for_sub_shaped, y_coors_for_sub_shaped, + pillar_feature_mask, sparse_pillar_map, host_pillar_count_); + + GPU_CHECK( cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); + GPU_CHECK( cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); + GPU_CHECK( cudaMemset(dev_pillar_x_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( cudaMemset(dev_pillar_y_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( cudaMemset(dev_pillar_z_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( cudaMemset(dev_pillar_i_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) ); + GPU_CHECK( cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) ); + GPU_CHECK( cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) ); + GPU_CHECK( cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) ); + + GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_,num_points_per_pillar,MAX_NUM_PILLARS_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); + GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(float), cudaMemcpyHostToDevice )); + + delete[] pillar_x; + delete[] pillar_y; + delete[] pillar_z; + delete[] pillar_i; + delete[] x_coors_for_sub_shaped; + delete[] y_coors_for_sub_shaped; + delete[] pillar_feature_mask; + delete[] sparse_pillar_map; +} + +void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points) +{ + float * dev_points; + GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points*4*sizeof(float))); + GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points*4*sizeof(float), cudaMemcpyHostToDevice ) ); + + + GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); + GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) ); + GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); + GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); + GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) ); + GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_*sizeof(int))); + + preprocess_points_cuda_ptr_->doPreprocessPointsCuda(dev_points, in_num_points, dev_x_coors_, dev_y_coors_, + dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_, dev_pillar_z_, dev_pillar_i_, + dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_, + dev_pillar_feature_mask_, dev_sparse_pillar_map_, host_pillar_count_); + + GPU_CHECK(cudaFree(dev_points)); +} + +void PointPillars::preprocess(const float* in_points_array, const int in_num_points) +{ + if(reproduce_result_mode_) + { + preprocessCPU(in_points_array, in_num_points); + } + else + { + preprocessGPU(in_points_array, in_num_points); + } +} + +void PointPillars::doInference(const float* in_points_array, const int in_num_points, + std::vector& out_detection, int& out_num_objects) +{ + preprocess(in_points_array, in_num_points); + + anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_, dev_box_anchors_min_x_, + dev_box_anchors_min_y_, dev_box_anchors_max_x_, dev_box_anchors_max_y_, dev_anchor_mask_); + + + cudaStream_t stream; + GPU_CHECK(cudaStreamCreate(&stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr); + + + + GPU_CHECK( cudaMemset( dev_scattered_feature_, 0, RPN_INPUT_SIZE_*sizeof(float) ) ); + scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_, + (float *)pfe_buffers_[8], dev_scattered_feature_); + + + GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_, BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr); + + + GPU_CHECK(cudaMemset( dev_filter_count_, 0, sizeof( int ))); + postprocess_cuda_ptr_->doPostprocessCuda((float *)rpn_buffers_[1], (float *)rpn_buffers_[2],(float *)rpn_buffers_[3] , dev_anchor_mask_, + dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, + dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, + dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, + dev_filter_count_, out_detection, out_num_objects); + + // release the stream and the buffers + cudaStreamDestroy(stream); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp new file mode 100755 index 00000000000..a7287b3f60e --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -0,0 +1,181 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in STL +#include +#include + +//headers in PCL +#include +#include +#include + +//headers in ROS +#include + +//headers in local files +#include "autoware_msgs/DetectedObjectArray.h" +#include "point_pillars_ros.h" + +PointPillarsROS::PointPillarsROS(): +private_nh_("~"), +has_subscribed_baselink_(false), +NUM_POINT_FEATURE_(4), +OUTPUT_NUM_BOX_FEATURE_(7), +TRAINED_SENSOR_HEIGHT_(1.73), +NORMALIZING_INTENSITY_VALUE_(255.0), +BASELINK_FRAME_("base_link") +{ + private_nh_.param("baselink_support", baselink_support_, true); + private_nh_.param("reproduce_result_mode", reproduce_result_mode_, false); + private_nh_.param("score_threshold", score_threshold_, 0.5); + private_nh_.param("nms_overlap_threshold", nms_overlap_threshold_, 0.5); + private_nh_.param("pfe_onnx_file", pfe_onnx_file_, ""); + private_nh_.param("rpn_onnx_file", rpn_onnx_file_, ""); + + point_pillars_ptr_.reset(new PointPillars( + reproduce_result_mode_, + score_threshold_, + nms_overlap_threshold_, + pfe_onnx_file_, + rpn_onnx_file_)); +} + +void PointPillarsROS::createROSPubSub() +{ + sub_points_ = nh_.subscribe("/points_raw", 1, &PointPillarsROS::pointsCallback, this); + pub_objects_ = nh_.advertise("/detection/lidar_detector/objects", 1); +} + + +geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose, + const tf::Transform& tf) +{ + tf::Transform transform; + geometry_msgs::PoseStamped out_pose; + transform.setOrigin(tf::Vector3(in_pose.position.x, in_pose.position.y, in_pose.position.z)); + transform.setRotation(tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w)); + geometry_msgs::PoseStamped pose_out; + tf::poseTFToMsg(tf*transform, out_pose.pose); + return out_pose.pose; +} + +void PointPillarsROS::pubDetectedObject(const std::vector& detections, const int num_objects, const std_msgs::Header& pc_header) +{ + autoware_msgs::DetectedObjectArray objects; + objects.header = pc_header; + for (size_t i = 0; i < num_objects; i++) + { + autoware_msgs::DetectedObject object; + object.header = pc_header; + object.valid = true; + object.pose_reliable = true; + + object.pose.position.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+0]; + object.pose.position.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+1]; + object.pose.position.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+2]; + + + float yaw = detections[i*OUTPUT_NUM_BOX_FEATURE_+6]; + yaw = std::atan2(std::sin(yaw), std::cos(yaw)); + geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw); + object.pose.orientation = q; + + if(baselink_support_) + { + object.pose = getTransformedPose(object.pose, angle_transform_inversed_); + } + + object.dimensions.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+3]; + object.dimensions.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+4]; + object.dimensions.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+5]; + + objects.objects.push_back(object); + } + pub_objects_.publish(objects); +} + +void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid) +{ + try + { + tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0)); + tf_listener_.lookupTransform( BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_); + getTFInfo(baselink2lidar_); + has_subscribed_baselink_ = true; + } + catch (tf::TransformException ex) + { + ROS_ERROR("%s", ex.what()); + } +} + +void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar) +{ + tf::Vector3 v = baselink2lidar.getOrigin(); + offset_z_from_trained_data_ = v.getZ() - TRAINED_SENSOR_HEIGHT_; + + tf::Quaternion q = baselink2lidar_.getRotation(); + angle_transform_ = tf::Transform(q); + angle_transform_inversed_ = angle_transform_.inverse(); + +} + +void PointPillarsROS::pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z) +{ + for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++) + { + pcl::PointXYZI point = in_pcl_pc_ptr->at(i); + out_points_array[i*NUM_POINT_FEATURE_ + 0] = point.x; + out_points_array[i*NUM_POINT_FEATURE_ + 1] = point.y; + out_points_array[i*NUM_POINT_FEATURE_ + 2] = point.z + offset_z; + out_points_array[i*NUM_POINT_FEATURE_ + 3] = float(point.intensity/NORMALIZING_INTENSITY_VALUE_); + } +} + + +void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& msg) +{ + + pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); + pcl::fromROSMsg(*msg, *pcl_pc_ptr); + + if(baselink_support_) + { + if(!has_subscribed_baselink_) + { + getBaselinkToLidarTF(msg->header.frame_id); + } + pcl_ros::transformPointCloud(*pcl_pc_ptr, *pcl_pc_ptr, angle_transform_); + } + + float * points_array = new float[pcl_pc_ptr->size()*NUM_POINT_FEATURE_]; + if(baselink_support_ && has_subscribed_baselink_) + { + pclToArray(pcl_pc_ptr, points_array, offset_z_from_trained_data_); + } + else + { + pclToArray(pcl_pc_ptr, points_array); + } + + int out_num_objects = 0; + std::vector out_detection; + point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection, out_num_objects); + + delete[] points_array; + pubDetectedObject(out_detection, out_num_objects, msg->header); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu new file mode 100644 index 00000000000..48862d2256e --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu @@ -0,0 +1,219 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +//headers in CUDA +#include + +//headers in local files +#include "postprocess_cuda.h" + +__global__ void filter_kernel(const float* box_preds, const float* cls_preds, const float* dir_preds, const int* anchor_mask, + const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, + const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, + float* filtered_box, float* filtered_score, int* filtered_dir, float* box_for_nms, int* filter_count, + const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold) +{ + // boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r + int tid = threadIdx.x + blockIdx.x * blockDim.x; + //sigmoid funciton + float score = 1/(1+expf(-cls_preds[tid])); + if(anchor_mask[tid] == 1 && score > score_threshold) + { + int counter = atomicAdd(filter_count, 1); + float za = dev_anchors_pz[tid] + dev_anchors_dz[tid]/2; + + //decode network output + float diagonal = sqrtf(dev_anchors_dx[tid]*dev_anchors_dx[tid] + dev_anchors_dy[tid]*dev_anchors_dy[tid]); + float box_px = box_preds[tid*7 + 0] * diagonal + dev_anchors_px[tid]; + float box_py = box_preds[tid*7 + 1] * diagonal + dev_anchors_py[tid]; + float box_pz = box_preds[tid*7 + 2] * dev_anchors_dz[tid] + za; + float box_dx = expf(box_preds[tid*7 + 3]) * dev_anchors_dx[tid]; + float box_dy = expf(box_preds[tid*7 + 4]) * dev_anchors_dy[tid]; + float box_dz = expf(box_preds[tid*7 + 5]) * dev_anchors_dz[tid]; + float box_ro = box_preds[tid*7 + 6] + dev_anchors_ro[tid]; + + box_pz = box_pz - box_dz/2; + + filtered_box[counter*7 + 0] = box_px; + filtered_box[counter*7 + 1] = box_py; + filtered_box[counter*7 + 2] = box_pz; + filtered_box[counter*7 + 3] = box_dx; + filtered_box[counter*7 + 4] = box_dy; + filtered_box[counter*7 + 5] = box_dz; + filtered_box[counter*7 + 6] = box_ro; + filtered_score[counter] = score; + + int direction_label; + if(dir_preds[tid*2 + 0] < dir_preds[tid*2 + 1]) + { + direction_label = 1; + } + else + { + direction_label = 0; + } + filtered_dir[counter] = direction_label; + + //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation + //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1) + float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy), + float(-0.5*box_dx), float( 0.5*box_dy), + float( 0.5*box_dx), float( 0.5*box_dy), + float( 0.5*box_dx), float(-0.5*box_dy)}; + + //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax) + float rotated_corners[2*4]; + float offset_corners[2*4]; + float sin_yaw = sinf(box_ro); + float cos_yaw = cosf(box_ro); + float xmin = FLOAT_MAX; + float ymin = FLOAT_MAX; + float xmax = FLOAT_MIN; + float ymax = FLOAT_MIN; + for(size_t i = 0; i < 4; i++) + { + rotated_corners[i*2 + 0] = cos_yaw*corners[i*2 + 0] - sin_yaw*corners[i*2 + 1]; + rotated_corners[i*2 + 1] = sin_yaw*corners[i*2 + 0] + cos_yaw*corners[i*2 + 1]; + + offset_corners[i*2 + 0] = rotated_corners[i*2 + 0] + box_px; + offset_corners[i*2 + 1] = rotated_corners[i*2 + 1] + box_py; + + xmin = fminf(xmin, offset_corners[i*2 + 0]); + ymin = fminf(ymin, offset_corners[i*2 + 1]); + xmax = fmaxf(xmin, offset_corners[i*2 + 0]); + ymax = fmaxf(ymax, offset_corners[i*2 + 1]); + } + // box_for_nms(num_box, 4) + box_for_nms[counter*4 + 0] = xmin; + box_for_nms[counter*4 + 1] = ymin; + box_for_nms[counter*4 + 2] = xmax; + box_for_nms[counter*4 + 3] = ymax; + + } +} + +__global__ void sort_boxes_by_indexes_kernel(float* filtered_box, int* filtered_dir, float* box_for_nms, int* indexes, int filter_count, + float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if(tid < filter_count) + { + int sort_index = indexes[tid]; + sorted_filtered_boxes[tid*7 + 0] = filtered_box[sort_index*7 + 0]; + sorted_filtered_boxes[tid*7 + 1] = filtered_box[sort_index*7 + 1]; + sorted_filtered_boxes[tid*7 + 2] = filtered_box[sort_index*7 + 2]; + sorted_filtered_boxes[tid*7 + 3] = filtered_box[sort_index*7 + 3]; + sorted_filtered_boxes[tid*7 + 4] = filtered_box[sort_index*7 + 4]; + sorted_filtered_boxes[tid*7 + 5] = filtered_box[sort_index*7 + 5]; + sorted_filtered_boxes[tid*7 + 6] = filtered_box[sort_index*7 + 6]; + + sorted_filtered_dir[tid] = filtered_dir[sort_index]; + + + sorted_box_for_nms[tid*4 + 0] = box_for_nms[sort_index*4 + 0]; + sorted_box_for_nms[tid*4 + 1] = box_for_nms[sort_index*4 + 1]; + sorted_box_for_nms[tid*4 + 2] = box_for_nms[sort_index*4 + 2]; + sorted_box_for_nms[tid*4 + 3] = box_for_nms[sort_index*4 + 3]; + } +} + +PostprocessCuda::PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, + const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, + const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold): +FLOAT_MIN_(FLOAT_MIN), +FLOAT_MAX_(FLOAT_MAX), +NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS), +NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS), +NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS), +score_threshold_(score_threshold), +NUM_THREADS_(NUM_THREADS), +nms_overlap_threshold_(nms_overlap_threshold) +{ + nms_cuda_ptr_.reset(new NMSCuda( + NUM_THREADS, + nms_overlap_threshold)); +} + +void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output, + int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, + const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, + float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count, + std::vector& out_detection, int& out_num_objects) +{ + + filter_kernel<<>> + (rpn_box_output, rpn_cls_output, rpn_dir_output, dev_anchor_mask, + dev_anchors_px, dev_anchors_py, dev_anchors_pz, + dev_anchors_dx, dev_anchors_dy, dev_anchors_dz, dev_anchors_ro, + dev_filtered_box, dev_filtered_score, dev_filtered_dir, dev_box_for_nms, dev_filter_count, + FLOAT_MIN_, FLOAT_MAX_, score_threshold_); + + int host_filter_count[1]; + GPU_CHECK( cudaMemcpy(host_filter_count, dev_filter_count, sizeof(int), cudaMemcpyDeviceToHost ) ); + if(host_filter_count[0] == 0) + { + return; + } + + int* dev_indexes; + float* dev_sorted_filtered_box, *dev_sorted_box_for_nms; + int* dev_sorted_filtered_dir; + GPU_CHECK(cudaMalloc((void**)&dev_indexes, host_filter_count[0]*sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, 7*host_filter_count[0]*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_dir, host_filter_count[0]*sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, 4*host_filter_count[0]*sizeof(float))); + thrust::sequence(thrust::device, dev_indexes, dev_indexes + host_filter_count[0]); + thrust::sort_by_key(thrust::device, dev_filtered_score, dev_filtered_score + size_t(host_filter_count[0]), dev_indexes, thrust::greater()); + + + const int num_blocks = DIVUP(host_filter_count[0], NUM_THREADS_); + sort_boxes_by_indexes_kernel<<>>(dev_filtered_box, dev_filtered_dir, dev_box_for_nms, dev_indexes, host_filter_count[0], + dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms); + + int keep_inds[host_filter_count[0]] = {0}; + nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects); + + + float host_filtered_box[host_filter_count[0]*7]; + int host_filtered_dir[host_filter_count[0]]; + GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, 7*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) ); + GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir, host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) ); + for (size_t i = 0; i < out_num_objects; i++) + { + out_detection.push_back(host_filtered_box[keep_inds[i]*7+0]); + out_detection.push_back(host_filtered_box[keep_inds[i]*7+1]); + out_detection.push_back(host_filtered_box[keep_inds[i]*7+2]); + out_detection.push_back(host_filtered_box[keep_inds[i]*7+3]); + out_detection.push_back(host_filtered_box[keep_inds[i]*7+4]); + out_detection.push_back(host_filtered_box[keep_inds[i]*7+5]); + + if(host_filtered_dir[keep_inds[i]] == 0) + { + out_detection.push_back(host_filtered_box[keep_inds[i]*7+6] + M_PI); + } + else + { + out_detection.push_back(host_filtered_box[keep_inds[i]*7+6]); + } + } + + GPU_CHECK(cudaFree(dev_indexes)); + GPU_CHECK(cudaFree(dev_sorted_filtered_box)); + GPU_CHECK(cudaFree(dev_sorted_filtered_dir)); + GPU_CHECK(cudaFree(dev_sorted_box_for_nms)); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp new file mode 100644 index 00000000000..9ea42f9190d --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in STL +#include +#include + +//headers in local files +#include "preprocess_points.h" + +PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, + const int MAX_POINTS_PER_PILLAR, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE, + const int NUM_INDS_FOR_SCAN) +: +MAX_NUM_PILLARS_(MAX_NUM_PILLARS), +MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR), +GRID_X_SIZE_(GRID_X_SIZE), +GRID_Y_SIZE_(GRID_Y_SIZE), +GRID_Z_SIZE_(GRID_Z_SIZE), +PILLAR_X_SIZE_(PILLAR_X_SIZE), +PILLAR_Y_SIZE_(PILLAR_Y_SIZE), +PILLAR_Z_SIZE_(PILLAR_Z_SIZE), +MIN_X_RANGE_(MIN_X_RANGE), +MIN_Y_RANGE_(MIN_Y_RANGE), +MIN_Z_RANGE_(MIN_Z_RANGE), +NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) +{ +} + +void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped) +{ + for (int i = 0; i < GRID_Y_SIZE_; i++) + { + for (int j = 0; j < GRID_X_SIZE_; j++) + { + coor_to_pillaridx[i*GRID_X_SIZE_ + j] = -1; + } + } + + for (int i = 0; i < NUM_INDS_FOR_SCAN_; i++) + { + for (int j = 0; j < NUM_INDS_FOR_SCAN_; j++) + { + sparse_pillar_map[i*NUM_INDS_FOR_SCAN_+j] = 0; + } + } + + for(int i = 0; i < MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_; i++) + { + pillar_x[i] = 0; + pillar_y[i] = 0; + pillar_z[i] = 0; + pillar_i[i] = 0; + x_coors_for_sub_shaped[i] = 0; + y_coors_for_sub_shaped[i] = 0; + } +} + +void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points, + int* x_coors, int* y_coors, float* num_points_per_pillar, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) +{ + int pillar_count = 0; + float x_coors_for_sub[MAX_NUM_PILLARS_]= {0}; + float y_coors_for_sub[MAX_NUM_PILLARS_]= {0}; + //init variables + int coor_to_pillaridx[GRID_Y_SIZE_*GRID_X_SIZE_]; + initializeVariables(coor_to_pillaridx, sparse_pillar_map, + pillar_x, pillar_y, pillar_z, pillar_i, + x_coors_for_sub_shaped, y_coors_for_sub_shaped); + for(int i = 0; i < in_num_points; i++) + { + int x_coor = std::floor((in_points_array[i*4 + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_); + int y_coor = std::floor((in_points_array[i*4 + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_); + int z_coor = std::floor((in_points_array[i*4 + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_); + if(x_coor < 0 || x_coor >= GRID_X_SIZE_ || + y_coor < 0 || y_coor >= GRID_Y_SIZE_ || + z_coor < 0 || z_coor >= GRID_Z_SIZE_) + { + continue; + } + //reverse index + int pillar_index = coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor]; + if(pillar_index == -1) + { + pillar_index = pillar_count; + if(pillar_count >= MAX_NUM_PILLARS_) + { + break; + } + pillar_count += 1; + coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor] = pillar_index; + + //reverse index + y_coors[pillar_index] = std::floor(y_coor); + x_coors[pillar_index] = std::floor(x_coor); + + // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_; + // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_; + // TODO Need to be modified after proper trining code + y_coors_for_sub[pillar_index] = std::floor(y_coor)* PILLAR_Y_SIZE_ + -39.9; + x_coors_for_sub[pillar_index] = std::floor(x_coor)* PILLAR_X_SIZE_ + 0.1; + + //sparse pillar map + sparse_pillar_map[y_coor*512 + x_coor] = 1; + } + int num = num_points_per_pillar[pillar_index]; + if (num < MAX_NUM_POINTS_PER_PILLAR_) + { + pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 0]; + pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 1]; + pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 2]; + pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 3]; + num_points_per_pillar[pillar_index] += 1; + } + } + + for (int i = 0; i < MAX_NUM_PILLARS_; i++) + { + float x = x_coors_for_sub[i]; + float y = y_coors_for_sub[i]; + int num_points_for_a_pillar = num_points_per_pillar[i]; + for(int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++) + { + x_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = x; + y_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = y; + if(j < num_points_for_a_pillar) + { + pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0; + } + else + { + pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0; + } + } + } + host_pillar_count[0] = pillar_count; + +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu new file mode 100644 index 00000000000..0ee9bba397e --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu @@ -0,0 +1,313 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in STL +#include + +//headers in local files +#include "common.h" +#include "preprocess_points_cuda.h" + +__global__ void make_pillar_histo_kernel( + const float* dev_points, + float* dev_pillar_x_in_coors, + float* dev_pillar_y_in_coors, + float* dev_pillar_z_in_coors, + float* dev_pillar_i_in_coors, + int* pillar_count_histo, + const int num_points, + const int max_points_per_pillar, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE + ) +{ + int th_i = threadIdx.x + blockIdx.x * blockDim.x; + if(th_i >= num_points) + { + return; + } + int y_coor = floor((dev_points[th_i*4 + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE); + int x_coor = floor((dev_points[th_i*4 + 0] - MIN_X_RANGE)/PILLAR_X_SIZE); + int z_coor = floor((dev_points[th_i*4 + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE); + + if(x_coor >= 0 && x_coor < GRID_X_SIZE && + y_coor >= 0 && y_coor < GRID_Y_SIZE && + z_coor >= 0 && z_coor < GRID_Z_SIZE) + { + int count = atomicAdd(&pillar_count_histo[y_coor*GRID_X_SIZE + x_coor], 1); + if(count < max_points_per_pillar) + { + int ind = y_coor*GRID_X_SIZE*max_points_per_pillar + x_coor*max_points_per_pillar + count; + dev_pillar_x_in_coors[ind] = dev_points[th_i*4 + 0]; + dev_pillar_y_in_coors[ind] = dev_points[th_i*4 + 1]; + dev_pillar_z_in_coors[ind] = dev_points[th_i*4 + 2]; + dev_pillar_i_in_coors[ind] = dev_points[th_i*4 + 3]; + } + } +} + + +__global__ void make_pillar_index_kernel( + int* dev_pillar_count_histo, + int* dev_counter, + int* dev_pillar_count, + int* dev_x_coors, + int* dev_y_coors, + float* dev_x_coors_for_sub, + float* dev_y_coors_for_sub, + float* dev_num_points_per_pillar, + int* dev_sparse_pillar_map, + const int max_pillars, + const int max_points_per_pillar, + const int GRID_X_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const int NUM_INDS_FOR_SCAN) +{ + int x = blockIdx.x; + int y = threadIdx.x; + int num_points_at_this_pillar = dev_pillar_count_histo[y*GRID_X_SIZE + x]; + if(num_points_at_this_pillar == 0) + { + return; + } + + int count = atomicAdd(dev_counter, 1); + if(count < max_pillars) + { + atomicAdd(dev_pillar_count, 1); + if(num_points_at_this_pillar >= max_points_per_pillar) + { + dev_num_points_per_pillar[count] = max_points_per_pillar; + } + else + { + dev_num_points_per_pillar[count] = num_points_at_this_pillar; + } + dev_x_coors[count] = x; + dev_y_coors[count] = y; + + //TODO Need to be modified after making properly training model + // x_offset = self.vx / 2 + pc_range[0] + // y_offset = self.vy / 2 + pc_range[1] + // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset + // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset + + dev_x_coors_for_sub[count] = x* PILLAR_X_SIZE + 0.1; + dev_y_coors_for_sub[count] = y* PILLAR_Y_SIZE + -39.9; + dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1; + } +} + + +__global__ void make_pillar_feature_kernel( + float* dev_pillar_x_in_coors, + float* dev_pillar_y_in_coors, + float* dev_pillar_z_in_coors, + float* dev_pillar_i_in_coors, + float* dev_pillar_x, + float* dev_pillar_y, + float* dev_pillar_z, + float* dev_pillar_i, + int* dev_x_coors, + int* dev_y_coors, + float* dev_num_points_per_pillar, + const int max_points, + const int GRID_X_SIZE) +{ + int ith_pillar = blockIdx.x; + int num_points_at_this_pillar = dev_num_points_per_pillar[ith_pillar]; + int ith_point = threadIdx.x; + if(ith_point >= num_points_at_this_pillar) + { + return; + } + int x_ind = dev_x_coors[ith_pillar]; + int y_ind = dev_y_coors[ith_pillar]; + int pillar_ind = ith_pillar*max_points + ith_point; + int coors_ind = y_ind*GRID_X_SIZE*max_points + x_ind*max_points + ith_point; + dev_pillar_x[pillar_ind] = dev_pillar_x_in_coors[coors_ind]; + dev_pillar_y[pillar_ind] = dev_pillar_y_in_coors[coors_ind]; + dev_pillar_z[pillar_ind] = dev_pillar_z_in_coors[coors_ind]; + dev_pillar_i[pillar_ind] = dev_pillar_i_in_coors[coors_ind]; +} + +__global__ void make_extra_network_input_kernel(float* dev_x_coors_for_sub, + float* dev_y_coors_for_sub, + float* dev_num_points_per_pillar, + float* dev_x_coors_for_sub_shaped, + float* dev_y_coors_for_sub_shaped, + float* dev_pillar_feature_mask, + const int MAX_NUM_POINTS_PER_PILLAR) +{ + int ith_pillar = blockIdx.x; + int ith_point = threadIdx.x; + float x = dev_x_coors_for_sub[ith_pillar]; + float y = dev_y_coors_for_sub[ith_pillar]; + int num_points_for_a_pillar = dev_num_points_per_pillar[ith_pillar]; + int ind = ith_pillar*MAX_NUM_POINTS_PER_PILLAR + ith_point; + dev_x_coors_for_sub_shaped[ind] = x; + dev_y_coors_for_sub_shaped[ind] = y; + + if(ith_point < num_points_for_a_pillar) + { + dev_pillar_feature_mask[ind] = 1.0; + } + else + { + dev_pillar_feature_mask[ind] = 0.0; + } +} + + + + +PreprocessPointsCuda::PreprocessPointsCuda(const int NUM_THREADS, + const int MAX_NUM_PILLARS, + const int MAX_POINTS_PER_PILLAR, + const int NUM_INDS_FOR_SCAN, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE) +: +NUM_THREADS_(NUM_THREADS), +MAX_NUM_PILLARS_(MAX_NUM_PILLARS), +MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR), +NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN), +GRID_X_SIZE_(GRID_X_SIZE), +GRID_Y_SIZE_(GRID_Y_SIZE), +GRID_Z_SIZE_(GRID_Z_SIZE), +PILLAR_X_SIZE_(PILLAR_X_SIZE), +PILLAR_Y_SIZE_(PILLAR_Y_SIZE), +PILLAR_Z_SIZE_(PILLAR_Z_SIZE), +MIN_X_RANGE_(MIN_X_RANGE), +MIN_Y_RANGE_(MIN_Y_RANGE), +MIN_Z_RANGE_(MIN_Z_RANGE) +{ + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); + + GPU_CHECK(cudaMalloc((void**)&dev_counter_, sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_, sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_, MAX_NUM_PILLARS_* sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_, MAX_NUM_PILLARS_* sizeof(float))); +} + +PreprocessPointsCuda::~PreprocessPointsCuda() +{ + GPU_CHECK(cudaFree(dev_pillar_x_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_y_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_z_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_i_in_coors_)); + GPU_CHECK(cudaFree(dev_pillar_count_histo_)); + + GPU_CHECK(cudaFree(dev_counter_)); + GPU_CHECK(cudaFree(dev_pillar_count_)); + GPU_CHECK(cudaFree(dev_x_coors_for_sub_)); + GPU_CHECK(cudaFree(dev_y_coors_for_sub_)); + +} + +void PreprocessPointsCuda::doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors, + float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i, + float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped, + float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count) +{ + + GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); + GPU_CHECK(cudaMemset(dev_counter_, 0, sizeof(int))); + GPU_CHECK(cudaMemset(dev_pillar_count_, 0, sizeof(int))); + + int num_block = DIVUP(in_num_points, NUM_THREADS_); + make_pillar_histo_kernel<<>>( + dev_points, + dev_pillar_x_in_coors_, + dev_pillar_y_in_coors_, + dev_pillar_z_in_coors_, + dev_pillar_i_in_coors_, + dev_pillar_count_histo_, + in_num_points, + MAX_NUM_POINTS_PER_PILLAR_, + GRID_X_SIZE_, + GRID_Y_SIZE_, + GRID_Z_SIZE_, + MIN_X_RANGE_, + MIN_Y_RANGE_, + MIN_Z_RANGE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + PILLAR_Z_SIZE_); + + + make_pillar_index_kernel<<>>( + dev_pillar_count_histo_, + dev_counter_, + dev_pillar_count_, + dev_x_coors, + dev_y_coors, + dev_x_coors_for_sub_, + dev_y_coors_for_sub_, + dev_num_points_per_pillar, + dev_sparse_pillar_map, + MAX_NUM_PILLARS_, + MAX_NUM_POINTS_PER_PILLAR_, + GRID_X_SIZE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + NUM_INDS_FOR_SCAN_); + + GPU_CHECK(cudaMemcpy(host_pillar_count, dev_pillar_count_, sizeof(int), cudaMemcpyDeviceToHost ) ); + make_pillar_feature_kernel<<>>( + dev_pillar_x_in_coors_, + dev_pillar_y_in_coors_, + dev_pillar_z_in_coors_, + dev_pillar_i_in_coors_, + dev_pillar_x, + dev_pillar_y, + dev_pillar_z, + dev_pillar_i, + dev_x_coors, + dev_y_coors, + dev_num_points_per_pillar, + MAX_NUM_POINTS_PER_PILLAR_, + GRID_X_SIZE_); + + make_extra_network_input_kernel<<>>( + dev_x_coors_for_sub_, + dev_y_coors_for_sub_, + dev_num_points_per_pillar, + dev_x_coors_for_sub_shaped, + dev_y_coors_for_sub_shaped, + dev_pillar_feature_mask, + MAX_NUM_POINTS_PER_PILLAR_); + +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu new file mode 100644 index 00000000000..6933f1da93e --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu @@ -0,0 +1,43 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//headers in local files +#include "scatter_cuda.h" + +__global__ void scatter_kernel( int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature, + const int MAX_NUM_PILLARS_, const int GRID_X_SIZE, const int GRID_Y_SIZE) +{ + int i_pillar = blockIdx.x; + int i_feature = threadIdx.x; + int x_ind = x_coors[i_pillar]; + int y_ind = y_coors[i_pillar]; + float feature = pfe_output[i_feature*MAX_NUM_PILLARS_ + i_pillar]; + scattered_feature[i_feature*GRID_Y_SIZE*GRID_X_SIZE + y_ind * GRID_X_SIZE + x_ind] = feature; +} + +ScatterCuda::ScatterCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int GRID_X_SIZE, const int GRID_Y_SIZE): +NUM_THREADS_(NUM_THREADS), +MAX_NUM_PILLARS_(MAX_NUM_PILLARS), +GRID_X_SIZE_(GRID_X_SIZE), +GRID_Y_SIZE_(GRID_Y_SIZE) +{ +} + +void ScatterCuda::doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature) +{ + scatter_kernel<<>>(x_coors, y_coors, pfe_output, scattered_feature, + MAX_NUM_PILLARS_, GRID_X_SIZE_, GRID_Y_SIZE_); +} diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml new file mode 100755 index 00000000000..2c79d3dbb70 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml @@ -0,0 +1,24 @@ + + + lidar_point_pillars + 1.9.1 + lidar_point_pillars + + Kosuke Murakami + BSD + + catkin + + roscpp + pcl_ros + cuda-toolkit-10-0 + autoware_msgs + + + roscpp + pcl_ros + cuda-toolkit-10-0 + autoware_msgs + + + diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp new file mode 100644 index 00000000000..0b209a92471 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -0,0 +1,229 @@ +/* + * Copyright 2018-2019 Autoware Foundation. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** + * @file test_point_pillars.cpp + * @brief unit test file + * @author Kosuke Murakami + * @date 2019/02/26 + */ + +#include +#include + +#include + +#include +#include + +#include "point_pillars_ros.h" +#include "preprocess_points.h" + +class TestSuite : public ::testing::Test +{ +public: + TestSuite() + { + } + ~TestSuite() + { + } +}; + +class TestClass +{ +public: + TestClass(const int MAX_NUM_PILLARS, + const int MAX_NUM_POINTS_PER_PILLAR, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE, + const int NUM_INDS_FOR_SCAN); + const int MAX_NUM_PILLARS_; + const int MAX_NUM_POINTS_PER_PILLAR_; + const int GRID_X_SIZE_; + const int GRID_Y_SIZE_; + const int GRID_Z_SIZE_; + const float PILLAR_X_SIZE_; + const float PILLAR_Y_SIZE_; + const float PILLAR_Z_SIZE_; + const float MIN_X_RANGE_; + const float MIN_Y_RANGE_; + const float MIN_Z_RANGE_; + const int NUM_INDS_FOR_SCAN_; + void loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, + const std::string& in_file ); + void pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array); + void preprocess(const float* in_points_array, int in_num_points, + int* x_coors, int* y_coors, float* num_points_per_pillar, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); +private: + + std::unique_ptr preprocess_points_ptr_; +}; + +TestClass::TestClass( + const int MAX_NUM_PILLARS, + const int MAX_NUM_POINTS_PER_PILLAR, + const int GRID_X_SIZE, + const int GRID_Y_SIZE, + const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, + const float MIN_Y_RANGE, + const float MIN_Z_RANGE, + const int NUM_INDS_FOR_SCAN): +MAX_NUM_PILLARS_(MAX_NUM_PILLARS), +MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR), +GRID_X_SIZE_(GRID_X_SIZE), +GRID_Y_SIZE_(GRID_Y_SIZE), +GRID_Z_SIZE_(GRID_Z_SIZE), +PILLAR_X_SIZE_(PILLAR_X_SIZE), +PILLAR_Y_SIZE_(PILLAR_Y_SIZE), +PILLAR_Z_SIZE_(PILLAR_Z_SIZE), +MIN_X_RANGE_(MIN_X_RANGE), +MIN_Y_RANGE_(MIN_Y_RANGE), +MIN_Z_RANGE_(MIN_Z_RANGE), +NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) +{ + preprocess_points_ptr_.reset(new PreprocessPoints( + MAX_NUM_PILLARS_, + MAX_NUM_POINTS_PER_PILLAR_, + GRID_X_SIZE_, + GRID_Y_SIZE_, + GRID_Z_SIZE_, + PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, + PILLAR_Z_SIZE_, + MIN_X_RANGE_, + MIN_Y_RANGE_, + MIN_Z_RANGE_, + NUM_INDS_FOR_SCAN_ + )); +}; + +void TestClass::preprocess(const float* in_points_array, int in_num_points, + int* x_coors, int* y_coors, float* num_points_per_pillar, + float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) +{ + preprocess_points_ptr_->preprocess(in_points_array, in_num_points, + x_coors, y_coors, num_points_per_pillar, + pillar_x, pillar_y, pillar_z, pillar_i, + x_coors_for_sub_shaped, y_coors_for_sub_shaped, + pillar_feature_mask, sparse_pillar_map, host_pillar_count); +} + +void TestClass::pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array) +{ + for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++) + { + pcl::PointXYZI point = in_pcl_pc_ptr->at(i); + out_points_array[i*4 + 0] = point.x; + out_points_array[i*4 + 1] = point.y; + out_points_array[i*4 + 2] = point.z; + out_points_array[i*4 + 3] = point.intensity; + } +} + +void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, + const std::string& in_file ) +{ + if (pcl::io::loadPCDFile (in_file, *in_pcl_pc_ptr) == -1) + { + ROS_ERROR("Couldn't read test pcd file \n"); + } +} + +TEST(TestSuite, CheckPreprocessPointsCPU) +{ + TestClass test_obj( + 12000, + 100, + 432, + 496, + 1, + 0.16, + 0.16, + 4.0, + 0, + -39.68, + -3.0, + 512); + + pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); + std::string package_path = ros::package::getPath("lidar_point_pillars"); + std::string in_file = package_path + "/test/data/1527836009720148.pcd"; + test_obj.loadPoints(pcl_pc_ptr, in_file); + + float * points_array = new float[pcl_pc_ptr->size()*4]; + test_obj.pclToArray(pcl_pc_ptr, points_array); + + + int x_coors[test_obj.MAX_NUM_PILLARS_]= {0}; + int y_coors[test_obj.MAX_NUM_PILLARS_]= {0}; + float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = {0}; + float * pillar_x = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_y = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_z = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_i = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + + float * x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float * y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float * pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + + float* sparse_pillar_map = new float[512*512]; + + int host_pillar_count[1] = {0}; + test_obj.preprocess(points_array, pcl_pc_ptr->size(), + x_coors, y_coors, num_points_per_pillar, + pillar_x, pillar_y, pillar_z, pillar_i, + x_coors_for_sub_shaped, y_coors_for_sub_shaped, + pillar_feature_mask, sparse_pillar_map, host_pillar_count); + EXPECT_EQ(15, num_points_per_pillar[0]); + EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]); + EXPECT_EQ(71, x_coors[100]); + EXPECT_EQ(177, y_coors[100]); + EXPECT_EQ(1, sparse_pillar_map[177*512 + 71]); + EXPECT_EQ(6270, host_pillar_count[0]); + delete[] points_array; + delete[] pillar_x; + delete[] pillar_y; + delete[] pillar_z; + delete[] pillar_i; + delete[] x_coors_for_sub_shaped; + delete[] y_coors_for_sub_shaped; + delete[] pillar_feature_mask; + delete[] sparse_pillar_map; +} + +int main(int argc, char** argv) +{ + testing::InitGoogleTest(&argc, argv); + ros::init(argc, argv, "TestNode"); + return RUN_ALL_TESTS(); +} diff --git a/ros/src/util/packages/runtime_manager/scripts/computing.yaml b/ros/src/util/packages/runtime_manager/scripts/computing.yaml index 61a12437e33..2aa7f0b80e9 100755 --- a/ros/src/util/packages/runtime_manager/scripts/computing.yaml +++ b/ros/src/util/packages/runtime_manager/scripts/computing.yaml @@ -193,6 +193,11 @@ subs : cmd : roslaunch lidar_apollo_cnn_seg_detect lidar_apollo_cnn_seg_detect.launch param: lidar_cnn_baidu_params + - name : lidar_point_pillars + desc : lidar_point_pillars. Requires CUDA and TensorRT + cmd : roslaunch lidar_point_pillars lidar_point_pillars.launch + param: lidar_point_pillars_params + - name : lidar_naive_l_shape_detect desc : lidar_naive_l_shape_detect desc sample cmd : roslaunch lidar_naive_l_shape_detect lidar_naive_l_shape_detect.launch @@ -5406,4 +5411,65 @@ params : v : 0 cmd_param : dash : '' - delim : ':=' \ No newline at end of file + delim : ':=' + - name : lidar_point_pillars_params + vars : + - name : input_topic + desc : PointCloud topic to be fed to the network for detection. + label : Input topic + kind : topic + topic_type : sensor_msgs/PointCloud2 + v : /points_raw + cmd_param : + dash : '' + delim : ':=' + - name : baselink_support + desc : Whether to use baselink to adujust current sensor setup. + label : Baselink support + kind : checkbox + v : True + cmd_param : + dash : '' + delim : ':=' + - name : reproduce_result_mode + desc : Whether to enable reproducible result at the cost of the runtime. + label : reproduce_result_mode + kind : checkbox + v : False + cmd_param : + dash : '' + delim : ':=' + - name : score_threshold + desc : Detections with a confidence value larger than this value will be displayed. + label : Score threshold + min : 0.0 + max : 1.0 + v : 0.5 + cmd_param : + dash : '' + delim : ':=' + - name : nms_overlap_threshold + desc : Lower IOU than this value will be filtered. + label : NMS overlap threshold + min : 0.0 + max : 1.0 + v : 0.5 + cmd_param : + dash : '' + delim : ':=' + - name : pfe_onnx_file + desc : Pillar Feature Extractor onnx file. + label : 'PFE ONNX file' + kind : path + v : '' + cmd_param: + dash : '' + delim : ':=' + - name : rpn_onnx_file + desc : Region Proposal Network onnx file. + label : 'RPN ONNX file' + kind : path + v : '' + cmd_param: + dash : '' + delim : ':=' From 6b1f65ceec127fc1c4bb8af9e7ab2ed46e1d0bce Mon Sep 17 00:00:00 2001 From: cirpue49 Date: Tue, 26 Feb 2019 17:14:30 +0900 Subject: [PATCH 02/37] add readme.md --- .../packages/lidar_point_pillars/README.md | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md new file mode 100644 index 00000000000..ffae0b6c011 --- /dev/null +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -0,0 +1,43 @@ +# Point Pillars for 3D Object Detection + +Autoware package for Point Pillars. ([reference paper](https://arxiv.org/abs/1812.05784)) + +## Pre requisites + +CUDA Toolkit v9.0 or 10.0 + +TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) + + +## How to launch + +Using launch file: +`roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw` + + +## Parameters + +|Parameter| Type| Description|Default| +----------|-----|--------|----| +|`input_topic`|*String*|Input topic Pointcloud. Default.|`/points_raw`| +|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. Default.|`True`| +|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. Default.|`False`| +|`score_threshold`|*Float*|Minimum score required to include the result (0.1)|0.5| +|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS (0.1)|0.5| +|`pfe_onnx_file`|*String* |Path to the PFE onnx file|| +|`rpn_onnx_file`|*String* |Path to the RPN onnx file|| + +## Outputs + +|Topic|Type|Description| +|---|---|---| +|`/detection/lidar_detector/objects`|`autoware_msgs/DetectedObjetArray`|Array of Detected Objects in Autoware format| + +## Notes + +* To display the results in Rviz `objects_visualizer` is required. +(Launch file launches automatically this node). + +* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp).Notice that this model is under `BY-NC-SA 3.0` license. + +* If trained model comes from KITTI data, users might not be allowed to use the model for Commercial purpose. From a9a8cf9b9af94faba149260a9bf11b9d6a5fc049 Mon Sep 17 00:00:00 2001 From: cirpue49 Date: Tue, 26 Feb 2019 19:01:25 +0900 Subject: [PATCH 03/37] Refactor codes --- .../include/point_pillars.h | 2 + .../include/postprocess_cuda.h | 7 +- .../include/preprocess_points.h | 5 +- .../nodes/anchor_mask_cuda.cu | 2 + .../lidar_point_pillars/nodes/nms_cuda.cu | 1 + .../nodes/point_pillars.cpp | 11 +- .../nodes/postprocess_cuda.cu | 104 ++++++++++-------- .../nodes/preprocess_points.cpp | 20 ++-- 8 files changed, 90 insertions(+), 62 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index c58fc5e58d7..b0c69147ec9 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -115,6 +115,8 @@ class PointPillars const float ANCHOR_DX_SIZE_; const float ANCHOR_DY_SIZE_; const float ANCHOR_DZ_SIZE_; + const int NUM_BOX_CORNERS_; + const int NUM_OUTPUT_BOX_FEATURE_; //end initializer list diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h index 261cdd19ed1..3768c040499 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h @@ -42,6 +42,8 @@ class PostprocessCuda const float score_threshold_; const int NUM_THREADS_; const float nms_overlap_threshold_; + const int NUM_BOX_CORNERS_; + const int NUM_OUTPUT_BOX_FEATURE_; std::unique_ptr nms_cuda_ptr_; @@ -56,11 +58,14 @@ class PostprocessCuda * @param[in] score_threshold Score threshold for filtering output * @param[in] NUM_THREDS Number of threads for launching cuda kernel * @param[in] nms_overlap_threshold IOU threshold for NMS + * @param[in] NUM_BOX_CORNERS Number of box's corner + * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam */ PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, - const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold); + const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold, + const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE); /** * @brief Postprocessing for the network output diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h index bd0b54720f2..426dd50e714 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h @@ -42,6 +42,7 @@ class PreprocessPoints const float MIN_Y_RANGE_; const float MIN_Z_RANGE_; const int NUM_INDS_FOR_SCAN_; + const int NUM_BOX_CORNERS_; public: @@ -59,6 +60,7 @@ class PreprocessPoints * @param[in] MIN_Y_RANGE Minimum y value for pointcloud * @param[in] MIN_Z_RANGE Minimum z value for pointcloud * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) + * @param[in] NUM_BOX_CORNERS Number of box's corner * @details Captital variables never change after the compile */ PreprocessPoints(const int MAX_NUM_PILLARS, @@ -72,7 +74,8 @@ class PreprocessPoints const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN); + const int NUM_INDS_FOR_SCAN, + const int NUM_BOX_CORNERS); /** * @brief CPU preprocessing for input pointcloud diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu index 4383c1ae63c..72c7556f0c8 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu @@ -18,6 +18,7 @@ #include "common.h" #include "anchor_mask_cuda.h" +//modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf __global__ void scan_x(int *g_odata, int *g_idata, int n) { extern __shared__ int temp[]; // allocated on invocation @@ -65,6 +66,7 @@ __global__ void scan_x(int *g_odata, int *g_idata, int n) } } +//modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf __global__ void scan_y(int *g_odata, int *g_idata, int n) { extern __shared__ int temp[]; // allocated on invocation diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu index 7680a44ad87..d675f9e32f8 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu @@ -33,6 +33,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, min(n_boxes - row_start * block_threads, block_threads); const int col_size = min(n_boxes - col_start * block_threads, block_threads); + //Only can initialize with figures, not by variables __shared__ float block_boxes[64 * 4]; if (threadIdx.x < col_size) { diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 4035f399ffa..767008e2788 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -62,7 +62,9 @@ NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shar SENSOR_HEIGHT_(1.73), ANCHOR_DX_SIZE_(1.6), ANCHOR_DY_SIZE_(3.9), -ANCHOR_DZ_SIZE_(1.56) +ANCHOR_DZ_SIZE_(1.56), +NUM_BOX_CORNERS_(4), +NUM_OUTPUT_BOX_FEATURE_(7) { anchors_px_ = new float[NUM_ANCHOR_]; anchors_py_ = new float[NUM_ANCHOR_]; @@ -91,7 +93,8 @@ ANCHOR_DZ_SIZE_(1.56) MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, - NUM_INDS_FOR_SCAN_)); + NUM_INDS_FOR_SCAN_, + NUM_BOX_CORNERS_)); } else { @@ -141,7 +144,9 @@ ANCHOR_DZ_SIZE_(1.56) NUM_ANCHOR_R_INDS_, score_threshold_, NUM_THREADS_, - nms_overlap_threshold_ + nms_overlap_threshold_, + NUM_BOX_CORNERS_, + NUM_OUTPUT_BOX_FEATURE_ )); deviceMemoryMalloc(); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu index 48862d2256e..2b5501e3541 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu @@ -26,7 +26,8 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, float* filtered_box, float* filtered_score, int* filtered_dir, float* box_for_nms, int* filter_count, - const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold) + const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold, + const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE) { // boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r int tid = threadIdx.x + blockIdx.x * blockDim.x; @@ -39,23 +40,23 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co //decode network output float diagonal = sqrtf(dev_anchors_dx[tid]*dev_anchors_dx[tid] + dev_anchors_dy[tid]*dev_anchors_dy[tid]); - float box_px = box_preds[tid*7 + 0] * diagonal + dev_anchors_px[tid]; - float box_py = box_preds[tid*7 + 1] * diagonal + dev_anchors_py[tid]; - float box_pz = box_preds[tid*7 + 2] * dev_anchors_dz[tid] + za; - float box_dx = expf(box_preds[tid*7 + 3]) * dev_anchors_dx[tid]; - float box_dy = expf(box_preds[tid*7 + 4]) * dev_anchors_dy[tid]; - float box_dz = expf(box_preds[tid*7 + 5]) * dev_anchors_dz[tid]; - float box_ro = box_preds[tid*7 + 6] + dev_anchors_ro[tid]; + float box_px = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 0] * diagonal + dev_anchors_px[tid]; + float box_py = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 1] * diagonal + dev_anchors_py[tid]; + float box_pz = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 2] * dev_anchors_dz[tid] + za; + float box_dx = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 3]) * dev_anchors_dx[tid]; + float box_dy = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 4]) * dev_anchors_dy[tid]; + float box_dz = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 5]) * dev_anchors_dz[tid]; + float box_ro = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 6] + dev_anchors_ro[tid]; box_pz = box_pz - box_dz/2; - filtered_box[counter*7 + 0] = box_px; - filtered_box[counter*7 + 1] = box_py; - filtered_box[counter*7 + 2] = box_pz; - filtered_box[counter*7 + 3] = box_dx; - filtered_box[counter*7 + 4] = box_dy; - filtered_box[counter*7 + 5] = box_dz; - filtered_box[counter*7 + 6] = box_ro; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 0] = box_px; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 1] = box_py; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 2] = box_pz; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 3] = box_dx; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 4] = box_dy; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 5] = box_dz; + filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 6] = box_ro; filtered_score[counter] = score; int direction_label; @@ -77,6 +78,7 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co float( 0.5*box_dx), float(-0.5*box_dy)}; //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax) + //cannot use variable initialization since "error: expression must have a constant value" float rotated_corners[2*4]; float offset_corners[2*4]; float sin_yaw = sinf(box_ro); @@ -85,7 +87,7 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co float ymin = FLOAT_MAX; float xmax = FLOAT_MIN; float ymax = FLOAT_MIN; - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < NUM_BOX_CORNERS; i++) { rotated_corners[i*2 + 0] = cos_yaw*corners[i*2 + 0] - sin_yaw*corners[i*2 + 1]; rotated_corners[i*2 + 1] = sin_yaw*corners[i*2 + 0] + cos_yaw*corners[i*2 + 1]; @@ -99,42 +101,44 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co ymax = fmaxf(ymax, offset_corners[i*2 + 1]); } // box_for_nms(num_box, 4) - box_for_nms[counter*4 + 0] = xmin; - box_for_nms[counter*4 + 1] = ymin; - box_for_nms[counter*4 + 2] = xmax; - box_for_nms[counter*4 + 3] = ymax; + box_for_nms[counter*NUM_BOX_CORNERS + 0] = xmin; + box_for_nms[counter*NUM_BOX_CORNERS + 1] = ymin; + box_for_nms[counter*NUM_BOX_CORNERS + 2] = xmax; + box_for_nms[counter*NUM_BOX_CORNERS + 3] = ymax; } } __global__ void sort_boxes_by_indexes_kernel(float* filtered_box, int* filtered_dir, float* box_for_nms, int* indexes, int filter_count, - float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms) + float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms, + const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE) { int tid = threadIdx.x + blockIdx.x * blockDim.x; if(tid < filter_count) { int sort_index = indexes[tid]; - sorted_filtered_boxes[tid*7 + 0] = filtered_box[sort_index*7 + 0]; - sorted_filtered_boxes[tid*7 + 1] = filtered_box[sort_index*7 + 1]; - sorted_filtered_boxes[tid*7 + 2] = filtered_box[sort_index*7 + 2]; - sorted_filtered_boxes[tid*7 + 3] = filtered_box[sort_index*7 + 3]; - sorted_filtered_boxes[tid*7 + 4] = filtered_box[sort_index*7 + 4]; - sorted_filtered_boxes[tid*7 + 5] = filtered_box[sort_index*7 + 5]; - sorted_filtered_boxes[tid*7 + 6] = filtered_box[sort_index*7 + 6]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 0] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 0]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 1] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 1]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 2] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 2]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 3] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 3]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 4] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 4]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 5] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 5]; + sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 6] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 6]; sorted_filtered_dir[tid] = filtered_dir[sort_index]; - sorted_box_for_nms[tid*4 + 0] = box_for_nms[sort_index*4 + 0]; - sorted_box_for_nms[tid*4 + 1] = box_for_nms[sort_index*4 + 1]; - sorted_box_for_nms[tid*4 + 2] = box_for_nms[sort_index*4 + 2]; - sorted_box_for_nms[tid*4 + 3] = box_for_nms[sort_index*4 + 3]; + sorted_box_for_nms[tid*NUM_BOX_CORNERS + 0] = box_for_nms[sort_index*NUM_BOX_CORNERS + 0]; + sorted_box_for_nms[tid*NUM_BOX_CORNERS + 1] = box_for_nms[sort_index*NUM_BOX_CORNERS + 1]; + sorted_box_for_nms[tid*NUM_BOX_CORNERS + 2] = box_for_nms[sort_index*NUM_BOX_CORNERS + 2]; + sorted_box_for_nms[tid*NUM_BOX_CORNERS + 3] = box_for_nms[sort_index*NUM_BOX_CORNERS + 3]; } } PostprocessCuda::PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, - const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold): + const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold, + const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE): FLOAT_MIN_(FLOAT_MIN), FLOAT_MAX_(FLOAT_MAX), NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS), @@ -142,7 +146,9 @@ NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS), NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS), score_threshold_(score_threshold), NUM_THREADS_(NUM_THREADS), -nms_overlap_threshold_(nms_overlap_threshold) +nms_overlap_threshold_(nms_overlap_threshold), +NUM_BOX_CORNERS_(NUM_BOX_CORNERS), +NUM_OUTPUT_BOX_FEATURE_(NUM_OUTPUT_BOX_FEATURE) { nms_cuda_ptr_.reset(new NMSCuda( NUM_THREADS, @@ -161,7 +167,8 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float dev_anchors_px, dev_anchors_py, dev_anchors_pz, dev_anchors_dx, dev_anchors_dy, dev_anchors_dz, dev_anchors_ro, dev_filtered_box, dev_filtered_score, dev_filtered_dir, dev_box_for_nms, dev_filter_count, - FLOAT_MIN_, FLOAT_MAX_, score_threshold_); + FLOAT_MIN_, FLOAT_MAX_, score_threshold_, + NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_); int host_filter_count[1]; GPU_CHECK( cudaMemcpy(host_filter_count, dev_filter_count, sizeof(int), cudaMemcpyDeviceToHost ) ); @@ -174,16 +181,17 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float float* dev_sorted_filtered_box, *dev_sorted_box_for_nms; int* dev_sorted_filtered_dir; GPU_CHECK(cudaMalloc((void**)&dev_indexes, host_filter_count[0]*sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, 7*host_filter_count[0]*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0]*sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_dir, host_filter_count[0]*sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, 4*host_filter_count[0]*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, NUM_BOX_CORNERS_*host_filter_count[0]*sizeof(float))); thrust::sequence(thrust::device, dev_indexes, dev_indexes + host_filter_count[0]); thrust::sort_by_key(thrust::device, dev_filtered_score, dev_filtered_score + size_t(host_filter_count[0]), dev_indexes, thrust::greater()); const int num_blocks = DIVUP(host_filter_count[0], NUM_THREADS_); sort_boxes_by_indexes_kernel<<>>(dev_filtered_box, dev_filtered_dir, dev_box_for_nms, dev_indexes, host_filter_count[0], - dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms); + dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms, + NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_); int keep_inds[host_filter_count[0]] = {0}; nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects); @@ -191,24 +199,24 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float float host_filtered_box[host_filter_count[0]*7]; int host_filtered_dir[host_filter_count[0]]; - GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, 7*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) ); - GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir, host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) ); + GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) ); + GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir, host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) ); for (size_t i = 0; i < out_num_objects; i++) { - out_detection.push_back(host_filtered_box[keep_inds[i]*7+0]); - out_detection.push_back(host_filtered_box[keep_inds[i]*7+1]); - out_detection.push_back(host_filtered_box[keep_inds[i]*7+2]); - out_detection.push_back(host_filtered_box[keep_inds[i]*7+3]); - out_detection.push_back(host_filtered_box[keep_inds[i]*7+4]); - out_detection.push_back(host_filtered_box[keep_inds[i]*7+5]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+0]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+1]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+2]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+3]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+4]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+5]); if(host_filtered_dir[keep_inds[i]] == 0) { - out_detection.push_back(host_filtered_box[keep_inds[i]*7+6] + M_PI); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+6] + M_PI); } else { - out_detection.push_back(host_filtered_box[keep_inds[i]*7+6]); + out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+6]); } } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index 9ea42f9190d..51fe2a07884 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -32,7 +32,8 @@ PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN) + const int NUM_INDS_FOR_SCAN, + const int NUM_BOX_CORNERS) : MAX_NUM_PILLARS_(MAX_NUM_PILLARS), MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR), @@ -45,7 +46,8 @@ PILLAR_Z_SIZE_(PILLAR_Z_SIZE), MIN_X_RANGE_(MIN_X_RANGE), MIN_Y_RANGE_(MIN_Y_RANGE), MIN_Z_RANGE_(MIN_Z_RANGE), -NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) +NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN), +NUM_BOX_CORNERS_(NUM_BOX_CORNERS) { } @@ -96,9 +98,9 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point x_coors_for_sub_shaped, y_coors_for_sub_shaped); for(int i = 0; i < in_num_points; i++) { - int x_coor = std::floor((in_points_array[i*4 + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_); - int y_coor = std::floor((in_points_array[i*4 + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_); - int z_coor = std::floor((in_points_array[i*4 + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_); + int x_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_); + int y_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_); + int z_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_); if(x_coor < 0 || x_coor >= GRID_X_SIZE_ || y_coor < 0 || y_coor >= GRID_Y_SIZE_ || z_coor < 0 || z_coor >= GRID_Z_SIZE_) @@ -133,10 +135,10 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point int num = num_points_per_pillar[pillar_index]; if (num < MAX_NUM_POINTS_PER_PILLAR_) { - pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 0]; - pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 1]; - pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 2]; - pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 3]; + pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 0]; + pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 1]; + pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 2]; + pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 3]; num_points_per_pillar[pillar_index] += 1; } } From 49f6fba3d4b0d006aaecd561ba5d2b70f0e11f1a Mon Sep 17 00:00:00 2001 From: cirpue49 Date: Tue, 26 Feb 2019 19:06:52 +0900 Subject: [PATCH 04/37] apply ros clang --- .../include/anchor_mask_cuda.h | 34 +- .../lidar_point_pillars/include/common.h | 38 +- .../lidar_point_pillars/include/nms_cuda.h | 10 +- .../include/point_pillars.h | 77 +-- .../include/point_pillars_ros.h | 40 +- .../include/postprocess_cuda.h | 81 +-- .../include/preprocess_points.h | 47 +- .../include/preprocess_points_cuda.h | 46 +- .../include/scatter_cuda.h | 18 +- .../nodes/lidar_point_pillars_node.cpp | 4 +- .../nodes/point_pillars.cpp | 584 +++++++++--------- .../nodes/point_pillars_ros.cpp | 91 ++- .../nodes/preprocess_points.cpp | 145 ++--- .../test/src/test_point_pillars.cpp | 183 ++---- 14 files changed, 642 insertions(+), 756 deletions(-) mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h index ccca5cb6b6d..6a8b555422c 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h @@ -14,15 +14,15 @@ * limitations under the License. */ - /** - * @file anchor_mask_cuda.h - * @brief Make anchor mask for filtering output - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file anchor_mask_cuda.h +* @brief Make anchor mask for filtering output +* @author Kosuke Murakami +* @date 2019/02/26 +*/ -#ifndef ANCHOR_MASK_CUDA_H -#define ANCHOR_MASK_CUDA_H +#ifndef ANCHOR_MASK_CUDA_H +#define ANCHOR_MASK_CUDA_H class AnchorMaskCuda { @@ -37,6 +37,7 @@ class AnchorMaskCuda const float PILLAR_Y_SIZE_; const int GRID_X_SIZE_; const int GRID_Y_SIZE_; + public: /** * @brief Constructor @@ -52,16 +53,9 @@ class AnchorMaskCuda * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate * @details Captital variables never change after the compile */ - AnchorMaskCuda(const int NUM_INDS_FOR_SCAN, - const int NUM_ANCHOR_X_INDS, - const int NUM_ANCHOR_Y_INDS, - const int NUM_ANCHOR_R_INDS, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const int GRID_X_SIZE, - const int GRID_Y_SIZE); + AnchorMaskCuda(const int NUM_INDS_FOR_SCAN, const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, + const int NUM_ANCHOR_R_INDS, const float MIN_X_RANGE, const float MIN_Y_RANGE, + const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const int GRID_X_SIZE, const int GRID_Y_SIZE); /** * @brief call cuda code for making anchor mask @@ -77,8 +71,8 @@ class AnchorMaskCuda * @details dev_* means device memory. Make a mask for activating pillar occupancy area */ void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y, - const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, - const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask); + const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, + const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask); }; #endif // ANCHOR_MASK_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h index 7cf02333b4d..46e816db735 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h @@ -14,32 +14,36 @@ * limitations under the License. */ - /** - * @file common.h - * @brief MACRO for CUDA codes - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file common.h +* @brief MACRO for CUDA codes +* @author Kosuke Murakami +* @date 2019/02/26 +*/ -#ifndef COMMON_H -#define COMMON_H +#ifndef COMMON_H +#define COMMON_H -//headers in STL +// headers in STL #include -//headers in CUDA +// headers in CUDA #include #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) -#define GPU_CHECK(ans) { GPUAssert((ans), __FILE__, __LINE__); } -inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort=true) +#define GPU_CHECK(ans) \ + { \ + GPUAssert((ans), __FILE__, __LINE__); \ + } +inline void GPUAssert(cudaError_t code, const char* file, int line, bool abort = true) { - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } + if (code != cudaSuccess) + { + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) + exit(code); + } } #endif // COMMON_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h index 7a34abf498b..30f2a997062 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h @@ -21,14 +21,14 @@ * @date 2019/02/26 */ -#ifndef NMS_CUDA_H -#define NMS_CUDA_H +#ifndef NMS_CUDA_H +#define NMS_CUDA_H -//heders in STL +// heders in STL #include #include -//headers in local files +// headers in local files #include "common.h" class NMSCuda @@ -54,7 +54,7 @@ class NMSCuda * @param[out] out_num_to_keep Number of keep bounding box * @details Includes CUDA NMS and postprocessing for selecting box in CPU */ - void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep); + void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep); }; #endif // NMS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h old mode 100755 new mode 100644 index b0c69147ec9..e65a5c02433 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -14,18 +14,17 @@ * limitations under the License. */ - /** - * @file point_pillars.h - * @brief Algorithm for PointPillars - * @author Kosuke Murakami - * @date 2019/02/26 - */ - +/** +* @file point_pillars.h +* @brief Algorithm for PointPillars +* @author Kosuke Murakami +* @date 2019/02/26 +*/ #ifndef POINTS_PILLAR_H #define POINTS_PILLAR_H -//headers in STL +// headers in STL #include #include #include @@ -34,11 +33,11 @@ #include #include -//headers in TensorRT +// headers in TensorRT #include "NvInfer.h" #include "NvOnnxParser.h" -//headers in local files +// headers in local files #include "common.h" #include "preprocess_points.h" #include "preprocess_points_cuda.h" @@ -46,40 +45,48 @@ #include "scatter_cuda.h" #include "postprocess_cuda.h" - // Logger for TensorRT info/warning/errors class Logger : public nvinfer1::ILogger { public: - Logger(Severity severity = Severity::kWARNING) - : reportableSeverity(severity) - { - } + Logger(Severity severity = Severity::kWARNING) : reportableSeverity(severity) + { + } + + void log(Severity severity, const char* msg) override + { + // suppress messages with severity enum value greater than the reportable + if (severity > reportableSeverity) + return; - void log(Severity severity, const char* msg) override + switch (severity) { - // suppress messages with severity enum value greater than the reportable - if (severity > reportableSeverity) - return; - - switch (severity) - { - case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break; - case Severity::kERROR: std::cerr << "ERROR: "; break; - case Severity::kWARNING: std::cerr << "WARNING: "; break; - case Severity::kINFO: std::cerr << "INFO: "; break; - default: std::cerr << "UNKNOWN: "; break; - } - std::cerr << msg << std::endl; + case Severity::kINTERNAL_ERROR: + std::cerr << "INTERNAL_ERROR: "; + break; + case Severity::kERROR: + std::cerr << "ERROR: "; + break; + case Severity::kWARNING: + std::cerr << "WARNING: "; + break; + case Severity::kINFO: + std::cerr << "INFO: "; + break; + default: + std::cerr << "UNKNOWN: "; + break; } + std::cerr << msg << std::endl; + } - Severity reportableSeverity; + Severity reportableSeverity; }; class PointPillars { private: - //initize in initializer list + // initize in initializer list const bool reproduce_result_mode_; const float score_threshold_; const float nms_overlap_threshold_; @@ -117,8 +124,7 @@ class PointPillars const float ANCHOR_DZ_SIZE_; const int NUM_BOX_CORNERS_; const int NUM_OUTPUT_BOX_FEATURE_; - //end initializer list - + // end initializer list int host_pillar_count_[1]; @@ -182,7 +188,6 @@ class PointPillars float* dev_box_for_nms_; int* dev_filter_count_; - std::unique_ptr preprocess_points_ptr_; std::unique_ptr preprocess_points_cuda_ptr_; std::unique_ptr anchor_mask_cuda_ptr_; @@ -285,8 +290,8 @@ class PointPillars * @param[in] out_num_objects Number of output bounding box * @details This is interface for the algorithm */ - void doInference(const float* in_points_array, const int in_num_points, - std::vector& out_detection, int& out_num_objects); + void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection, + int& out_num_objects); }; #endif // POINTS_PILLAR_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h old mode 100755 new mode 100644 index e0b1e1bd53c..8a40722a896 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -14,38 +14,37 @@ * limitations under the License. */ - /** - * @file point_pillars_ros.h - * @brief ROS interface for PointPillars - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file point_pillars_ros.h +* @brief ROS interface for PointPillars +* @author Kosuke Murakami +* @date 2019/02/26 +*/ #ifndef POINTS_PILLAR_ROS_H #define POINTS_PILLAR_ROS_H -//headers in STL +// headers in STL #include #include -//headers in ROS +// headers in ROS #include #include #include -//headers in PCL +// headers in PCL #include -//headers in local files +// headers in local files #include "point_pillars.h" - class PointPillarsROS { private: friend class TestClass; - //initializer list + // initializer list ros::NodeHandle private_nh_; bool has_subscribed_baselink_; const int NUM_POINT_FEATURE_; @@ -53,16 +52,16 @@ class PointPillarsROS const float TRAINED_SENSOR_HEIGHT_; const float NORMALIZING_INTENSITY_VALUE_; const std::string BASELINK_FRAME_; - //end initializer list + // end initializer list - //rosparam + // rosparam bool baselink_support_; bool reproduce_result_mode_; float score_threshold_; float nms_overlap_threshold_; std::string pfe_onnx_file_; std::string rpn_onnx_file_; - //end rosparam + // end rosparam ros::NodeHandle nh_; ros::Subscriber sub_points_; @@ -73,8 +72,6 @@ class PointPillarsROS tf::Transform angle_transform_; tf::Transform angle_transform_inversed_; - - float offset_z_from_trained_data_; std::unique_ptr point_pillars_ptr_; @@ -100,8 +97,7 @@ class PointPillarsROS * @return geometry_msgs::Pose Transformed pose * @details Calculate transformed pose */ - geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose, - const tf::Transform& tf); + geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose, const tf::Transform& tf); /** * @brief callback for pointcloud @@ -117,7 +113,8 @@ class PointPillarsROS * @param[in] offset_z (default: 1.0) when using baselink_support, offset height based on current sensor configuration * @details convert pcl points to c++ array, plus offset z if it is necessary */ - void pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z = 0); + void pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array, + const float offset_z = 0); /** * @brief publish DetectedObject @@ -126,7 +123,8 @@ class PointPillarsROS * @param[in] pc_header Header from pointcloud * @details Convert std::vector to DetectedObject, and publish them */ - void pubDetectedObject(const std::vector& detections, const int num_objects, const std_msgs::Header& pc_header); + void pubDetectedObject(const std::vector& detections, const int num_objects, + const std_msgs::Header& pc_header); public: PointPillarsROS(); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h index 3768c040499..b60a427ca3f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h @@ -14,21 +14,21 @@ * limitations under the License. */ - /** - * @file postprocess_cuda.h - * @brief Postprocess for network output - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file postprocess_cuda.h +* @brief Postprocess for network output +* @author Kosuke Murakami +* @date 2019/02/26 +*/ -#ifndef POSTPROCESS_CUDA_H -#define POSTPROCESS_CUDA_H +#ifndef POSTPROCESS_CUDA_H +#define POSTPROCESS_CUDA_H -//headers in STL +// headers in STL #include #include -//headers in local files +// headers in local files #include "nms_cuda.h" class PostprocessCuda @@ -62,38 +62,39 @@ class PostprocessCuda * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam */ - PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, - const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, - const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold, - const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE); + PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS, + const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, const float score_threshold, + const int NUM_THREADS, const float nms_overlap_threshold, const int NUM_BOX_CORNERS, + const int NUM_OUTPUT_BOX_FEATURE); - /** - * @brief Postprocessing for the network output - * @param[in] rpn_box_output Box prediction from the network output - * @param[in] rpn_cls_output Class prediction from the network output - * @param[in] rpn_dir_output Direction prediction from the network output - * @param[in] dev_anchor_mask Anchor mask for filtering - * @param[in] dev_anchors_px X-coordinate value for corresponding anchor - * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor - * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor - * @param[in] dev_anchors_dx X-dimension value for corresponding anchor - * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor - * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor - * @param[in] dev_anchors_ro Rotation value for corresponding anchor - * @param[in] dev_filtered_box Filtered box prediction - * @param[in] dev_filtered_score Filtered score prediction - * @param[in] dev_filtered_dir Filtered direction prediction - * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms - * @param[in] dev_filter_count The number of filtered output - * @param[out] out_detection Output bounding boxes - * @param[out] out_num_objects The number of output bounding boxes - * @details dev_* represents device memory allocated variables - */ + /** + * @brief Postprocessing for the network output + * @param[in] rpn_box_output Box prediction from the network output + * @param[in] rpn_cls_output Class prediction from the network output + * @param[in] rpn_dir_output Direction prediction from the network output + * @param[in] dev_anchor_mask Anchor mask for filtering + * @param[in] dev_anchors_px X-coordinate value for corresponding anchor + * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor + * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor + * @param[in] dev_anchors_dx X-dimension value for corresponding anchor + * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor + * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor + * @param[in] dev_anchors_ro Rotation value for corresponding anchor + * @param[in] dev_filtered_box Filtered box prediction + * @param[in] dev_filtered_score Filtered score prediction + * @param[in] dev_filtered_dir Filtered direction prediction + * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms + * @param[in] dev_filter_count The number of filtered output + * @param[out] out_detection Output bounding boxes + * @param[out] out_num_objects The number of output bounding boxes + * @details dev_* represents device memory allocated variables + */ void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output, - int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, - const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, - float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count, - std::vector& out_detection, int& out_num_objects); + int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, + const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy, + const float* dev_anchors_dz, const float* dev_anchors_ro, float* dev_filtered_box, + float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, + int* dev_filter_count, std::vector& out_detection, int& out_num_objects); }; #endif // POSTPROCESS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h index 426dd50e714..b8b361db22e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h @@ -14,18 +14,16 @@ * limitations under the License. */ - /** - * @file preprocess_points.h - * @brief CPU version of preprocess points - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file preprocess_points.h +* @brief CPU version of preprocess points +* @author Kosuke Murakami +* @date 2019/02/26 +*/ #ifndef PREPROCESS_POINTS_H #define PREPROCESS_POINTS_H - - class PreprocessPoints { private: @@ -44,7 +42,6 @@ class PreprocessPoints const int NUM_INDS_FOR_SCAN_; const int NUM_BOX_CORNERS_; - public: /** * @brief Constructor @@ -63,19 +60,10 @@ class PreprocessPoints * @param[in] NUM_BOX_CORNERS Number of box's corner * @details Captital variables never change after the compile */ - PreprocessPoints(const int MAX_NUM_PILLARS, - const int MAX_POINTS_PER_PILLAR, - const int GRID_X_SIZE, - const int GRID_Y_SIZE, - const int GRID_Z_SIZE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN, - const int NUM_BOX_CORNERS); + PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE, + const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, + const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS); /** * @brief CPU preprocessing for input pointcloud @@ -95,11 +83,10 @@ class PreprocessPoints * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud * @details Convert pointcloud to pillar representation */ - void preprocess(const float* in_points_array, int in_num_points, - int* x_coors, int* y_coors, float* num_points_per_pillar, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, - float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); + void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, + float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask, + float* sparse_pillar_map, int* host_pillar_count); /** * @brief Initializing variables for preprocessing @@ -113,9 +100,9 @@ class PreprocessPoints * @param[in] y_coors_for_sub_shaped Used for y substraction in the network * @details Initializeing input arguments with certain values */ - void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped); + void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x, float* pillar_y, + float* pillar_z, float* pillar_i, float* x_coors_for_sub_shaped, + float* y_coors_for_sub_shaped); }; #endif // PREPROCESS_POINTS_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h index 7a4dd8420d2..710a14a87c3 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h @@ -14,22 +14,20 @@ * limitations under the License. */ - /** - * @file preprocess_points_cuda.h - * @brief GPU version of preprocess points - * @author Kosuke Murakami - * @date 2019/02/26 - */ - - -#ifndef PREPROCESS_POINTS_CUDA_H -#define PREPROCESS_POINTS_CUDA_H +/** +* @file preprocess_points_cuda.h +* @brief GPU version of preprocess points +* @author Kosuke Murakami +* @date 2019/02/26 +*/ +#ifndef PREPROCESS_POINTS_CUDA_H +#define PREPROCESS_POINTS_CUDA_H class PreprocessPointsCuda { private: - //initialzer list + // initialzer list const int NUM_THREADS_; const int MAX_NUM_PILLARS_; const int MAX_NUM_POINTS_PER_PILLAR_; @@ -43,7 +41,7 @@ class PreprocessPointsCuda const float MIN_X_RANGE_; const float MIN_Y_RANGE_; const float MIN_Z_RANGE_; - //end initalizer list + // end initalizer list float* dev_pillar_x_in_coors_; float* dev_pillar_y_in_coors_; @@ -74,19 +72,10 @@ class PreprocessPointsCuda * @param[in] MIN_Z_RANGE Minimum z value for pointcloud * @details Captital variables never change after the compile */ - PreprocessPointsCuda(const int NUM_THREADS, - const int MAX_NUM_PILLARS, - const int MAX_POINTS_PER_PILLAR, - const int NUM_INDS_FOR_SCAN, - const int GRID_X_SIZE, - const int GRID_Y_SIZE, - const int GRID_Z_SIZE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float MIN_Z_RANGE); + PreprocessPointsCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, + const int NUM_INDS_FOR_SCAN, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE, + const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, + const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE); ~PreprocessPointsCuda(); /** @@ -108,9 +97,10 @@ class PreprocessPointsCuda * @details Convert pointcloud to pillar representation */ void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors, - float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i, - float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped, - float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count); + float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, + float* dev_pillar_z, float* dev_pillar_i, float* dev_x_coors_for_sub_shaped, + float* dev_y_coors_for_sub_shaped, float* dev_pillar_feature_mask, + int* dev_sparse_pillar_map, int* host_pillar_count); }; #endif // PREPROCESS_POINTS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h index 24e4f0b79dc..1c955a3a65d 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h @@ -14,15 +14,15 @@ * limitations under the License. */ - /** - * @file scatter_cuda.h - * @brief CUDA code for scatter operation - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file scatter_cuda.h +* @brief CUDA code for scatter operation +* @author Kosuke Murakami +* @date 2019/02/26 +*/ -#ifndef SCATTERCUDA_H -#define SCATTERCUDA_H +#ifndef SCATTERCUDA_H +#define SCATTERCUDA_H class ScatterCuda { @@ -52,7 +52,7 @@ class ScatterCuda * @param[out] scattered_feature Gridmap representation for pillars' feature * @details Allocate pillars in gridmap based on index(coordinates) information */ - void doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature); + void doScatterCuda(const int pillar_count, int* x_coors, int* y_coors, float* pfe_output, float* scattered_feature); }; #endif // SCATTERCUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp old mode 100755 new mode 100644 index 36617dd472b..8faa0bede56 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp @@ -14,10 +14,10 @@ * limitations under the License. */ -//headers in STL +// headers in STL #include -//headers in local files +// headers in local files #include "point_pillars_ros.h" int main(int argc, char** argv) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp old mode 100755 new mode 100644 index 767008e2788..117fb5c2a5e --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -14,57 +14,57 @@ * limitations under the License. */ - -//headers in STL +// headers in STL #include #include -//headers in ROS +// headers in ROS #include -//headers in local files +// headers in local files #include "point_pillars.h" - -PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, - const std::string pfe_onnx_file, const std::string rpn_onnx_file): -reproduce_result_mode_(reproduce_result_mode), -score_threshold_(score_threshold), -nms_overlap_threshold_(nms_overlap_threshold), -pfe_onnx_file_(pfe_onnx_file), -rpn_onnx_file_(rpn_onnx_file), -MAX_NUM_PILLARS_(12000), -MAX_NUM_POINTS_PER_PILLAR_(100), -PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_*64), -GRID_X_SIZE_(432), -GRID_Y_SIZE_(496), -GRID_Z_SIZE_(1), -RPN_INPUT_SIZE_(64*GRID_X_SIZE_*GRID_Y_SIZE_), -NUM_ANCHOR_X_INDS_(GRID_X_SIZE_*0.5), -NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_*0.5), -NUM_ANCHOR_R_INDS_(2), -NUM_ANCHOR_(NUM_ANCHOR_X_INDS_*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_), -RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_*7) , -RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_), -RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_*2), -PILLAR_X_SIZE_(0.16), -PILLAR_Y_SIZE_(0.16), -PILLAR_Z_SIZE_(4.0), -MIN_X_RANGE_(0.0), -MIN_Y_RANGE_(-39.68), -MIN_Z_RANGE_(-3.0), -MAX_X_RANGE_(69.12), -MAX_Y_RANGE_(39.68), -MAX_Z_RANGE_(1), -BATCH_SIZE_(1), -NUM_INDS_FOR_SCAN_(512), -NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size -SENSOR_HEIGHT_(1.73), -ANCHOR_DX_SIZE_(1.6), -ANCHOR_DY_SIZE_(3.9), -ANCHOR_DZ_SIZE_(1.56), -NUM_BOX_CORNERS_(4), -NUM_OUTPUT_BOX_FEATURE_(7) +PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, + const float nms_overlap_threshold, const std::string pfe_onnx_file, + const std::string rpn_onnx_file) + : reproduce_result_mode_(reproduce_result_mode) + , score_threshold_(score_threshold) + , nms_overlap_threshold_(nms_overlap_threshold) + , pfe_onnx_file_(pfe_onnx_file) + , rpn_onnx_file_(rpn_onnx_file) + , MAX_NUM_PILLARS_(12000) + , MAX_NUM_POINTS_PER_PILLAR_(100) + , PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_ * 64) + , GRID_X_SIZE_(432) + , GRID_Y_SIZE_(496) + , GRID_Z_SIZE_(1) + , RPN_INPUT_SIZE_(64 * GRID_X_SIZE_ * GRID_Y_SIZE_) + , NUM_ANCHOR_X_INDS_(GRID_X_SIZE_ * 0.5) + , NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_ * 0.5) + , NUM_ANCHOR_R_INDS_(2) + , NUM_ANCHOR_(NUM_ANCHOR_X_INDS_ * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_) + , RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_ * 7) + , RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_) + , RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_ * 2) + , PILLAR_X_SIZE_(0.16) + , PILLAR_Y_SIZE_(0.16) + , PILLAR_Z_SIZE_(4.0) + , MIN_X_RANGE_(0.0) + , MIN_Y_RANGE_(-39.68) + , MIN_Z_RANGE_(-3.0) + , MAX_X_RANGE_(69.12) + , MAX_Y_RANGE_(39.68) + , MAX_Z_RANGE_(1) + , BATCH_SIZE_(1) + , NUM_INDS_FOR_SCAN_(512) + , NUM_THREADS_(64) + , // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size + SENSOR_HEIGHT_(1.73) + , ANCHOR_DX_SIZE_(1.6) + , ANCHOR_DY_SIZE_(3.9) + , ANCHOR_DZ_SIZE_(1.56) + , NUM_BOX_CORNERS_(4) + , NUM_OUTPUT_BOX_FEATURE_(7) { anchors_px_ = new float[NUM_ANCHOR_]; anchors_py_ = new float[NUM_ANCHOR_]; @@ -78,76 +78,31 @@ NUM_OUTPUT_BOX_FEATURE_(7) box_anchors_max_x_ = new float[NUM_ANCHOR_]; box_anchors_max_y_ = new float[NUM_ANCHOR_]; - - if(reproduce_result_mode_) + if (reproduce_result_mode_) { - preprocess_points_ptr_.reset(new PreprocessPoints( - MAX_NUM_PILLARS_, - MAX_NUM_POINTS_PER_PILLAR_, - GRID_X_SIZE_, - GRID_Y_SIZE_, - GRID_Z_SIZE_, - PILLAR_X_SIZE_, - PILLAR_Y_SIZE_, - PILLAR_Z_SIZE_, - MIN_X_RANGE_, - MIN_Y_RANGE_, - MIN_Z_RANGE_, - NUM_INDS_FOR_SCAN_, - NUM_BOX_CORNERS_)); + preprocess_points_ptr_.reset(new PreprocessPoints(MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, + GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, + PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, + NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_)); } else { preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda( - NUM_THREADS_, - MAX_NUM_PILLARS_, - MAX_NUM_POINTS_PER_PILLAR_, - NUM_INDS_FOR_SCAN_, - GRID_X_SIZE_, - GRID_Y_SIZE_, - GRID_Z_SIZE_, - PILLAR_X_SIZE_, - PILLAR_Y_SIZE_, - PILLAR_Z_SIZE_, - MIN_X_RANGE_, - MIN_Y_RANGE_, - MIN_Z_RANGE_)); + NUM_THREADS_, MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, NUM_INDS_FOR_SCAN_, GRID_X_SIZE_, GRID_Y_SIZE_, + GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_)); } - anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda( - NUM_INDS_FOR_SCAN_, - NUM_ANCHOR_X_INDS_, - NUM_ANCHOR_Y_INDS_, - NUM_ANCHOR_R_INDS_, - MIN_X_RANGE_, - MIN_Y_RANGE_, - PILLAR_X_SIZE_, - PILLAR_Y_SIZE_, - GRID_X_SIZE_, - GRID_Y_SIZE_ - )); - - scatter_cuda_ptr_.reset(new ScatterCuda( - NUM_THREADS_, - MAX_NUM_PILLARS_, - GRID_X_SIZE_, - GRID_Y_SIZE_ - )); + anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(NUM_INDS_FOR_SCAN_, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_, + NUM_ANCHOR_R_INDS_, MIN_X_RANGE_, MIN_Y_RANGE_, PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, GRID_X_SIZE_, GRID_Y_SIZE_)); + + scatter_cuda_ptr_.reset(new ScatterCuda(NUM_THREADS_, MAX_NUM_PILLARS_, GRID_X_SIZE_, GRID_Y_SIZE_)); const float FLOAT_MIN = std::numeric_limits::lowest(); const float FLOAT_MAX = std::numeric_limits::max(); - postprocess_cuda_ptr_.reset(new PostprocessCuda( - FLOAT_MIN, - FLOAT_MAX, - NUM_ANCHOR_X_INDS_, - NUM_ANCHOR_Y_INDS_, - NUM_ANCHOR_R_INDS_, - score_threshold_, - NUM_THREADS_, - nms_overlap_threshold_, - NUM_BOX_CORNERS_, - NUM_OUTPUT_BOX_FEATURE_ - )); + postprocess_cuda_ptr_.reset(new PostprocessCuda(FLOAT_MIN, FLOAT_MAX, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_, + NUM_ANCHOR_R_INDS_, score_threshold_, NUM_THREADS_, + nms_overlap_threshold_, NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_)); deviceMemoryMalloc(); initTRT(); @@ -156,7 +111,6 @@ NUM_OUTPUT_BOX_FEATURE_(7) PointPillars::~PointPillars() { - delete[] anchors_px_; delete[] anchors_py_; delete[] anchors_pz_; @@ -185,9 +139,9 @@ PointPillars::~PointPillars() GPU_CHECK(cudaFree(dev_pillar_z_)); GPU_CHECK(cudaFree(dev_pillar_i_)); - GPU_CHECK(cudaFree( dev_x_coors_for_sub_shaped_ )); - GPU_CHECK(cudaFree( dev_y_coors_for_sub_shaped_ )); - GPU_CHECK(cudaFree( dev_pillar_feature_mask_ )); + GPU_CHECK(cudaFree(dev_x_coors_for_sub_shaped_)); + GPU_CHECK(cudaFree(dev_y_coors_for_sub_shaped_)); + GPU_CHECK(cudaFree(dev_pillar_feature_mask_)); GPU_CHECK(cudaFree(dev_cumsum_along_x_)); GPU_CHECK(cudaFree(dev_cumsum_along_y_)); @@ -208,7 +162,6 @@ PointPillars::~PointPillars() GPU_CHECK(cudaFree(pfe_buffers_[7])); GPU_CHECK(cudaFree(pfe_buffers_[8])); - GPU_CHECK(cudaFree(rpn_buffers_[0])); GPU_CHECK(cudaFree(rpn_buffers_[1])); GPU_CHECK(cudaFree(rpn_buffers_[2])); @@ -229,8 +182,6 @@ PointPillars::~PointPillars() GPU_CHECK(cudaFree(dev_box_for_nms_)); GPU_CHECK(cudaFree(dev_filter_count_)); - - pfe_context_->destroy(); rpn_context_->destroy(); @@ -242,40 +193,43 @@ PointPillars::~PointPillars() void PointPillars::deviceMemoryMalloc() { - GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); - - GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_* sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_* sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_,NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); - - - GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - - - GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); - - //cumsum kernel - GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int))); - + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, + GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, + GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, + GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, + GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); + + GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); + + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + + GPU_CHECK( + cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( + cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK( + cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + + // cumsum kernel + GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); // for make anchor mask kernel - GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_*sizeof(int))); - + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_ * sizeof(int))); // for trt inference // create GPU buffers and a stream @@ -295,27 +249,27 @@ void PointPillars::deviceMemoryMalloc() GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float))); // for scatter kernel - GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64*GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(float) ) ); - - //for filter - GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_*7*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_*sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_*sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_*4*sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64 * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float))); + + // for filter + GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * 7 * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_ * sizeof(int))); + GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * 4 * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int))); } void PointPillars::initAnchors() { // zero clear - for(size_t i = 0; i < NUM_ANCHOR_; i++) + for (size_t i = 0; i < NUM_ANCHOR_; i++) { anchors_px_[i] = 0; anchors_py_[i] = 0; @@ -330,36 +284,36 @@ void PointPillars::initAnchors() box_anchors_max_y_[i] = 0; } - float x_stride = PILLAR_X_SIZE_*2; - float y_stride = PILLAR_Y_SIZE_*2; + float x_stride = PILLAR_X_SIZE_ * 2; + float y_stride = PILLAR_Y_SIZE_ * 2; float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_; float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_; - float anchor_x_count[NUM_ANCHOR_X_INDS_] = {0}; - for(size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++) + float anchor_x_count[NUM_ANCHOR_X_INDS_] = { 0 }; + for (size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++) { - anchor_x_count[i] = float(i)*x_stride + x_offset; + anchor_x_count[i] = float(i) * x_stride + x_offset; } - float anchor_y_count[NUM_ANCHOR_Y_INDS_] = {0}; - for(size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++) + float anchor_y_count[NUM_ANCHOR_Y_INDS_] = { 0 }; + for (size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++) { - anchor_y_count[i] = float(i)*y_stride + y_offset; + anchor_y_count[i] = float(i) * y_stride + y_offset; } - float anchor_r_count[NUM_ANCHOR_R_INDS_] = {0, M_PI/2}; + float anchor_r_count[NUM_ANCHOR_R_INDS_] = { 0, M_PI / 2 }; // np.meshgrid - for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++) { - for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) { - for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++) + for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++) { - int ind = y*NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_ + x*NUM_ANCHOR_R_INDS_ + r; + int ind = y * NUM_ANCHOR_X_INDS_ * NUM_ANCHOR_R_INDS_ + x * NUM_ANCHOR_R_INDS_ + r; anchors_px_[ind] = anchor_x_count[x]; anchors_py_[ind] = anchor_y_count[y]; anchors_ro_[ind] = anchor_r_count[r]; - anchors_pz_[ind] = -1*SENSOR_HEIGHT_; + anchors_pz_[ind] = -1 * SENSOR_HEIGHT_; anchors_dx_[ind] = ANCHOR_DX_SIZE_; anchors_dy_[ind] = ANCHOR_DY_SIZE_; anchors_dz_[ind] = ANCHOR_DZ_SIZE_; @@ -373,49 +327,51 @@ void PointPillars::initAnchors() void PointPillars::putAnchorsInDeviceMemory() { - GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); - GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); - GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); - GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) ); - - GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice)); - + GPU_CHECK( + cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK( + cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK( + cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK( + cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + + GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); } -void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, - float* anchors_dx, float* anchors_dy) +void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy) { // flip box's dimension when the at the third axis == 1 - float flipped_anchors_dx[NUM_ANCHOR_] = {0}; - float flipped_anchors_dy[NUM_ANCHOR_] = {0}; - for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + float flipped_anchors_dx[NUM_ANCHOR_] = { 0 }; + float flipped_anchors_dy[NUM_ANCHOR_] = { 0 }; + for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) { - for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++) { - int base_ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_; + int base_ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_; flipped_anchors_dx[base_ind + 0] = ANCHOR_DX_SIZE_; flipped_anchors_dy[base_ind + 0] = ANCHOR_DY_SIZE_; flipped_anchors_dx[base_ind + 1] = ANCHOR_DY_SIZE_; flipped_anchors_dy[base_ind + 1] = ANCHOR_DX_SIZE_; } } - for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) + for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) { - for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++) + for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++) { - for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++) + for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++) { - int ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_ + r; - box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind]/2; - box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind]/2; - box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind]/2; - box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind]/2; + int ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_ + r; + box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2; + box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2; + box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2; + box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2; } } } @@ -424,11 +380,11 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p void PointPillars::initTRT() { // create a TensorRT model from the onnx model and serialize it to a stream - nvinfer1::IHostMemory* pfe_trt_model_stream{nullptr}; - nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr}; + nvinfer1::IHostMemory* pfe_trt_model_stream{ nullptr }; + nvinfer1::IHostMemory* rpn_trt_model_stream{ nullptr }; onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream); onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream); - if(pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) + if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) { ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str()); } @@ -436,13 +392,15 @@ void PointPillars::initTRT() // deserialize the engine pfe_runtime_ = nvinfer1::createInferRuntime(g_logger_); rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_); - if(pfe_runtime_ == nullptr || rpn_runtime_ == nullptr) + if (pfe_runtime_ == nullptr || rpn_runtime_ == nullptr) { ROS_ERROR("Failed to create TensorRT Runtime object."); } - pfe_engine_ = pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr); - rpn_engine_ = rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr); - if(pfe_engine_ == nullptr || rpn_engine_ == nullptr) + pfe_engine_ = + pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr); + rpn_engine_ = + rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr); + if (pfe_engine_ == nullptr || rpn_engine_ == nullptr) { ROS_ERROR("Failed to create TensorRT Engine."); } @@ -450,16 +408,16 @@ void PointPillars::initTRT() rpn_trt_model_stream->destroy(); pfe_context_ = pfe_engine_->createExecutionContext(); rpn_context_ = rpn_engine_->createExecutionContext(); - if(pfe_context_ == nullptr || rpn_context_ == nullptr) + if (pfe_context_ == nullptr || rpn_context_ == nullptr) { ROS_ERROR("Failed to create TensorRT Execution Context."); } } -void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the onnx model - nvinfer1::IHostMemory*& trt_model_stream) // output buffer for the TensorRT model +void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the onnx model + nvinfer1::IHostMemory*& trt_model_stream) // output buffer for the TensorRT model { - int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING; + int verbosity = (int)nvinfer1::ILogger::Severity::kWARNING; // create the builder nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_); @@ -469,9 +427,9 @@ void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the if (!parser->parseFromFile(model_file.c_str(), verbosity)) { - std::string msg("failed to parse onnx file"); - g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str()); - exit(EXIT_FAILURE); + std::string msg("failed to parse onnx file"); + g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str()); + exit(EXIT_FAILURE); } // Build the engine @@ -491,48 +449,55 @@ void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_points) { - int x_coors[MAX_NUM_PILLARS_]= {0}; - int y_coors[MAX_NUM_PILLARS_]= {0}; - float num_points_per_pillar[MAX_NUM_PILLARS_] = {0}; - float * pillar_x = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_y = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_z = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_i = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - - float * x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - float * y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_feature_mask = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_]; - - float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_]; - - preprocess_points_ptr_->preprocess(in_points_array, in_num_points, - x_coors, y_coors, num_points_per_pillar, - pillar_x, pillar_y, pillar_z, pillar_i, - x_coors_for_sub_shaped, y_coors_for_sub_shaped, - pillar_feature_mask, sparse_pillar_map, host_pillar_count_); - - GPU_CHECK( cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); - GPU_CHECK( cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); - GPU_CHECK( cudaMemset(dev_pillar_x_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( cudaMemset(dev_pillar_y_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( cudaMemset(dev_pillar_z_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( cudaMemset(dev_pillar_i_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) ); - GPU_CHECK( cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) ); - GPU_CHECK( cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) ); - GPU_CHECK( cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) ); - - GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_,num_points_per_pillar,MAX_NUM_PILLARS_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice )); - GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(float), cudaMemcpyHostToDevice )); + int x_coors[MAX_NUM_PILLARS_] = { 0 }; + int y_coors[MAX_NUM_PILLARS_] = { 0 }; + float num_points_per_pillar[MAX_NUM_PILLARS_] = { 0 }; + float* pillar_x = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_y = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_z = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_i = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + + float* x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + float* y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_feature_mask = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_]; + + float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_]; + + preprocess_points_ptr_->preprocess(in_points_array, in_num_points, x_coors, y_coors, num_points_per_pillar, pillar_x, + pillar_y, pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, + pillar_feature_mask, sparse_pillar_map, host_pillar_count_); + + GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); + + GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped, + MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped, + MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float), + cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, + MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, + NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice)); delete[] pillar_x; delete[] pillar_y; @@ -546,33 +511,32 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_ void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points) { - float * dev_points; - GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points*4*sizeof(float))); - GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points*4*sizeof(float), cudaMemcpyHostToDevice ) ); - - - GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int))); - GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) ); - GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); - GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) ); - GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) ); - GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_*sizeof(int))); - - preprocess_points_cuda_ptr_->doPreprocessPointsCuda(dev_points, in_num_points, dev_x_coors_, dev_y_coors_, - dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_, dev_pillar_z_, dev_pillar_i_, - dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_, - dev_pillar_feature_mask_, dev_sparse_pillar_map_, host_pillar_count_); + float* dev_points; + GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * 4 * sizeof(float))); + GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * 4 * sizeof(float), cudaMemcpyHostToDevice)); + + GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int))); + GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float))); + GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_ * sizeof(int))); + + preprocess_points_cuda_ptr_->doPreprocessPointsCuda( + dev_points, in_num_points, dev_x_coors_, dev_y_coors_, dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_, + dev_pillar_z_, dev_pillar_i_, dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_, dev_pillar_feature_mask_, + dev_sparse_pillar_map_, host_pillar_count_); GPU_CHECK(cudaFree(dev_points)); } void PointPillars::preprocess(const float* in_points_array, const int in_num_points) { - if(reproduce_result_mode_) + if (reproduce_result_mode_) { preprocessCPU(in_points_array, in_num_points); } @@ -582,44 +546,56 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi } } -void PointPillars::doInference(const float* in_points_array, const int in_num_points, - std::vector& out_detection, int& out_num_objects) +void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection, + int& out_num_objects) { preprocess(in_points_array, in_num_points); - anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_, dev_box_anchors_min_x_, - dev_box_anchors_min_y_, dev_box_anchors_max_x_, dev_box_anchors_max_y_, dev_anchor_mask_); - + anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_, + dev_box_anchors_min_x_, dev_box_anchors_min_y_, dev_box_anchors_max_x_, + dev_box_anchors_max_y_, dev_anchor_mask_); cudaStream_t stream; GPU_CHECK(cudaStreamCreate(&stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, + BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr); + GPU_CHECK(cudaMemset(dev_scattered_feature_, 0, RPN_INPUT_SIZE_ * sizeof(float))); + scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_, (float*)pfe_buffers_[8], + dev_scattered_feature_); - - GPU_CHECK( cudaMemset( dev_scattered_feature_, 0, RPN_INPUT_SIZE_*sizeof(float) ) ); - scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_, - (float *)pfe_buffers_[8], dev_scattered_feature_); - - - GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_, BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_, BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float), + cudaMemcpyDeviceToDevice, stream)); rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr); - - GPU_CHECK(cudaMemset( dev_filter_count_, 0, sizeof( int ))); - postprocess_cuda_ptr_->doPostprocessCuda((float *)rpn_buffers_[1], (float *)rpn_buffers_[2],(float *)rpn_buffers_[3] , dev_anchor_mask_, - dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, - dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, - dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, - dev_filter_count_, out_detection, out_num_objects); + GPU_CHECK(cudaMemset(dev_filter_count_, 0, sizeof(int))); + postprocess_cuda_ptr_->doPostprocessCuda((float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], + dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, + dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, + dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, + dev_filter_count_, out_detection, out_num_objects); // release the stream and the buffers cudaStreamDestroy(stream); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp old mode 100755 new mode 100644 index a7287b3f60e..0d4b3df7148 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -14,30 +14,30 @@ * limitations under the License. */ -//headers in STL +// headers in STL #include #include -//headers in PCL +// headers in PCL #include #include #include -//headers in ROS +// headers in ROS #include -//headers in local files +// headers in local files #include "autoware_msgs/DetectedObjectArray.h" #include "point_pillars_ros.h" -PointPillarsROS::PointPillarsROS(): -private_nh_("~"), -has_subscribed_baselink_(false), -NUM_POINT_FEATURE_(4), -OUTPUT_NUM_BOX_FEATURE_(7), -TRAINED_SENSOR_HEIGHT_(1.73), -NORMALIZING_INTENSITY_VALUE_(255.0), -BASELINK_FRAME_("base_link") +PointPillarsROS::PointPillarsROS() + : private_nh_("~") + , has_subscribed_baselink_(false) + , NUM_POINT_FEATURE_(4) + , OUTPUT_NUM_BOX_FEATURE_(7) + , TRAINED_SENSOR_HEIGHT_(1.73) + , NORMALIZING_INTENSITY_VALUE_(255.0) + , BASELINK_FRAME_("base_link") { private_nh_.param("baselink_support", baselink_support_, true); private_nh_.param("reproduce_result_mode", reproduce_result_mode_, false); @@ -46,34 +46,30 @@ BASELINK_FRAME_("base_link") private_nh_.param("pfe_onnx_file", pfe_onnx_file_, ""); private_nh_.param("rpn_onnx_file", rpn_onnx_file_, ""); - point_pillars_ptr_.reset(new PointPillars( - reproduce_result_mode_, - score_threshold_, - nms_overlap_threshold_, - pfe_onnx_file_, - rpn_onnx_file_)); + point_pillars_ptr_.reset(new PointPillars(reproduce_result_mode_, score_threshold_, nms_overlap_threshold_, + pfe_onnx_file_, rpn_onnx_file_)); } void PointPillarsROS::createROSPubSub() { - sub_points_ = nh_.subscribe("/points_raw", 1, &PointPillarsROS::pointsCallback, this); - pub_objects_ = nh_.advertise("/detection/lidar_detector/objects", 1); + sub_points_ = nh_.subscribe("/points_raw", 1, &PointPillarsROS::pointsCallback, this); + pub_objects_ = nh_.advertise("/detection/lidar_detector/objects", 1); } - -geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose, - const tf::Transform& tf) +geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose, const tf::Transform& tf) { tf::Transform transform; geometry_msgs::PoseStamped out_pose; transform.setOrigin(tf::Vector3(in_pose.position.x, in_pose.position.y, in_pose.position.z)); - transform.setRotation(tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w)); + transform.setRotation( + tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w)); geometry_msgs::PoseStamped pose_out; - tf::poseTFToMsg(tf*transform, out_pose.pose); + tf::poseTFToMsg(tf * transform, out_pose.pose); return out_pose.pose; } -void PointPillarsROS::pubDetectedObject(const std::vector& detections, const int num_objects, const std_msgs::Header& pc_header) +void PointPillarsROS::pubDetectedObject(const std::vector& detections, const int num_objects, + const std_msgs::Header& pc_header) { autoware_msgs::DetectedObjectArray objects; objects.header = pc_header; @@ -84,24 +80,23 @@ void PointPillarsROS::pubDetectedObject(const std::vector& detections, co object.valid = true; object.pose_reliable = true; - object.pose.position.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+0]; - object.pose.position.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+1]; - object.pose.position.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+2]; - + object.pose.position.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 0]; + object.pose.position.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 1]; + object.pose.position.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 2]; - float yaw = detections[i*OUTPUT_NUM_BOX_FEATURE_+6]; + float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6]; yaw = std::atan2(std::sin(yaw), std::cos(yaw)); geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw); object.pose.orientation = q; - if(baselink_support_) + if (baselink_support_) { object.pose = getTransformedPose(object.pose, angle_transform_inversed_); } - object.dimensions.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+3]; - object.dimensions.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+4]; - object.dimensions.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+5]; + object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3]; + object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4]; + object.dimensions.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 5]; objects.objects.push_back(object); } @@ -113,7 +108,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid) try { tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0)); - tf_listener_.lookupTransform( BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_); + tf_listener_.lookupTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_); getTFInfo(baselink2lidar_); has_subscribed_baselink_ = true; } @@ -131,39 +126,37 @@ void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar) tf::Quaternion q = baselink2lidar_.getRotation(); angle_transform_ = tf::Transform(q); angle_transform_inversed_ = angle_transform_.inverse(); - } -void PointPillarsROS::pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z) +void PointPillarsROS::pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array, + const float offset_z) { - for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++) + for (size_t i = 0; i < in_pcl_pc_ptr->size(); i++) { pcl::PointXYZI point = in_pcl_pc_ptr->at(i); - out_points_array[i*NUM_POINT_FEATURE_ + 0] = point.x; - out_points_array[i*NUM_POINT_FEATURE_ + 1] = point.y; - out_points_array[i*NUM_POINT_FEATURE_ + 2] = point.z + offset_z; - out_points_array[i*NUM_POINT_FEATURE_ + 3] = float(point.intensity/NORMALIZING_INTENSITY_VALUE_); + out_points_array[i * NUM_POINT_FEATURE_ + 0] = point.x; + out_points_array[i * NUM_POINT_FEATURE_ + 1] = point.y; + out_points_array[i * NUM_POINT_FEATURE_ + 2] = point.z + offset_z; + out_points_array[i * NUM_POINT_FEATURE_ + 3] = float(point.intensity / NORMALIZING_INTENSITY_VALUE_); } } - void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& msg) { - pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); pcl::fromROSMsg(*msg, *pcl_pc_ptr); - if(baselink_support_) + if (baselink_support_) { - if(!has_subscribed_baselink_) + if (!has_subscribed_baselink_) { getBaselinkToLidarTF(msg->header.frame_id); } pcl_ros::transformPointCloud(*pcl_pc_ptr, *pcl_pc_ptr, angle_transform_); } - float * points_array = new float[pcl_pc_ptr->size()*NUM_POINT_FEATURE_]; - if(baselink_support_ && has_subscribed_baselink_) + float* points_array = new float[pcl_pc_ptr->size() * NUM_POINT_FEATURE_]; + if (baselink_support_ && has_subscribed_baselink_) { pclToArray(pcl_pc_ptr, points_array, offset_z_from_trained_data_); } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index 51fe2a07884..e36055debc9 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -14,52 +14,43 @@ * limitations under the License. */ -//headers in STL +// headers in STL #include #include -//headers in local files +// headers in local files #include "preprocess_points.h" -PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, - const int MAX_POINTS_PER_PILLAR, - const int GRID_X_SIZE, - const int GRID_Y_SIZE, - const int GRID_Z_SIZE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN, +PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE, + const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, + const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, const float MIN_X_RANGE, + const float MIN_Y_RANGE, const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS) -: -MAX_NUM_PILLARS_(MAX_NUM_PILLARS), -MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR), -GRID_X_SIZE_(GRID_X_SIZE), -GRID_Y_SIZE_(GRID_Y_SIZE), -GRID_Z_SIZE_(GRID_Z_SIZE), -PILLAR_X_SIZE_(PILLAR_X_SIZE), -PILLAR_Y_SIZE_(PILLAR_Y_SIZE), -PILLAR_Z_SIZE_(PILLAR_Z_SIZE), -MIN_X_RANGE_(MIN_X_RANGE), -MIN_Y_RANGE_(MIN_Y_RANGE), -MIN_Z_RANGE_(MIN_Z_RANGE), -NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN), -NUM_BOX_CORNERS_(NUM_BOX_CORNERS) + : MAX_NUM_PILLARS_(MAX_NUM_PILLARS) + , MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR) + , GRID_X_SIZE_(GRID_X_SIZE) + , GRID_Y_SIZE_(GRID_Y_SIZE) + , GRID_Z_SIZE_(GRID_Z_SIZE) + , PILLAR_X_SIZE_(PILLAR_X_SIZE) + , PILLAR_Y_SIZE_(PILLAR_Y_SIZE) + , PILLAR_Z_SIZE_(PILLAR_Z_SIZE) + , MIN_X_RANGE_(MIN_X_RANGE) + , MIN_Y_RANGE_(MIN_Y_RANGE) + , MIN_Z_RANGE_(MIN_Z_RANGE) + , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) + , NUM_BOX_CORNERS_(NUM_BOX_CORNERS) { } -void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped) +void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x, + float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped) { for (int i = 0; i < GRID_Y_SIZE_; i++) { for (int j = 0; j < GRID_X_SIZE_; j++) { - coor_to_pillaridx[i*GRID_X_SIZE_ + j] = -1; + coor_to_pillaridx[i * GRID_X_SIZE_ + j] = -1; } } @@ -67,11 +58,11 @@ void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse { for (int j = 0; j < NUM_INDS_FOR_SCAN_; j++) { - sparse_pillar_map[i*NUM_INDS_FOR_SCAN_+j] = 0; + sparse_pillar_map[i * NUM_INDS_FOR_SCAN_ + j] = 0; } } - for(int i = 0; i < MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_; i++) + for (int i = 0; i < MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_; i++) { pillar_x[i] = 0; pillar_y[i] = 0; @@ -82,63 +73,60 @@ void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse } } -void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points, - int* x_coors, int* y_coors, float* num_points_per_pillar, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, - float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) +void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, + float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, + float* pillar_i, float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) { int pillar_count = 0; - float x_coors_for_sub[MAX_NUM_PILLARS_]= {0}; - float y_coors_for_sub[MAX_NUM_PILLARS_]= {0}; - //init variables - int coor_to_pillaridx[GRID_Y_SIZE_*GRID_X_SIZE_]; - initializeVariables(coor_to_pillaridx, sparse_pillar_map, - pillar_x, pillar_y, pillar_z, pillar_i, - x_coors_for_sub_shaped, y_coors_for_sub_shaped); - for(int i = 0; i < in_num_points; i++) + float x_coors_for_sub[MAX_NUM_PILLARS_] = { 0 }; + float y_coors_for_sub[MAX_NUM_PILLARS_] = { 0 }; + // init variables + int coor_to_pillaridx[GRID_Y_SIZE_ * GRID_X_SIZE_]; + initializeVariables(coor_to_pillaridx, sparse_pillar_map, pillar_x, pillar_y, pillar_z, pillar_i, + x_coors_for_sub_shaped, y_coors_for_sub_shaped); + for (int i = 0; i < in_num_points; i++) { - int x_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_); - int y_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_); - int z_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_); - if(x_coor < 0 || x_coor >= GRID_X_SIZE_ || - y_coor < 0 || y_coor >= GRID_Y_SIZE_ || - z_coor < 0 || z_coor >= GRID_Z_SIZE_) + int x_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_) / PILLAR_X_SIZE_); + int y_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_) / PILLAR_Y_SIZE_); + int z_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_) / PILLAR_Z_SIZE_); + if (x_coor < 0 || x_coor >= GRID_X_SIZE_ || y_coor < 0 || y_coor >= GRID_Y_SIZE_ || z_coor < 0 || + z_coor >= GRID_Z_SIZE_) { continue; } - //reverse index - int pillar_index = coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor]; - if(pillar_index == -1) + // reverse index + int pillar_index = coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor]; + if (pillar_index == -1) { pillar_index = pillar_count; - if(pillar_count >= MAX_NUM_PILLARS_) + if (pillar_count >= MAX_NUM_PILLARS_) { break; } pillar_count += 1; - coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor] = pillar_index; + coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor] = pillar_index; - //reverse index + // reverse index y_coors[pillar_index] = std::floor(y_coor); x_coors[pillar_index] = std::floor(x_coor); // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_; // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_; // TODO Need to be modified after proper trining code - y_coors_for_sub[pillar_index] = std::floor(y_coor)* PILLAR_Y_SIZE_ + -39.9; - x_coors_for_sub[pillar_index] = std::floor(x_coor)* PILLAR_X_SIZE_ + 0.1; + y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9; + x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1; - //sparse pillar map - sparse_pillar_map[y_coor*512 + x_coor] = 1; + // sparse pillar map + sparse_pillar_map[y_coor * 512 + x_coor] = 1; } int num = num_points_per_pillar[pillar_index]; if (num < MAX_NUM_POINTS_PER_PILLAR_) { - pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 0]; - pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 1]; - pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 2]; - pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 3]; + pillar_x[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 0]; + pillar_y[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 1]; + pillar_z[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 2]; + pillar_i[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 3]; num_points_per_pillar[pillar_index] += 1; } } @@ -148,20 +136,19 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point float x = x_coors_for_sub[i]; float y = y_coors_for_sub[i]; int num_points_for_a_pillar = num_points_per_pillar[i]; - for(int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++) + for (int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++) { - x_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = x; - y_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = y; - if(j < num_points_for_a_pillar) - { - pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0; - } - else - { - pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0; - } + x_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = x; + y_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = y; + if (j < num_points_for_a_pillar) + { + pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0; + } + else + { + pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0; + } } } host_pillar_count[0] = pillar_count; - } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index 0b209a92471..a45cef2bdef 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ - /** - * @file test_point_pillars.cpp - * @brief unit test file - * @author Kosuke Murakami - * @date 2019/02/26 - */ +/** +* @file test_point_pillars.cpp +* @brief unit test file +* @author Kosuke Murakami +* @date 2019/02/26 +*/ #include #include @@ -46,17 +46,9 @@ class TestSuite : public ::testing::Test class TestClass { public: - TestClass(const int MAX_NUM_PILLARS, - const int MAX_NUM_POINTS_PER_PILLAR, - const int GRID_X_SIZE, - const int GRID_Y_SIZE, - const int GRID_Z_SIZE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float MIN_Z_RANGE, + TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE, + const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN); const int MAX_NUM_PILLARS_; const int MAX_NUM_POINTS_PER_PILLAR_; @@ -70,90 +62,64 @@ class TestClass const float MIN_Y_RANGE_; const float MIN_Z_RANGE_; const int NUM_INDS_FOR_SCAN_; - void loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, - const std::string& in_file ); - void pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array); - void preprocess(const float* in_points_array, int in_num_points, - int* x_coors, int* y_coors, float* num_points_per_pillar, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, - float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); -private: + void loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, const std::string& in_file); + void pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array); + void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, + float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, + float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask, + float* sparse_pillar_map, int* host_pillar_count); +private: std::unique_ptr preprocess_points_ptr_; }; -TestClass::TestClass( - const int MAX_NUM_PILLARS, - const int MAX_NUM_POINTS_PER_PILLAR, - const int GRID_X_SIZE, - const int GRID_Y_SIZE, - const int GRID_Z_SIZE, - const float PILLAR_X_SIZE, - const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, - const float MIN_Y_RANGE, - const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN): -MAX_NUM_PILLARS_(MAX_NUM_PILLARS), -MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR), -GRID_X_SIZE_(GRID_X_SIZE), -GRID_Y_SIZE_(GRID_Y_SIZE), -GRID_Z_SIZE_(GRID_Z_SIZE), -PILLAR_X_SIZE_(PILLAR_X_SIZE), -PILLAR_Y_SIZE_(PILLAR_Y_SIZE), -PILLAR_Z_SIZE_(PILLAR_Z_SIZE), -MIN_X_RANGE_(MIN_X_RANGE), -MIN_Y_RANGE_(MIN_Y_RANGE), -MIN_Z_RANGE_(MIN_Z_RANGE), -NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) +TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE, + const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, + const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, + const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN) + : MAX_NUM_PILLARS_(MAX_NUM_PILLARS) + , MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR) + , GRID_X_SIZE_(GRID_X_SIZE) + , GRID_Y_SIZE_(GRID_Y_SIZE) + , GRID_Z_SIZE_(GRID_Z_SIZE) + , PILLAR_X_SIZE_(PILLAR_X_SIZE) + , PILLAR_Y_SIZE_(PILLAR_Y_SIZE) + , PILLAR_Z_SIZE_(PILLAR_Z_SIZE) + , MIN_X_RANGE_(MIN_X_RANGE) + , MIN_Y_RANGE_(MIN_Y_RANGE) + , MIN_Z_RANGE_(MIN_Z_RANGE) + , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) { preprocess_points_ptr_.reset(new PreprocessPoints( - MAX_NUM_PILLARS_, - MAX_NUM_POINTS_PER_PILLAR_, - GRID_X_SIZE_, - GRID_Y_SIZE_, - GRID_Z_SIZE_, - PILLAR_X_SIZE_, - PILLAR_Y_SIZE_, - PILLAR_Z_SIZE_, - MIN_X_RANGE_, - MIN_Y_RANGE_, - MIN_Z_RANGE_, - NUM_INDS_FOR_SCAN_ - )); + MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_, + PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_)); }; -void TestClass::preprocess(const float* in_points_array, int in_num_points, - int* x_coors, int* y_coors, float* num_points_per_pillar, - float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, - float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, - float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) +void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, + float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, + float* pillar_i, float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, + float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count) { - preprocess_points_ptr_->preprocess(in_points_array, in_num_points, - x_coors, y_coors, num_points_per_pillar, - pillar_x, pillar_y, pillar_z, pillar_i, - x_coors_for_sub_shaped, y_coors_for_sub_shaped, - pillar_feature_mask, sparse_pillar_map, host_pillar_count); + preprocess_points_ptr_->preprocess(in_points_array, in_num_points, x_coors, y_coors, num_points_per_pillar, pillar_x, + pillar_y, pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, + pillar_feature_mask, sparse_pillar_map, host_pillar_count); } -void TestClass::pclToArray(const pcl::PointCloud::Ptr & in_pcl_pc_ptr, float* out_points_array) +void TestClass::pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array) { - for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++) + for (size_t i = 0; i < in_pcl_pc_ptr->size(); i++) { pcl::PointXYZI point = in_pcl_pc_ptr->at(i); - out_points_array[i*4 + 0] = point.x; - out_points_array[i*4 + 1] = point.y; - out_points_array[i*4 + 2] = point.z; - out_points_array[i*4 + 3] = point.intensity; + out_points_array[i * 4 + 0] = point.x; + out_points_array[i * 4 + 1] = point.y; + out_points_array[i * 4 + 2] = point.z; + out_points_array[i * 4 + 3] = point.intensity; } } -void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, - const std::string& in_file ) +void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, const std::string& in_file) { - if (pcl::io::loadPCDFile (in_file, *in_pcl_pc_ptr) == -1) + if (pcl::io::loadPCDFile(in_file, *in_pcl_pc_ptr) == -1) { ROS_ERROR("Couldn't read test pcd file \n"); } @@ -161,54 +127,39 @@ void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, TEST(TestSuite, CheckPreprocessPointsCPU) { - TestClass test_obj( - 12000, - 100, - 432, - 496, - 1, - 0.16, - 0.16, - 4.0, - 0, - -39.68, - -3.0, - 512); + TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512); pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); std::string package_path = ros::package::getPath("lidar_point_pillars"); std::string in_file = package_path + "/test/data/1527836009720148.pcd"; test_obj.loadPoints(pcl_pc_ptr, in_file); - float * points_array = new float[pcl_pc_ptr->size()*4]; + float* points_array = new float[pcl_pc_ptr->size() * 4]; test_obj.pclToArray(pcl_pc_ptr, points_array); + int x_coors[test_obj.MAX_NUM_PILLARS_] = { 0 }; + int y_coors[test_obj.MAX_NUM_PILLARS_] = { 0 }; + float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = { 0 }; + float* pillar_x = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_y = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_z = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_i = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - int x_coors[test_obj.MAX_NUM_PILLARS_]= {0}; - int y_coors[test_obj.MAX_NUM_PILLARS_]= {0}; - float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = {0}; - float * pillar_x = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_y = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_z = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_i = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - - float * x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float * y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float * pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; + float* pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_]; - float* sparse_pillar_map = new float[512*512]; + float* sparse_pillar_map = new float[512 * 512]; - int host_pillar_count[1] = {0}; - test_obj.preprocess(points_array, pcl_pc_ptr->size(), - x_coors, y_coors, num_points_per_pillar, - pillar_x, pillar_y, pillar_z, pillar_i, - x_coors_for_sub_shaped, y_coors_for_sub_shaped, - pillar_feature_mask, sparse_pillar_map, host_pillar_count); + int host_pillar_count[1] = { 0 }; + test_obj.preprocess(points_array, pcl_pc_ptr->size(), x_coors, y_coors, num_points_per_pillar, pillar_x, pillar_y, + pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, pillar_feature_mask, + sparse_pillar_map, host_pillar_count); EXPECT_EQ(15, num_points_per_pillar[0]); EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]); EXPECT_EQ(71, x_coors[100]); EXPECT_EQ(177, y_coors[100]); - EXPECT_EQ(1, sparse_pillar_map[177*512 + 71]); + EXPECT_EQ(1, sparse_pillar_map[177 * 512 + 71]); EXPECT_EQ(6270, host_pillar_count[0]); delete[] points_array; delete[] pillar_x; From 7c4c1d6ca20e6d7683ba1ead8483ad7d8bf784a8 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 08:35:54 +0900 Subject: [PATCH 05/37] Refactor codes --- .../lidar_point_pillars/CMakeLists.txt | 1 + .../include/point_pillars.h | 30 +++++++++++++++---- .../nodes/point_pillars.cpp | 20 ++++++++++--- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index fad7f1a04c6..b4c451a393b 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -80,6 +80,7 @@ if(TRT_AVAIL AND CUDA_AVAIL) nodes/point_pillars.cpp nodes/preprocess_points.cpp ) + # Need ${catkin_LIBRARIES} because of #include for ROS_ERROR target_link_libraries(point_pillars_lib ${catkin_LIBRARIES} nvinfer diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index e65a5c02433..541e1d9b347 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -220,6 +220,20 @@ class PointPillars */ void initTRT(); + /** + * @brief Generate anchors + * @param[in] anchors_px_ Represents x-coordinate value for corresponding anchor + * @param[in] anchors_py_ Represents y-coordinate value for corresponding anchor + * @param[in] anchors_pz_ Represents z-coordinate value for corresponding anchor + * @param[in] anchors_dx_ Represents x-dimension value for corresponding anchor + * @param[in] anchors_dy_ Represents y-dimension value for corresponding anchor + * @param[in] anchors_dz_ Represents z-dimension value for corresponding anchor + * @param[in] anchors_ro_ Represents rotation value for corresponding anchor + * @details Generate anchors for each grid + */ + void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, + float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_); + /** * @brief Convert ONNX to TensorRT model * @param[in] model_file ONNX model file path @@ -254,13 +268,19 @@ class PointPillars /** * @brief Convert anchors to box form like min_x, min_y, max_x, max_y anchors - * @param[in] anchors_px Represents x-coordinate value for corresponding anchor - * @param[in] anchors_py Represents y-coordinate value for corresponding anchor - * @param[in] anchors_dx Represents x-dimension value for corresponding anchor - * @param[in] anchors_dy Represents y-dimension value for corresponding anchor + * @param[in] anchors_px_ Represents x-coordinate value for a corresponding anchor + * @param[in] anchors_py_ Represents y-coordinate value for a corresponding anchor + * @param[in] anchors_dx_ Represents x-dimension value for a corresponding anchor + * @param[in] anchors_dy_ Represents y-dimension value for a corresponding anchor + * @param[in] box_anchors_min_x_ Represents minimum x value for a correspomding anchor + * @param[in] box_anchors_min_y_ Represents minimum y value for a correspomding anchor + * @param[in] box_anchors_max_x_ Represents maximum x value for a correspomding anchor + * @param[in] box_anchors_max_y_ Represents maximum y value for a correspomding anchor * @details Make box anchors for nms */ - void convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy); + void convertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_, float* anchors_dx_, float* anchors_dy_, + float* box_anchors_min_x_, float* box_anchors_min_y_, + float* box_anchors_max_x_, float* box_anchors_max_y_); /** * @brief Memory allocation for anchors diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 117fb5c2a5e..bb5f33050f6 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -267,6 +267,19 @@ void PointPillars::deviceMemoryMalloc() } void PointPillars::initAnchors() +{ + generateAnchors(anchors_px_, anchors_py_, anchors_pz_, + anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_); + + convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, + box_anchors_min_x_, box_anchors_min_y_, + box_anchors_max_x_, box_anchors_max_y_); + + putAnchorsInDeviceMemory(); +} + +void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, + float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_) { // zero clear for (size_t i = 0; i < NUM_ANCHOR_; i++) @@ -320,9 +333,6 @@ void PointPillars::initAnchors() } } } - convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_); - - putAnchorsInDeviceMemory(); } void PointPillars::putAnchorsInDeviceMemory() @@ -345,7 +355,9 @@ void PointPillars::putAnchorsInDeviceMemory() GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); } -void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy) +void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy, + float* box_anchors_min_x_, float* box_anchors_min_y_, + float* box_anchors_max_x_, float* box_anchors_max_y_) { // flip box's dimension when the at the third axis == 1 float flipped_anchors_dx[NUM_ANCHOR_] = { 0 }; From 0a784b63647afc58b16d632cd92ea504f909c498 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 09:27:03 +0900 Subject: [PATCH 06/37] fix some test --- .../test/src/test_point_pillars.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index a45cef2bdef..e3ca4a7dd28 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -49,7 +49,7 @@ class TestClass TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, - const int NUM_INDS_FOR_SCAN); + const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS); const int MAX_NUM_PILLARS_; const int MAX_NUM_POINTS_PER_PILLAR_; const int GRID_X_SIZE_; @@ -62,6 +62,8 @@ class TestClass const float MIN_Y_RANGE_; const float MIN_Z_RANGE_; const int NUM_INDS_FOR_SCAN_; + const int NUM_BOX_CORNERS_; + void loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, const std::string& in_file); void pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array); void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, @@ -76,7 +78,7 @@ class TestClass TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, - const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN) + const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS) : MAX_NUM_PILLARS_(MAX_NUM_PILLARS) , MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR) , GRID_X_SIZE_(GRID_X_SIZE) @@ -89,10 +91,11 @@ TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PIL , MIN_Y_RANGE_(MIN_Y_RANGE) , MIN_Z_RANGE_(MIN_Z_RANGE) , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN) + , NUM_BOX_CORNERS_(NUM_BOX_CORNERS) { preprocess_points_ptr_.reset(new PreprocessPoints( MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_, - PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_)); + PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_)); }; void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, @@ -127,7 +130,7 @@ void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, c TEST(TestSuite, CheckPreprocessPointsCPU) { - TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512); + TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512, 4); pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); std::string package_path = ros::package::getPath("lidar_point_pillars"); From 978a5d637af6f95d4ac6964095a090d07dcd6fad Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 10:13:59 +0900 Subject: [PATCH 07/37] fix test --- .../test/src/test_point_pillars.cpp | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index e3ca4a7dd28..51a54579963 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -64,13 +64,14 @@ class TestClass const int NUM_INDS_FOR_SCAN_; const int NUM_BOX_CORNERS_; - void loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, const std::string& in_file); + void initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr); void pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array); void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); + private: std::unique_ptr preprocess_points_ptr_; }; @@ -120,12 +121,49 @@ void TestClass::pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc } } -void TestClass::loadPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr, const std::string& in_file) +void TestClass::initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr) { - if (pcl::io::loadPCDFile(in_file, *in_pcl_pc_ptr) == -1) - { - ROS_ERROR("Couldn't read test pcd file \n"); - } + pcl::PointXYZI point; + point.x = 12.9892; + point.y = -9.98058; + point.z = 0; + point.intensity = 4; + in_pcl_pc_ptr->push_back(point); + point.x = 11.8697; + point.y = -11.123; + point.z = -0.189377; + point.intensity = 35; + in_pcl_pc_ptr->push_back(point); + point.x = 12.489; + point.y = -9.59703; + point.z = -2.15565; + point.intensity =11; + in_pcl_pc_ptr->push_back(point); + point.x = 12.9084; + point.y = -10.9626; + point.z = -2.15565; + point.intensity = 11; + in_pcl_pc_ptr->push_back(point); + point.x = 13.8676; + point.y = -9.61668; + point.z = 0.0980819; + point.intensity = 14; + in_pcl_pc_ptr->push_back(point); + point.x = 13.5673; + point.y = -12.9834; + point.z = 0.21862; + point.intensity = 1; + in_pcl_pc_ptr->push_back(point); + point.x = 13.8213; + point.y = -10.8529; + point.z =-1.22883; + point.intensity = 19; + in_pcl_pc_ptr->push_back(point); + point.x = 11.8957; + point.y = -10.3189; + point.z =-1.28556; + point.intensity = 13; + in_pcl_pc_ptr->push_back(point); } TEST(TestSuite, CheckPreprocessPointsCPU) @@ -135,7 +173,8 @@ TEST(TestSuite, CheckPreprocessPointsCPU) pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); std::string package_path = ros::package::getPath("lidar_point_pillars"); std::string in_file = package_path + "/test/data/1527836009720148.pcd"; - test_obj.loadPoints(pcl_pc_ptr, in_file); + test_obj.initPoints(pcl_pc_ptr); + float* points_array = new float[pcl_pc_ptr->size() * 4]; test_obj.pclToArray(pcl_pc_ptr, points_array); @@ -158,12 +197,12 @@ TEST(TestSuite, CheckPreprocessPointsCPU) test_obj.preprocess(points_array, pcl_pc_ptr->size(), x_coors, y_coors, num_points_per_pillar, pillar_x, pillar_y, pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, pillar_feature_mask, sparse_pillar_map, host_pillar_count); - EXPECT_EQ(15, num_points_per_pillar[0]); - EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]); - EXPECT_EQ(71, x_coors[100]); - EXPECT_EQ(177, y_coors[100]); - EXPECT_EQ(1, sparse_pillar_map[177 * 512 + 71]); - EXPECT_EQ(6270, host_pillar_count[0]); + EXPECT_EQ(1, num_points_per_pillar[0]); + EXPECT_FLOAT_EQ(12.9892, pillar_x[0]); + EXPECT_EQ(74, x_coors[1]); + EXPECT_EQ(178, y_coors[1]); + EXPECT_EQ(1, sparse_pillar_map[178 * 512 + 74]); + EXPECT_EQ(8, host_pillar_count[0]); delete[] points_array; delete[] pillar_x; delete[] pillar_y; From 9cd5b71f6f3363d1957931d280b64cd5b3dcac8f Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 10:32:22 +0900 Subject: [PATCH 08/37] refactor codes --- .../packages/lidar_point_pillars/include/point_pillars_ros.h | 2 +- .../packages/lidar_point_pillars/nodes/point_pillars_ros.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h index 8a40722a896..97e6ff51f1f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -88,7 +88,7 @@ class PointPillarsROS * @param[in] lidar2baselink transofomation info * @details Calculate z offset compared with trained sensor height and get rotation matrix */ - void getTFInfo(tf::StampedTransform lidar2baselink); + void analyzeTFInfo(tf::StampedTransform lidar2baselink); /** * @brief Transform pose based on tf stamp info diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 0d4b3df7148..17dcc8ecabc 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -86,6 +86,7 @@ void PointPillarsROS::pubDetectedObject(const std::vector& detections, co float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6]; yaw = std::atan2(std::sin(yaw), std::cos(yaw)); + // Trained this way geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw); object.pose.orientation = q; @@ -109,7 +110,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid) { tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0)); tf_listener_.lookupTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_); - getTFInfo(baselink2lidar_); + analyzeTFInfo(baselink2lidar_); has_subscribed_baselink_ = true; } catch (tf::TransformException ex) @@ -118,7 +119,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid) } } -void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar) +void PointPillarsROS::analyzeTFInfo(tf::StampedTransform baselink2lidar) { tf::Vector3 v = baselink2lidar.getOrigin(); offset_z_from_trained_data_ = v.getZ() - TRAINED_SENSOR_HEIGHT_; From b4fee927a1bd1b8ba5f92ef211c447c24350a587 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 11:09:57 +0900 Subject: [PATCH 09/37] refactor codes --- .../lidar_point_pillars/include/point_pillars.h | 4 +--- .../lidar_point_pillars/include/point_pillars_ros.h | 3 +-- .../lidar_point_pillars/include/postprocess_cuda.h | 3 +-- .../lidar_point_pillars/nodes/point_pillars.cpp | 13 ++++++------- .../lidar_point_pillars/nodes/point_pillars_ros.cpp | 8 ++++---- .../lidar_point_pillars/nodes/postprocess_cuda.cu | 11 ++++++----- 6 files changed, 19 insertions(+), 23 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index 541e1d9b347..b2f6c04728f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -307,11 +307,9 @@ class PointPillars * @param[in] in_points_array pointcloud array * @param[in] in_num_points Number of points * @param[in] out_detections Network output bounding box - * @param[in] out_num_objects Number of output bounding box * @details This is interface for the algorithm */ - void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection, - int& out_num_objects); + void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection); }; #endif // POINTS_PILLAR_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h index 97e6ff51f1f..43c0994f547 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -119,11 +119,10 @@ class PointPillarsROS /** * @brief publish DetectedObject * @param[in] detections Network output bounding box - * @param[in] num_objects Number of objects * @param[in] pc_header Header from pointcloud * @details Convert std::vector to DetectedObject, and publish them */ - void pubDetectedObject(const std::vector& detections, const int num_objects, + void pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header); public: diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h index b60a427ca3f..2add152f904 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h @@ -86,7 +86,6 @@ class PostprocessCuda * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms * @param[in] dev_filter_count The number of filtered output * @param[out] out_detection Output bounding boxes - * @param[out] out_num_objects The number of output bounding boxes * @details dev_* represents device memory allocated variables */ void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output, @@ -94,7 +93,7 @@ class PostprocessCuda const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, - int* dev_filter_count, std::vector& out_detection, int& out_num_objects); + int* dev_filter_count, std::vector& out_detection); }; #endif // POSTPROCESS_CUDA_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index bb5f33050f6..1e0cf78a85a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -259,10 +259,10 @@ void PointPillars::deviceMemoryMalloc() GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_ * sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * 7 * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * NUM_OUTPUT_BOX_FEATURE_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_ * sizeof(int))); - GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * 4 * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * NUM_BOX_CORNERS_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int))); } @@ -524,8 +524,8 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_ void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points) { float* dev_points; - GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * 4 * sizeof(float))); - GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * 4 * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float))); + GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice)); GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); @@ -558,8 +558,7 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi } } -void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection, - int& out_num_objects) +void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection) { preprocess(in_points_array, in_num_points); @@ -607,7 +606,7 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, - dev_filter_count_, out_detection, out_num_objects); + dev_filter_count_, out_detection); // release the stream and the buffers cudaStreamDestroy(stream); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 17dcc8ecabc..2d6c88907d2 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -68,11 +68,12 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos return out_pose.pose; } -void PointPillarsROS::pubDetectedObject(const std::vector& detections, const int num_objects, +void PointPillarsROS::pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header) { autoware_msgs::DetectedObjectArray objects; objects.header = pc_header; + int num_objects = detections.size()/OUTPUT_NUM_BOX_FEATURE_; for (size_t i = 0; i < num_objects; i++) { autoware_msgs::DetectedObject object; @@ -166,10 +167,9 @@ void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& m pclToArray(pcl_pc_ptr, points_array); } - int out_num_objects = 0; std::vector out_detection; - point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection, out_num_objects); + point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection); delete[] points_array; - pubDetectedObject(out_detection, out_num_objects, msg->header); + pubDetectedObject(out_detection, msg->header); } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu index 2b5501e3541..cf82496926a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu @@ -73,9 +73,9 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1) float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy), - float(-0.5*box_dx), float( 0.5*box_dy), - float( 0.5*box_dx), float( 0.5*box_dy), - float( 0.5*box_dx), float(-0.5*box_dy)}; + float(-0.5*box_dx), float( 0.5*box_dy), + float( 0.5*box_dx), float( 0.5*box_dy), + float( 0.5*box_dx), float(-0.5*box_dy)}; //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax) //cannot use variable initialization since "error: expression must have a constant value" @@ -159,7 +159,7 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro, float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count, - std::vector& out_detection, int& out_num_objects) + std::vector& out_detection) { filter_kernel<<>> @@ -194,10 +194,11 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_); int keep_inds[host_filter_count[0]] = {0}; + int out_num_objects = 0; nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects); - float host_filtered_box[host_filter_count[0]*7]; + float host_filtered_box[host_filter_count[0]*NUM_OUTPUT_BOX_FEATURE_]; int host_filtered_dir[host_filter_count[0]]; GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) ); GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir, host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) ); From 663f4f1d06e86582bbb1925d89aca7fd1a348245 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 11:18:06 +0900 Subject: [PATCH 10/37] refactor code --- .../nodes/point_pillars.cpp | 101 +++++++----------- 1 file changed, 38 insertions(+), 63 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 1e0cf78a85a..4d23bfcb6c5 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -193,15 +193,13 @@ PointPillars::~PointPillars() void PointPillars::deviceMemoryMalloc() { - GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, - GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, - GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, - GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, - GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + // clang-format off + GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); + // clang-format on GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_ * sizeof(int))); GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_ * sizeof(int))); @@ -213,12 +211,11 @@ void PointPillars::deviceMemoryMalloc() GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( - cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( - cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); - GPU_CHECK( - cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + // clang-format off + GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float))); + // clang-format on // cumsum kernel GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); @@ -337,14 +334,12 @@ void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float void PointPillars::putAnchorsInDeviceMemory() { - GPU_CHECK( - cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK( - cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK( - cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK( - cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + // clang-format off + GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); + // clang-format on GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice)); @@ -490,26 +485,19 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_ GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float))); GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); + // clang-format off GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice)); GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped, - MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped, - MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float), - cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, - MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); - GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, - NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float),cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask,MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map,NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice)); + // clang-format on delete[] pillar_x; delete[] pillar_y; @@ -568,29 +556,16 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po cudaStream_t stream; GPU_CHECK(cudaStreamCreate(&stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); - GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, - BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), - cudaMemcpyDeviceToDevice, stream)); + // clang-format off + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, MAX_NUM_PILLARS_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + // clang-format on pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr); GPU_CHECK(cudaMemset(dev_scattered_feature_, 0, RPN_INPUT_SIZE_ * sizeof(float))); From 36d9d10bf76ab8216819e526fcc9eab1262c3769 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 11:24:33 +0900 Subject: [PATCH 11/37] apply ros clang --- .../include/point_pillars.h | 8 +++---- .../include/point_pillars_ros.h | 3 +-- .../nodes/point_pillars.cpp | 23 +++++++++---------- .../nodes/point_pillars_ros.cpp | 5 ++-- .../test/src/test_point_pillars.cpp | 8 +++---- 5 files changed, 21 insertions(+), 26 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index b2f6c04728f..735b80e36b4 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -231,8 +231,8 @@ class PointPillars * @param[in] anchors_ro_ Represents rotation value for corresponding anchor * @details Generate anchors for each grid */ - void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, - float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_); + void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_, + float* anchors_dy_, float* anchors_dz_, float* anchors_ro_); /** * @brief Convert ONNX to TensorRT model @@ -279,8 +279,8 @@ class PointPillars * @details Make box anchors for nms */ void convertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_, float* anchors_dx_, float* anchors_dy_, - float* box_anchors_min_x_, float* box_anchors_min_y_, - float* box_anchors_max_x_, float* box_anchors_max_y_); + float* box_anchors_min_x_, float* box_anchors_min_y_, float* box_anchors_max_x_, + float* box_anchors_max_y_); /** * @brief Memory allocation for anchors diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h index 43c0994f547..4aa50f3160d 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -122,8 +122,7 @@ class PointPillarsROS * @param[in] pc_header Header from pointcloud * @details Convert std::vector to DetectedObject, and publish them */ - void pubDetectedObject(const std::vector& detections, - const std_msgs::Header& pc_header); + void pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header); public: PointPillarsROS(); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 4d23bfcb6c5..be307a5d68a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -265,18 +265,16 @@ void PointPillars::deviceMemoryMalloc() void PointPillars::initAnchors() { - generateAnchors(anchors_px_, anchors_py_, anchors_pz_, - anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_); + generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_); - convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, - box_anchors_min_x_, box_anchors_min_y_, + convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, box_anchors_min_x_, box_anchors_min_y_, box_anchors_max_x_, box_anchors_max_y_); putAnchorsInDeviceMemory(); } -void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, - float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_) +void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_, + float* anchors_dy_, float* anchors_dz_, float* anchors_ro_) { // zero clear for (size_t i = 0; i < NUM_ANCHOR_; i++) @@ -512,8 +510,10 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_ void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points) { float* dev_points; + // clang-format off GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float))); - GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice)); + // clang-format on GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int))); @@ -577,11 +577,10 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr); GPU_CHECK(cudaMemset(dev_filter_count_, 0, sizeof(int))); - postprocess_cuda_ptr_->doPostprocessCuda((float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], - dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, - dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, - dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, - dev_filter_count_, out_detection); + postprocess_cuda_ptr_->doPostprocessCuda( + (float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], dev_anchor_mask_, dev_anchors_px_, + dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, + dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detection); // release the stream and the buffers cudaStreamDestroy(stream); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 2d6c88907d2..93969a403a7 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -68,12 +68,11 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos return out_pose.pose; } -void PointPillarsROS::pubDetectedObject(const std::vector& detections, - const std_msgs::Header& pc_header) +void PointPillarsROS::pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header) { autoware_msgs::DetectedObjectArray objects; objects.header = pc_header; - int num_objects = detections.size()/OUTPUT_NUM_BOX_FEATURE_; + int num_objects = detections.size() / OUTPUT_NUM_BOX_FEATURE_; for (size_t i = 0; i < num_objects; i++) { autoware_msgs::DetectedObject object; diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index 51a54579963..d0736414ef0 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -71,7 +71,6 @@ class TestClass float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count); - private: std::unique_ptr preprocess_points_ptr_; }; @@ -137,7 +136,7 @@ void TestClass::initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr) point.x = 12.489; point.y = -9.59703; point.z = -2.15565; - point.intensity =11; + point.intensity = 11; in_pcl_pc_ptr->push_back(point); point.x = 12.9084; point.y = -10.9626; @@ -156,12 +155,12 @@ void TestClass::initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr) in_pcl_pc_ptr->push_back(point); point.x = 13.8213; point.y = -10.8529; - point.z =-1.22883; + point.z = -1.22883; point.intensity = 19; in_pcl_pc_ptr->push_back(point); point.x = 11.8957; point.y = -10.3189; - point.z =-1.28556; + point.z = -1.28556; point.intensity = 13; in_pcl_pc_ptr->push_back(point); } @@ -175,7 +174,6 @@ TEST(TestSuite, CheckPreprocessPointsCPU) std::string in_file = package_path + "/test/data/1527836009720148.pcd"; test_obj.initPoints(pcl_pc_ptr); - float* points_array = new float[pcl_pc_ptr->size() * 4]; test_obj.pclToArray(pcl_pc_ptr, points_array); From 267467d0ab5440c5cf5845cf4e7654ceba276896 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 11:52:44 +0900 Subject: [PATCH 12/37] refactor codes --- .../packages/lidar_point_pillars/CMakeLists.txt | 3 +-- .../packages/lidar_point_pillars/README.md | 13 ++++++++++++- .../lidar_point_pillars/nodes/point_pillars.cpp | 13 +++++-------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index b4c451a393b..1978f5f47eb 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -80,9 +80,8 @@ if(TRT_AVAIL AND CUDA_AVAIL) nodes/point_pillars.cpp nodes/preprocess_points.cpp ) - # Need ${catkin_LIBRARIES} because of #include for ROS_ERROR + target_link_libraries(point_pillars_lib - ${catkin_LIBRARIES} nvinfer nvonnxparser gpu_point_pillars_lib diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index ffae0b6c011..d20ad5b2bfc 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -1,4 +1,4 @@ -# Point Pillars for 3D Object Detection +# Point Pillars for 3D Object Detection: ver. 1.0 Autoware package for Point Pillars. ([reference paper](https://arxiv.org/abs/1812.05784)) @@ -14,6 +14,17 @@ TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-in Using launch file: `roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw` +## API +``` +/** +* @brief Call PointPillars for the inference +* @param[in] in_points_array pointcloud array +* @param[in] in_num_points Number of points +* @param[in] out_detections Network output bounding box +* @details This is interface for the algorithm +*/ +void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection); +``` ## Parameters diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index be307a5d68a..0c4293a95d3 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -18,9 +18,6 @@ #include #include -// headers in ROS -#include - // headers in local files #include "point_pillars.h" @@ -391,7 +388,7 @@ void PointPillars::initTRT() onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream); if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) { - ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str()); + std::cerr<< "Failed to load ONNX file " << std::endl; } // deserialize the engine @@ -399,7 +396,7 @@ void PointPillars::initTRT() rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_); if (pfe_runtime_ == nullptr || rpn_runtime_ == nullptr) { - ROS_ERROR("Failed to create TensorRT Runtime object."); + std::cerr<<"Failed to create TensorRT Runtime object."<deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr); @@ -407,7 +404,7 @@ void PointPillars::initTRT() rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr); if (pfe_engine_ == nullptr || rpn_engine_ == nullptr) { - ROS_ERROR("Failed to create TensorRT Engine."); + std::cerr << "Failed to create TensorRT Engine." << std::endl; } pfe_trt_model_stream->destroy(); rpn_trt_model_stream->destroy(); @@ -415,7 +412,7 @@ void PointPillars::initTRT() rpn_context_ = rpn_engine_->createExecutionContext(); if (pfe_context_ == nullptr || rpn_context_ == nullptr) { - ROS_ERROR("Failed to create TensorRT Execution Context."); + std::cerr << "Failed to create TensorRT Execution Context." << std::endl;; } } @@ -512,7 +509,7 @@ void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_ float* dev_points; // clang-format off GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float))); - GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice)); + GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice)); // clang-format on GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int))); From 37ee941c9b59b8fd049dc87507b0698db44eb2b0 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 12:09:43 +0900 Subject: [PATCH 13/37] remove cuda description in package.xml --- .../packages/lidar_point_pillars/CMakeLists.txt | 7 ++++--- .../packages/lidar_point_pillars/package.xml | 4 +--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 1978f5f47eb..1a832d99e8a 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8.3) project(lidar_point_pillars) -# set flags for CUDA and TensorRT availability +# set flags for CUDA availability option(CUDA_AVAIL "CUDA available" OFF) find_package(CUDA) if (CUDA_FOUND) @@ -14,8 +14,9 @@ else() set(CUDA_AVAIL OFF) endif (CUDA_FOUND) -# try to find the tensorRT modules +# set flags for TensorRT availability option(TRT_AVAIL "TensorRT available" OFF) +# try to find the tensorRT modules find_library(NVINFER NAMES nvinfer) find_library(NVPARSERS NAMES nvparsers) if(NVINFER AND NVPARSERS) @@ -80,7 +81,7 @@ if(TRT_AVAIL AND CUDA_AVAIL) nodes/point_pillars.cpp nodes/preprocess_points.cpp ) - + target_link_libraries(point_pillars_lib nvinfer nvonnxparser diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml index 2c79d3dbb70..082e644fa5d 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml @@ -5,19 +5,17 @@ lidar_point_pillars Kosuke Murakami - BSD + Apache 2.0 catkin roscpp pcl_ros - cuda-toolkit-10-0 autoware_msgs roscpp pcl_ros - cuda-toolkit-10-0 autoware_msgs From 036aba69d915286b20cd040ef0cea40877d0d027 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 12:10:09 +0900 Subject: [PATCH 14/37] Add specific version for TensorRT in README.md --- .../packages/lidar_point_pillars/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index d20ad5b2bfc..e105af0d2a6 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -2,11 +2,12 @@ Autoware package for Point Pillars. ([reference paper](https://arxiv.org/abs/1812.05784)) -## Pre requisites +## Requirements CUDA Toolkit v9.0 or 10.0 -TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) +TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) + ## How to launch @@ -17,11 +18,11 @@ Using launch file: ## API ``` /** -* @brief Call PointPillars for the inference +* @brief Call PointPillars for the inference. * @param[in] in_points_array pointcloud array * @param[in] in_num_points Number of points -* @param[in] out_detections Network output bounding box -* @details This is interface for the algorithm +* @param[in] out_detections Output bounding box from the network +* @details This is an interface for the algorithm. */ void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection); ``` From 602d21caeca6d5ca333d588cafb52414ac0ced8c Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 12:18:39 +0900 Subject: [PATCH 15/37] update readme --- .../packages/lidar_point_pillars/README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index e105af0d2a6..ff08e3108e8 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -1,10 +1,10 @@ # Point Pillars for 3D Object Detection: ver. 1.0 -Autoware package for Point Pillars. ([reference paper](https://arxiv.org/abs/1812.05784)) +Autoware package for Point Pillars. [Referenced paper](https://arxiv.org/abs/1812.05784). ## Requirements -CUDA Toolkit v9.0 or 10.0 +CUDA Toolkit v9.0 or v10.0 TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) @@ -12,19 +12,21 @@ TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplear ## How to launch -Using launch file: +* Launch file: `roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw` +* You can launch it through the runtime manager in Computing tab, as well. + ## API ``` /** * @brief Call PointPillars for the inference. * @param[in] in_points_array pointcloud array * @param[in] in_num_points Number of points -* @param[in] out_detections Output bounding box from the network +* @param[out] out_detections Output bounding box from the network * @details This is an interface for the algorithm. */ -void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection); +void doInference(float* in_points_array, int in_num_points, std::vector out_detection); ``` ## Parameters @@ -50,6 +52,6 @@ void doInference(const float* in_points_array, const int in_num_points, std::vec * To display the results in Rviz `objects_visualizer` is required. (Launch file launches automatically this node). -* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp).Notice that this model is under `BY-NC-SA 3.0` license. +* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp). Notice that this model is under `BY-NC-SA 3.0` license. -* If trained model comes from KITTI data, users might not be allowed to use the model for Commercial purpose. +* If trained model comes from KITTI data, users might not be allowed to use the model for commercial purposes. From c6414f793d22b5be61d6c934974af9a8b82abe9d Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 12:46:12 +0900 Subject: [PATCH 16/37] refactor codes --- .../include/point_pillars_ros.h | 4 +-- .../nodes/point_pillars.cpp | 33 ++++++++++--------- .../nodes/point_pillars_ros.cpp | 9 +++-- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h index 4aa50f3160d..c190fb56a97 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h @@ -119,10 +119,10 @@ class PointPillarsROS /** * @brief publish DetectedObject * @param[in] detections Network output bounding box - * @param[in] pc_header Header from pointcloud + * @param[in] in_header Header from pointcloud * @details Convert std::vector to DetectedObject, and publish them */ - void pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header); + void pubDetectedObject(const std::vector& detections, const std_msgs::Header& in_header); public: PointPillarsROS(); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 0c4293a95d3..24ce3b021a7 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -21,6 +21,7 @@ // headers in local files #include "point_pillars.h" +// clang-format off PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold, const std::string pfe_onnx_file, const std::string rpn_onnx_file) @@ -54,27 +55,14 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t , MAX_Z_RANGE_(1) , BATCH_SIZE_(1) , NUM_INDS_FOR_SCAN_(512) - , NUM_THREADS_(64) - , // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size - SENSOR_HEIGHT_(1.73) + , NUM_THREADS_(64) // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size + , SENSOR_HEIGHT_(1.73) , ANCHOR_DX_SIZE_(1.6) , ANCHOR_DY_SIZE_(3.9) , ANCHOR_DZ_SIZE_(1.56) , NUM_BOX_CORNERS_(4) , NUM_OUTPUT_BOX_FEATURE_(7) { - anchors_px_ = new float[NUM_ANCHOR_]; - anchors_py_ = new float[NUM_ANCHOR_]; - anchors_pz_ = new float[NUM_ANCHOR_]; - anchors_dx_ = new float[NUM_ANCHOR_]; - anchors_dy_ = new float[NUM_ANCHOR_]; - anchors_dz_ = new float[NUM_ANCHOR_]; - anchors_ro_ = new float[NUM_ANCHOR_]; - box_anchors_min_x_ = new float[NUM_ANCHOR_]; - box_anchors_min_y_ = new float[NUM_ANCHOR_]; - box_anchors_max_x_ = new float[NUM_ANCHOR_]; - box_anchors_max_y_ = new float[NUM_ANCHOR_]; - if (reproduce_result_mode_) { preprocess_points_ptr_.reset(new PreprocessPoints(MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, @@ -105,6 +93,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t initTRT(); initAnchors(); } +// clang-format on PointPillars::~PointPillars() { @@ -262,6 +251,20 @@ void PointPillars::deviceMemoryMalloc() void PointPillars::initAnchors() { + // allocate memory for anchors + anchors_px_ = new float[NUM_ANCHOR_]; + anchors_py_ = new float[NUM_ANCHOR_]; + anchors_pz_ = new float[NUM_ANCHOR_]; + anchors_dx_ = new float[NUM_ANCHOR_]; + anchors_dy_ = new float[NUM_ANCHOR_]; + anchors_dz_ = new float[NUM_ANCHOR_]; + anchors_ro_ = new float[NUM_ANCHOR_]; + box_anchors_min_x_ = new float[NUM_ANCHOR_]; + box_anchors_min_y_ = new float[NUM_ANCHOR_]; + box_anchors_max_x_ = new float[NUM_ANCHOR_]; + box_anchors_max_y_ = new float[NUM_ANCHOR_]; + // deallocate these memory in deconstructor + generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_); convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, box_anchors_min_x_, box_anchors_min_y_, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 93969a403a7..def9759cf7c 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -39,7 +39,10 @@ PointPillarsROS::PointPillarsROS() , NORMALIZING_INTENSITY_VALUE_(255.0) , BASELINK_FRAME_("base_link") { + //ros related param private_nh_.param("baselink_support", baselink_support_, true); + + //algorithm related params private_nh_.param("reproduce_result_mode", reproduce_result_mode_, false); private_nh_.param("score_threshold", score_threshold_, 0.5); private_nh_.param("nms_overlap_threshold", nms_overlap_threshold_, 0.5); @@ -68,15 +71,15 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos return out_pose.pose; } -void PointPillarsROS::pubDetectedObject(const std::vector& detections, const std_msgs::Header& pc_header) +void PointPillarsROS::pubDetectedObject(const std::vector& detections, const std_msgs::Header& in_header) { autoware_msgs::DetectedObjectArray objects; - objects.header = pc_header; + objects.header = in_header; int num_objects = detections.size() / OUTPUT_NUM_BOX_FEATURE_; for (size_t i = 0; i < num_objects; i++) { autoware_msgs::DetectedObject object; - object.header = pc_header; + object.header = in_header; object.valid = true; object.pose_reliable = true; From 65fd09492dce1617b130638dda104c53da80d1a4 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 13:22:10 +0900 Subject: [PATCH 17/37] add empty package declaration when not finding cuda/tensorrt --- .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 1a832d99e8a..ef411001240 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -24,7 +24,6 @@ if(NVINFER AND NVPARSERS) message("NVINFER: ${NVINFER}") message("NVPARSERS: ${NVPARSERS}") set(TRT_AVAIL ON) - add_definitions(-DTRT_AVAIL) else() message("TensorRT is NOT Available") set(TRT_AVAIL OFF) @@ -116,5 +115,7 @@ if(TRT_AVAIL AND CUDA_AVAIL) target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib) endif() ELSE() + find_package(catkin REQUIRED) + catkin_package() message("PointPillars won't be built, CUDA and/or TensorRT were not found.") ENDIF () From b60a3e5e547ce7177930772194f0e8da8ff18c5d Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 14:39:22 +0900 Subject: [PATCH 18/37] add more tests --- .../include/point_pillars.h | 1 + .../lidar_point_pillars/test/data/dummy.onnx | Bin 0 -> 1432 bytes .../test/src/test_point_pillars.cpp | 185 +++++++++++++++++- 3 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index 735b80e36b4..983c785c618 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -86,6 +86,7 @@ class Logger : public nvinfer1::ILogger class PointPillars { private: + friend class TestClass; // initize in initializer list const bool reproduce_result_mode_; const float score_threshold_; diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1a24ae75cbe3920f6b833ea359bc0c01caeb044a GIT binary patch literal 1432 zcmb`HJ#P~+7{}wL+ir#7&Z zZ^8#)WdOmz!1rKe<`ZCFlA#q9N|Td(!{f)l|KIlG$1Wo)nJJlu)BW;c(?9yK1Haku z786OXAaEdXA@CsZ5okX^eX7FgYnjHBvv*W6De8Bi6(t$tQtW;D`Ka>|fI5P9EaOP1 zJl-pnE&?5Me_J@uQp{yYIp%DLwdhm|ui3^UmZ?TU!?$!2Ww@|{g}~!nghqk#W-@#1 zv|aDX(fy$@K!kPxSLMZWlyfamAPm7^J8cae%k+$OkVMHb=O{S}DR|;{CbH#X8 z6iv@H=H@giM#QiSDjBCTCi>zczK($F`bMDO5GB`l>~_udkmx*-sanUGb7-91m~*LQ zO!X~Di&lXXPG`g%cJYXe)bsf(IEJWNJmH3}yvBT0{lKnNK_57S+Ly2IgJ&;ku>bT; z@Z<9=`19iX@vDb0Sf;pxTIrf^nd1FF#WyJz!M{d*w`%o4YhW)`P7W5$4%OgY8z2O? zY^X;CmhE4l_?_(PMu;G^_4KxxG6DB$z`GjoF9miWXrT(+wk=c|-h-fhy preprocess_points_ptr_; + std::unique_ptr point_pillars_ptr_; }; TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE, @@ -96,6 +102,18 @@ TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PIL preprocess_points_ptr_.reset(new PreprocessPoints( MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_)); + + bool baselink_support=true; + bool reproduce_result_mode=false; + float score_threshold = 0.5; + float nms_overlap_threshold = 0.5; + std::string package_path = ros::package::getPath("lidar_point_pillars"); + std::string onnx_path = package_path + "/test/data/dummy.onnx"; + std::string pfe_onnx_file = onnx_path; + std::string rpn_onnx_file = onnx_path; + + point_pillars_ptr_.reset(new PointPillars(reproduce_result_mode, score_threshold, nms_overlap_threshold, + pfe_onnx_file, rpn_onnx_file)); }; void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, @@ -165,13 +183,52 @@ void TestClass::initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr) in_pcl_pc_ptr->push_back(point); } +void TestClass::generateAnchors(float* anchors_px, float* anchors_py, float* anchors_pz, float* anchors_dx, + float* anchors_dy, float* anchors_dz, float* anchors_ro) +{ + return point_pillars_ptr_->generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx, + anchors_dy, anchors_dz, anchors_ro); +} + +void TestClass::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy, + float* box_anchors_min_x, float* box_anchors_min_y, + float* box_anchors_max_x, float* box_anchors_max_y) +{ + return point_pillars_ptr_->convertAnchors2BoxAnchors(anchors_px, anchors_py, anchors_dx, anchors_dy, + box_anchors_min_x, box_anchors_min_y, + box_anchors_max_x, box_anchors_max_y); +} + TEST(TestSuite, CheckPreprocessPointsCPU) { - TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512, 4); + const int MAX_NUM_PILLARS = 12000; + const int MAX_NUM_POINTS_PER_PILLAR = 100; + const int GRID_X_SIZE = 432; + const int GRID_Y_SIZE = 496; + const int GRID_Z_SIZE = 1; + const float PILLAR_X_SIZE = 0.16; + const float PILLAR_Y_SIZE = 0.16; + const float PILLAR_Z_SIZE = 4.0; + const float MIN_X_RANGE = 0; + const float MIN_Y_RANGE = -39.68; + const float MIN_Z_RANGE = -3.0; + const int NUM_INDS_FOR_SCAN = 512; + const int NUM_BOX_CORNERS = 4; + TestClass test_obj(MAX_NUM_PILLARS, + MAX_NUM_POINTS_PER_PILLAR, + GRID_X_SIZE, + GRID_Y_SIZE, + GRID_Z_SIZE, + PILLAR_X_SIZE, + PILLAR_Y_SIZE, + PILLAR_Z_SIZE, + MIN_X_RANGE, + MIN_Y_RANGE, + MIN_Z_RANGE, + NUM_INDS_FOR_SCAN, + NUM_BOX_CORNERS); pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); - std::string package_path = ros::package::getPath("lidar_point_pillars"); - std::string in_file = package_path + "/test/data/1527836009720148.pcd"; test_obj.initPoints(pcl_pc_ptr); float* points_array = new float[pcl_pc_ptr->size() * 4]; @@ -212,6 +269,128 @@ TEST(TestSuite, CheckPreprocessPointsCPU) delete[] sparse_pillar_map; } +TEST(TestSuite, CheckGenerateAnchors) +{ + const int MAX_NUM_PILLARS = 12000; + const int MAX_NUM_POINTS_PER_PILLAR = 100; + const int GRID_X_SIZE = 432; + const int GRID_Y_SIZE = 496; + const int GRID_Z_SIZE = 1; + const float PILLAR_X_SIZE = 0.16; + const float PILLAR_Y_SIZE = 0.16; + const float PILLAR_Z_SIZE = 4.0; + const float MIN_X_RANGE = 0; + const float MIN_Y_RANGE = -39.68; + const float MIN_Z_RANGE = -3.0; + const int NUM_INDS_FOR_SCAN = 512; + const int NUM_BOX_CORNERS = 4; + TestClass test_obj(MAX_NUM_PILLARS, + MAX_NUM_POINTS_PER_PILLAR, + GRID_X_SIZE, + GRID_Y_SIZE, + GRID_Z_SIZE, + PILLAR_X_SIZE, + PILLAR_Y_SIZE, + PILLAR_Z_SIZE, + MIN_X_RANGE, + MIN_Y_RANGE, + MIN_Z_RANGE, + NUM_INDS_FOR_SCAN, + NUM_BOX_CORNERS); + + const int NUM_ANCHOR = 432*0.5*496*0.5*2; + float* anchors_px = new float[NUM_ANCHOR]; + float* anchors_py = new float[NUM_ANCHOR]; + float* anchors_pz = new float[NUM_ANCHOR]; + float* anchors_dx = new float[NUM_ANCHOR]; + float* anchors_dy = new float[NUM_ANCHOR]; + float* anchors_dz = new float[NUM_ANCHOR]; + float* anchors_ro = new float[NUM_ANCHOR]; + test_obj.generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx, anchors_dy, anchors_dz, anchors_ro); + + EXPECT_NEAR(0.48, anchors_px[3], 0.001); + EXPECT_NEAR(-39.52, anchors_py[109], 0.001); + EXPECT_NEAR(-1.73, anchors_pz[76], 0.001); + EXPECT_NEAR(1.6, anchors_dx[338], 0.001); + EXPECT_NEAR(3.9, anchors_dy[22], 0.001); + EXPECT_NEAR(1.56, anchors_dz[993], 0.001); + EXPECT_NEAR(1.5708, anchors_ro[1765], 0.001); + + delete[] anchors_px; + delete[] anchors_py; + delete[] anchors_pz; + delete[] anchors_dx; + delete[] anchors_dy; + delete[] anchors_dz; + delete[] anchors_ro; +} + +TEST(TestSuite, CheckGenerateBoxAnchors) +{ + const int MAX_NUM_PILLARS = 12000; + const int MAX_NUM_POINTS_PER_PILLAR = 100; + const int GRID_X_SIZE = 432; + const int GRID_Y_SIZE = 496; + const int GRID_Z_SIZE = 1; + const float PILLAR_X_SIZE = 0.16; + const float PILLAR_Y_SIZE = 0.16; + const float PILLAR_Z_SIZE = 4.0; + const float MIN_X_RANGE = 0; + const float MIN_Y_RANGE = -39.68; + const float MIN_Z_RANGE = -3.0; + const int NUM_INDS_FOR_SCAN = 512; + const int NUM_BOX_CORNERS = 4; + TestClass test_obj(MAX_NUM_PILLARS, + MAX_NUM_POINTS_PER_PILLAR, + GRID_X_SIZE, + GRID_Y_SIZE, + GRID_Z_SIZE, + PILLAR_X_SIZE, + PILLAR_Y_SIZE, + PILLAR_Z_SIZE, + MIN_X_RANGE, + MIN_Y_RANGE, + MIN_Z_RANGE, + NUM_INDS_FOR_SCAN, + NUM_BOX_CORNERS); + + const int NUM_ANCHOR = 432*0.5*496*0.5*2; + + float* anchors_px = new float[NUM_ANCHOR]; + float* anchors_py = new float[NUM_ANCHOR]; + float* anchors_pz = new float[NUM_ANCHOR]; + float* anchors_dx = new float[NUM_ANCHOR]; + float* anchors_dy = new float[NUM_ANCHOR]; + float* anchors_dz = new float[NUM_ANCHOR]; + float* anchors_ro = new float[NUM_ANCHOR]; + float* box_anchors_min_x = new float[NUM_ANCHOR]; + float* box_anchors_min_y = new float[NUM_ANCHOR]; + float* box_anchors_max_x = new float[NUM_ANCHOR]; + float* box_anchors_max_y = new float[NUM_ANCHOR]; + test_obj.generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx, anchors_dy, anchors_dz, anchors_ro); + test_obj.convertAnchors2BoxAnchors(anchors_px, anchors_py, anchors_dx, anchors_dy, + box_anchors_min_x, box_anchors_min_y, + box_anchors_max_x, box_anchors_max_y); + + EXPECT_NEAR(53.25, box_anchors_min_x[345], 0.001); + EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001); + EXPECT_NEAR(38.4, box_anchors_max_x[1098], 0.001); + EXPECT_NEAR(-38.4, box_anchors_max_y[675], 0.001); + + + delete[] anchors_px; + delete[] anchors_py; + delete[] anchors_pz; + delete[] anchors_dx; + delete[] anchors_dy; + delete[] anchors_dz; + delete[] anchors_ro; + delete[] box_anchors_min_x; + delete[] box_anchors_min_y; + delete[] box_anchors_max_x; + delete[] box_anchors_max_y; +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); From 18a39e06046f915ada14f7b45ba932e1bbf61f00 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 18:07:38 +0900 Subject: [PATCH 19/37] Update README.md --- .../lidar_detector/packages/lidar_point_pillars/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index ff08e3108e8..447cf5ca13c 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -52,6 +52,4 @@ void doInference(float* in_points_array, int in_num_points, std::vector o * To display the results in Rviz `objects_visualizer` is required. (Launch file launches automatically this node). -* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp). Notice that this model is under `BY-NC-SA 3.0` license. - -* If trained model comes from KITTI data, users might not be allowed to use the model for commercial purposes. +* Pretrained models are available [here](https://github.com/cirpue49/kitti_pretrained_pp), trained with the help of the KITTI dataset. For this reason, these are not suitable for commercial purposes. Derivative works are bound to the BY-NC-SA 3.0 License. (https://creativecommons.org/licenses/by-nc-sa/3.0/) From 0f9d824326e3e586086f68a0d395553d8bef508d Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 18:53:15 +0900 Subject: [PATCH 20/37] refactor codes --- .../packages/lidar_point_pillars/README.md | 12 ++++++------ .../lidar_point_pillars/include/point_pillars.h | 2 +- .../lidar_point_pillars/nodes/point_pillars.cpp | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index 447cf5ca13c..ddca50651f5 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -26,18 +26,18 @@ TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplear * @param[out] out_detections Output bounding box from the network * @details This is an interface for the algorithm. */ -void doInference(float* in_points_array, int in_num_points, std::vector out_detection); +void doInference(float* in_points_array, int in_num_points, std::vector out_detections); ``` ## Parameters |Parameter| Type| Description|Default| ----------|-----|--------|----| -|`input_topic`|*String*|Input topic Pointcloud. Default.|`/points_raw`| -|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. Default.|`True`| -|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. Default.|`False`| -|`score_threshold`|*Float*|Minimum score required to include the result (0.1)|0.5| -|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS (0.1)|0.5| +|`input_topic`|*String*|Input topic Pointcloud. |`/points_raw`| +|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. |`True`| +|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. |`False`| +|`score_threshold`|*Float*|Minimum score required to include the result [0,1]|0.5| +|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS [0,1]|0.5| |`pfe_onnx_file`|*String* |Path to the PFE onnx file|| |`rpn_onnx_file`|*String* |Path to the RPN onnx file|| diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h index 983c785c618..7945df61354 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h @@ -310,7 +310,7 @@ class PointPillars * @param[in] out_detections Network output bounding box * @details This is interface for the algorithm */ - void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection); + void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detections); }; #endif // POINTS_PILLAR_H diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 24ce3b021a7..35beffa4bb0 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -546,7 +546,7 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi } } -void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector& out_detection) +void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector& out_detections) { preprocess(in_points_array, in_num_points); @@ -580,7 +580,7 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po postprocess_cuda_ptr_->doPostprocessCuda( (float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_, - dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detection); + dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detections); // release the stream and the buffers cudaStreamDestroy(stream); From e7fde83c8629d26707dac2b5781ef07494900441 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 28 Feb 2019 19:02:03 +0900 Subject: [PATCH 21/37] refactor codes --- .../packages/lidar_point_pillars/nodes/point_pillars.cpp | 6 +++--- .../lidar_point_pillars/nodes/preprocess_points_cuda.cu | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 35beffa4bb0..a202a0b2e14 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -55,7 +55,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t , MAX_Z_RANGE_(1) , BATCH_SIZE_(1) , NUM_INDS_FOR_SCAN_(512) - , NUM_THREADS_(64) // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size + , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size , SENSOR_HEIGHT_(1.73) , ANCHOR_DX_SIZE_(1.6) , ANCHOR_DY_SIZE_(3.9) @@ -352,7 +352,7 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p float* box_anchors_min_x_, float* box_anchors_min_y_, float* box_anchors_max_x_, float* box_anchors_max_y_) { - // flip box's dimension when the at the third axis == 1 + // flip box's dimension when the third axis == 1 float flipped_anchors_dx[NUM_ANCHOR_] = { 0 }; float flipped_anchors_dy[NUM_ANCHOR_] = { 0 }; for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) @@ -390,7 +390,7 @@ void PointPillars::initTRT() onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream); onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream); if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr) - { + {//use std:cerr instead of ROS_ERROR because want to keep this fille ros-agnostics std::cerr<< "Failed to load ONNX file " << std::endl; } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu index 0ee9bba397e..c175ac36b3e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu @@ -107,7 +107,7 @@ __global__ void make_pillar_index_kernel( dev_x_coors[count] = x; dev_y_coors[count] = y; - //TODO Need to be modified after making properly training model + //TODO Need to be modified after making properly trained model // x_offset = self.vx / 2 + pc_range[0] // y_offset = self.vy / 2 + pc_range[1] // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset From 6512482c5d810ed4cf18bc4dbff9a273f32b1cfb Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Mon, 4 Mar 2019 09:12:34 +0900 Subject: [PATCH 22/37] Modify cmake --- .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index ef411001240..4174b349c51 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -97,7 +97,7 @@ if(TRT_AVAIL AND CUDA_AVAIL) lidar_point_pillars ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} - RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} + RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) install(DIRECTORY launch/ From 5904fd114a4aed6628a5f079558324f9930328e8 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Mon, 4 Mar 2019 09:27:07 +0900 Subject: [PATCH 23/37] update readme --- .../packages/lidar_point_pillars/README.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index ddca50651f5..2a4b5d3697b 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -8,6 +8,29 @@ CUDA Toolkit v9.0 or v10.0 TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) +## How to setup + +1. Install CUDA from this [website](https://developer.nvidia.com/cuda-downloads) + +2. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using. + +3. Install TensorRT from the Debian local repo package. + +``` +$ sudo dpkg -i +nv-tensorrt-repo-ubuntu1x04-cudax.x-trt5.x.x.x-ga-yyyymmdd_1-1_amd64.deb +$ sudo apt-key add /var/nv-tensorrt-repo-cudax.x-trt5.x.x.x-ga-yyyymmdd/7fa2af80.pub + +$ sudo apt-get update +$ sudo apt-get install tensorrt +``` + +4. Download the pretrained file from [here](https://github.com/cirpue49/kitti_pretrained_pp). + +``` +$ git clone git@github.com:cirpue49/kitti_pretrained_point_pillars.git +``` + ## How to launch From b350ec9661aa6583f61aeec2685b8e81020e12ac Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 18:14:06 +0900 Subject: [PATCH 24/37] add script in cmake to install lib --- .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 4174b349c51..6916195aaf7 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -94,6 +94,8 @@ if(TRT_AVAIL AND CUDA_AVAIL) install(TARGETS + gpu_point_pillars_lib + point_pillars_lib lidar_point_pillars ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} From 10535100188519490b8e6a22bac65f227334a7e1 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 18:33:58 +0900 Subject: [PATCH 25/37] add script in package.xml for autoware_build_flags --- .../lidar_detector/packages/lidar_point_pillars/package.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml index 082e644fa5d..88ed828f0a4 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml @@ -1,13 +1,14 @@ lidar_point_pillars - 1.9.1 + 1.10.0 lidar_point_pillars Kosuke Murakami Apache 2.0 catkin + autoware_build_flags roscpp pcl_ros From 60f3a1298c84ce6955cff1901f687490ab8efffe Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 19:41:08 +0900 Subject: [PATCH 26/37] Modify cmake, include CUDNN --- .../lidar_point_pillars/CMakeLists.txt | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 6916195aaf7..8be73787630 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -19,17 +19,36 @@ option(TRT_AVAIL "TensorRT available" OFF) # try to find the tensorRT modules find_library(NVINFER NAMES nvinfer) find_library(NVPARSERS NAMES nvparsers) -if(NVINFER AND NVPARSERS) - message("TensorRT is available!") - message("NVINFER: ${NVINFER}") - message("NVPARSERS: ${NVPARSERS}") - set(TRT_AVAIL ON) +find_library(NVONNXPARSERS NAMES nvonnxparser) +if(NVINFER AND NVPARSERS AND NVONNXPARSERS) + message("TensorRT is available!") + message("NVINFER: ${NVINFER}") + message("NVPARSERS: ${NVPARSERS}") + message("NVONNXPARSERS: ${NVONNXPARSERS}") + set(TRT_AVAIL ON) else() message("TensorRT is NOT Available") set(TRT_AVAIL OFF) endif() -if(TRT_AVAIL AND CUDA_AVAIL) +# set flags for CUDNN availability +option(CUDNN_AVAIL "CUDNN available" OFF) +# try to find the CUDNN module +find_library(CUDNN_LIBRARY +NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} +PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} +PATH_SUFFIXES lib lib64 bin +DOC "CUDNN library." ) +if(CUDNN_LIBRARY) + message("CUDNN is available!") + message("CUDNN_LIBRARY: ${CUDNN_LIBRARY}") + set(CUDNN_AVAIL ON) +else() + message("CUDNN is NOT Available") + set(CUDNN_AVAIL OFF) +endif() + +if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) find_package(autoware_build_flags REQUIRED) find_package(catkin REQUIRED COMPONENTS @@ -81,9 +100,13 @@ if(TRT_AVAIL AND CUDA_AVAIL) nodes/preprocess_points.cpp ) - target_link_libraries(point_pillars_lib - nvinfer - nvonnxparser + target_link_libraries(point_pillars_lib + ${NVINFER} + ${NVONNXPARSERS} + ${CUDA_LIBRARIES} + ${CUDA_CUBLAS_LIBRARIES} + ${CUDA_curand_LIBRARY} + ${CUDNN_LIBRARY} gpu_point_pillars_lib ) @@ -116,8 +139,8 @@ if(TRT_AVAIL AND CUDA_AVAIL) catkin_add_gtest(test-point_pillars test/src/test_point_pillars.cpp) target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib) endif() -ELSE() +else() find_package(catkin REQUIRED) catkin_package() message("PointPillars won't be built, CUDA and/or TensorRT were not found.") -ENDIF () +endif() From a384528a9ad95122a3a65f557831c176ca96c1cf Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 19:49:06 +0900 Subject: [PATCH 27/37] Add CUDNN dependency script in README --- .../lidar_detector/packages/lidar_point_pillars/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md index 2a4b5d3697b..3cef61d109a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md @@ -6,15 +6,19 @@ Autoware package for Point Pillars. [Referenced paper](https://arxiv.org/abs/18 CUDA Toolkit v9.0 or v10.0 +CUDNN: Tested with v7.3.1 + TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing) ## How to setup 1. Install CUDA from this [website](https://developer.nvidia.com/cuda-downloads) -2. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using. +2. Install CUDNN + +3. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using. -3. Install TensorRT from the Debian local repo package. +4. Install TensorRT from the Debian local repo package. ``` $ sudo dpkg -i From 367bcb19927688f2817e86210b38f4d63f3c8241 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 19:57:15 +0900 Subject: [PATCH 28/37] Delete unnecessary default value for onnx files in launch file --- .../lidar_point_pillars/launch/lidar_point_pillars.launch | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch index 4a7618eacc8..9bf500fc6e8 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch @@ -4,8 +4,8 @@ - - + + From b5cd3ff7c5430c6eb6ab6119c3014818441d5345 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Wed, 6 Mar 2019 20:11:38 +0900 Subject: [PATCH 29/37] add label when creating DetectedObject --- .../lidar_point_pillars/nodes/point_pillars_ros.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index def9759cf7c..9da67a411d5 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -87,9 +87,10 @@ void PointPillarsROS::pubDetectedObject(const std::vector& detections, co object.pose.position.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 1]; object.pose.position.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 2]; + // Trained this way float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6]; + yaw += M_PI/2; yaw = std::atan2(std::sin(yaw), std::cos(yaw)); - // Trained this way geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw); object.pose.orientation = q; @@ -98,10 +99,14 @@ void PointPillarsROS::pubDetectedObject(const std::vector& detections, co object.pose = getTransformedPose(object.pose, angle_transform_inversed_); } - object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3]; - object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4]; + // Again: Trained this way + object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4]; + object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3]; object.dimensions.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 5]; + //Only detects car in Version 1.0 + object.label = "car"; + objects.objects.push_back(object); } pub_objects_.publish(objects); From 3e226acf6ce1ef4df86dd54727d65ec37108a25a Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 12:53:55 +0900 Subject: [PATCH 30/37] Add include/{}/ --- .../packages/lidar_point_pillars/CMakeLists.txt | 2 +- .../{ => lidar_point_pillars}/anchor_mask_cuda.h | 0 .../include/{ => lidar_point_pillars}/common.h | 0 .../include/{ => lidar_point_pillars}/nms_cuda.h | 2 +- .../{ => lidar_point_pillars}/point_pillars.h | 12 ++++++------ .../{ => lidar_point_pillars}/point_pillars_ros.h | 2 +- .../{ => lidar_point_pillars}/postprocess_cuda.h | 2 +- .../{ => lidar_point_pillars}/preprocess_points.h | 0 .../preprocess_points_cuda.h | 0 .../include/{ => lidar_point_pillars}/scatter_cuda.h | 0 .../lidar_point_pillars/nodes/anchor_mask_cuda.cu | 4 ++-- .../nodes/lidar_point_pillars_node.cpp | 2 +- .../packages/lidar_point_pillars/nodes/nms_cuda.cu | 2 +- .../lidar_point_pillars/nodes/point_pillars.cpp | 2 +- .../lidar_point_pillars/nodes/point_pillars_ros.cpp | 2 +- .../lidar_point_pillars/nodes/postprocess_cuda.cu | 2 +- .../lidar_point_pillars/nodes/preprocess_points.cpp | 2 +- .../nodes/preprocess_points_cuda.cu | 4 ++-- .../lidar_point_pillars/nodes/scatter_cuda.cu | 2 +- .../test/src/test_point_pillars.cpp | 6 ++++-- 20 files changed, 25 insertions(+), 23 deletions(-) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/anchor_mask_cuda.h (100%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/common.h (100%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/nms_cuda.h (97%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/point_pillars.h (97%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/point_pillars_ros.h (98%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/postprocess_cuda.h (99%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/preprocess_points.h (100%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/preprocess_points_cuda.h (100%) rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/scatter_cuda.h (100%) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 8be73787630..8b0bee0e491 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -130,7 +130,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) PATTERN ".svn" EXCLUDE) install(DIRECTORY include/ - DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} + DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}/${PROJECT_NAME}/ PATTERN ".svn" EXCLUDE ) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h similarity index 100% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h similarity index 100% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h similarity index 97% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h index 30f2a997062..496019d6c25 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h @@ -29,7 +29,7 @@ #include // headers in local files -#include "common.h" +#include "lidar_point_pillars/common.h" class NMSCuda { diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h similarity index 97% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h index 7945df61354..ea9a0549201 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h @@ -38,12 +38,12 @@ #include "NvOnnxParser.h" // headers in local files -#include "common.h" -#include "preprocess_points.h" -#include "preprocess_points_cuda.h" -#include "anchor_mask_cuda.h" -#include "scatter_cuda.h" -#include "postprocess_cuda.h" +#include "lidar_point_pillars/common.h" +#include "lidar_point_pillars/preprocess_points.h" +#include "lidar_point_pillars/preprocess_points_cuda.h" +#include "lidar_point_pillars/anchor_mask_cuda.h" +#include "lidar_point_pillars/scatter_cuda.h" +#include "lidar_point_pillars/postprocess_cuda.h" // Logger for TensorRT info/warning/errors class Logger : public nvinfer1::ILogger diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h similarity index 98% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h index c190fb56a97..b2c18c4a36b 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h @@ -37,7 +37,7 @@ #include // headers in local files -#include "point_pillars.h" +#include "lidar_point_pillars/point_pillars.h" class PointPillarsROS { diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h similarity index 99% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h index 2add152f904..be32518be09 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h @@ -29,7 +29,7 @@ #include // headers in local files -#include "nms_cuda.h" +#include "lidar_point_pillars/nms_cuda.h" class PostprocessCuda { diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h similarity index 100% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h similarity index 100% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/scatter_cuda.h similarity index 100% rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/scatter_cuda.h diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu index 72c7556f0c8..a46030857a9 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu @@ -15,8 +15,8 @@ */ //headers in local files -#include "common.h" -#include "anchor_mask_cuda.h" +#include "lidar_point_pillars/common.h" +#include "lidar_point_pillars/anchor_mask_cuda.h" //modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf __global__ void scan_x(int *g_odata, int *g_idata, int n) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp index 8faa0bede56..001933b8197 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp @@ -18,7 +18,7 @@ #include // headers in local files -#include "point_pillars_ros.h" +#include "lidar_point_pillars/point_pillars_ros.h" int main(int argc, char** argv) { diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu index d675f9e32f8..d73ef0b92d8 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu @@ -6,7 +6,7 @@ //headers in local files -#include "nms_cuda.h" +#include "lidar_point_pillars/nms_cuda.h" __device__ inline float devIoU(float const *const a, float const *const b) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index a202a0b2e14..afb89b2c22f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -19,7 +19,7 @@ #include // headers in local files -#include "point_pillars.h" +#include "lidar_point_pillars/point_pillars.h" // clang-format off PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 9da67a411d5..126ffc63a97 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -28,7 +28,7 @@ // headers in local files #include "autoware_msgs/DetectedObjectArray.h" -#include "point_pillars_ros.h" +#include "lidar_point_pillars/point_pillars_ros.h" PointPillarsROS::PointPillarsROS() : private_nh_("~") diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu index cf82496926a..95e8ef6036e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu @@ -20,7 +20,7 @@ #include //headers in local files -#include "postprocess_cuda.h" +#include "lidar_point_pillars/postprocess_cuda.h" __global__ void filter_kernel(const float* box_preds, const float* cls_preds, const float* dir_preds, const int* anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index e36055debc9..479094cf0d2 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -19,7 +19,7 @@ #include // headers in local files -#include "preprocess_points.h" +#include "lidar_point_pillars/preprocess_points.h" PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu index c175ac36b3e..0f6504980a6 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu @@ -18,8 +18,8 @@ #include //headers in local files -#include "common.h" -#include "preprocess_points_cuda.h" +#include "lidar_point_pillars/common.h" +#include "lidar_point_pillars/preprocess_points_cuda.h" __global__ void make_pillar_histo_kernel( const float* dev_points, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu index 6933f1da93e..107e3de6e4a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu @@ -15,7 +15,7 @@ */ //headers in local files -#include "scatter_cuda.h" +#include "lidar_point_pillars/scatter_cuda.h" __global__ void scatter_kernel( int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature, const int MAX_NUM_PILLARS_, const int GRID_X_SIZE, const int GRID_Y_SIZE) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index d42fa7d7c44..f06956b56dc 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -29,8 +29,8 @@ #include #include -#include "point_pillars_ros.h" -#include "preprocess_points.h" +#include "lidar_point_pillars/point_pillars_ros.h" +#include "lidar_point_pillars/preprocess_points.h" class TestSuite : public ::testing::Test { @@ -376,6 +376,8 @@ TEST(TestSuite, CheckGenerateBoxAnchors) EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001); EXPECT_NEAR(38.4, box_anchors_max_x[1098], 0.001); EXPECT_NEAR(-38.4, box_anchors_max_y[675], 0.001); + EXPECT_NEAR(-31, box_anchors_max_y[675], 0.001); + EXPECT_EQ(true, false); delete[] anchors_px; From 7daa788295c2187308c3f8a7a5eea39874565462 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 13:04:41 +0900 Subject: [PATCH 31/37] Delete unnecessary code from previous commit --- .../lidar_point_pillars/test/src/test_point_pillars.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index f06956b56dc..f11ec3d257f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -376,8 +376,6 @@ TEST(TestSuite, CheckGenerateBoxAnchors) EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001); EXPECT_NEAR(38.4, box_anchors_max_x[1098], 0.001); EXPECT_NEAR(-38.4, box_anchors_max_y[675], 0.001); - EXPECT_NEAR(-31, box_anchors_max_y[675], 0.001); - EXPECT_EQ(true, false); delete[] anchors_px; From a04b5ac358a16c2a6c8597a77a2252b612d1a727 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 16:07:26 +0900 Subject: [PATCH 32/37] Remove magic number by using MACRO --- .../include/lidar_point_pillars/common.h | 5 ++++ .../include/lidar_point_pillars/nms_cuda.h | 4 ++- .../preprocess_points_cuda.h | 4 ++- .../nodes/anchor_mask_cuda.cu | 2 +- .../lidar_point_pillars/nodes/nms_cuda.cu | 30 ++++++++++--------- .../nodes/point_pillars.cpp | 2 +- .../nodes/postprocess_cuda.cu | 8 ++--- .../nodes/preprocess_points.cpp | 3 +- .../nodes/preprocess_points_cuda.cu | 30 +++++++++++-------- 9 files changed, 52 insertions(+), 36 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h index 46e816db735..283e16be0e3 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h @@ -30,6 +30,11 @@ // headers in CUDA #include +//using MACRO to allocate memory inside CUDA kernel +#define NUM_3D_BOX_CORNERS_MACRO 8 +#define NUM_2D_BOX_CORNERS_MACRO 4 +#define NUM_THREADS_MACRO 64 // need to be changed when NUM_THREADS is changed + #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) #define GPU_CHECK(ans) \ diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h index 496019d6c25..e3395227f2b 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h @@ -35,16 +35,18 @@ class NMSCuda { private: const int NUM_THREADS_; + const int NUM_BOX_CORNERS_; const float nms_overlap_threshold_; public: /** * @brief Constructor * @param[in] NUM_THRESD Number of threads for launching cuda kernel + * @param[in] NUM_BOX_CORNERS Number of corners for 2D box * @param[in] nms_overlap_threshold IOU threshold for NMS * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam */ - NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold); + NMSCuda(const int NUM_THREADS, const int NUM_BOX_CORNERS, const float nms_overlap_threshold); /** * @brief GPU Non-Maximum Suppresion for network output diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h index 710a14a87c3..cb79f0bc29c 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h @@ -41,6 +41,7 @@ class PreprocessPointsCuda const float MIN_X_RANGE_; const float MIN_Y_RANGE_; const float MIN_Z_RANGE_; + const int NUM_BOX_CORNERS_; // end initalizer list float* dev_pillar_x_in_coors_; @@ -70,12 +71,13 @@ class PreprocessPointsCuda * @param[in] MIN_X_RANGE Minimum x value for pointcloud * @param[in] MIN_Y_RANGE Minimum y value for pointcloud * @param[in] MIN_Z_RANGE Minimum z value for pointcloud + * @param[in] NUM_BOX_CORNERS Number of corners for 2D box * @details Captital variables never change after the compile */ PreprocessPointsCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int NUM_INDS_FOR_SCAN, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, - const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE); + const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, const int NUM_BOX_CORNERS); ~PreprocessPointsCuda(); /** diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu index a46030857a9..6b58021cbc4 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu @@ -127,7 +127,7 @@ __global__ void make_anchor_mask_kernel(const float* dev_box_anchors_min_x, cons const int NUM_INDS_FOR_SCAN) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int anchor_coor[4] = {0}; + int anchor_coor[NUM_2D_BOX_CORNERS_MACRO] = {0}; const int GRID_X_SIZE_1 = GRID_X_SIZE - 1;//grid_x_size - 1 const int GRID_Y_SIZE_1 = GRID_Y_SIZE - 1;//grid_y_size - 1 diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu index d73ef0b92d8..3f52973e810 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu @@ -21,7 +21,8 @@ __device__ inline float devIoU(float const *const a, float const *const b) } __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, - const float *dev_boxes, unsigned long long *dev_mask) + const float *dev_boxes, unsigned long long *dev_mask, + const int NUM_BOX_CORNERS) { const int row_start = blockIdx.y; const int col_start = blockIdx.x; @@ -33,24 +34,24 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, min(n_boxes - row_start * block_threads, block_threads); const int col_size = min(n_boxes - col_start * block_threads, block_threads); - //Only can initialize with figures, not by variables - __shared__ float block_boxes[64 * 4]; + + __shared__ float block_boxes[NUM_THREADS_MACRO * NUM_2D_BOX_CORNERS_MACRO]; if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 4 + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 0]; - block_boxes[threadIdx.x * 4 + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 1]; - block_boxes[threadIdx.x * 4 + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 2]; - block_boxes[threadIdx.x * 4 + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 3]; + block_boxes[threadIdx.x * NUM_BOX_CORNERS + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 0]; + block_boxes[threadIdx.x * NUM_BOX_CORNERS + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 1]; + block_boxes[threadIdx.x * NUM_BOX_CORNERS + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 2]; + block_boxes[threadIdx.x * NUM_BOX_CORNERS + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 3]; } __syncthreads(); if (threadIdx.x < row_size) { const int cur_box_idx = block_threads * row_start + threadIdx.x; - const float cur_box[4] = {dev_boxes[cur_box_idx*4 + 0], - dev_boxes[cur_box_idx*4 + 1], - dev_boxes[cur_box_idx*4 + 2], - dev_boxes[cur_box_idx*4 + 3]}; + const float cur_box[NUM_2D_BOX_CORNERS_MACRO] = {dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 0], + dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 1], + dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 2], + dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 3]}; unsigned long long t = 0; int start = 0; if (row_start == col_start) @@ -59,7 +60,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, } for (int i = start; i < col_size; i++) { - if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) + if (devIoU(cur_box, block_boxes + i * NUM_BOX_CORNERS) > nms_overlap_thresh) { t |= 1ULL << i; } @@ -69,8 +70,9 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, } } -NMSCuda::NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold): +NMSCuda::NMSCuda(const int NUM_THREADS, const int NUM_BOX_CORNERS ,const float nms_overlap_threshold): NUM_THREADS_(NUM_THREADS), +NUM_BOX_CORNERS_(NUM_BOX_CORNERS), nms_overlap_threshold_(nms_overlap_threshold) { } @@ -84,7 +86,7 @@ void NMSCuda::doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_n unsigned long long *dev_mask = NULL; GPU_CHECK(cudaMalloc(&dev_mask, host_filter_count * col_blocks * sizeof(unsigned long long))); - nms_kernel<<>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask); + nms_kernel<<>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask, NUM_BOX_CORNERS_); // postprocess for nms output std::vector host_mask(host_filter_count * col_blocks); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index afb89b2c22f..b3f8d7afa68 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -74,7 +74,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t { preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda( NUM_THREADS_, MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, NUM_INDS_FOR_SCAN_, GRID_X_SIZE_, GRID_Y_SIZE_, - GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_)); + GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_BOX_CORNERS_)); } anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(NUM_INDS_FOR_SCAN_, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu index 95e8ef6036e..87abbee141e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu @@ -72,15 +72,14 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1) - float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy), + float corners[NUM_3D_BOX_CORNERS_MACRO] = {float(-0.5*box_dx), float(-0.5*box_dy), float(-0.5*box_dx), float( 0.5*box_dy), float( 0.5*box_dx), float( 0.5*box_dy), float( 0.5*box_dx), float(-0.5*box_dy)}; //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax) - //cannot use variable initialization since "error: expression must have a constant value" - float rotated_corners[2*4]; - float offset_corners[2*4]; + float rotated_corners[NUM_3D_BOX_CORNERS_MACRO]; + float offset_corners[NUM_3D_BOX_CORNERS_MACRO]; float sin_yaw = sinf(box_ro); float cos_yaw = cosf(box_ro); float xmin = FLOAT_MAX; @@ -152,6 +151,7 @@ NUM_OUTPUT_BOX_FEATURE_(NUM_OUTPUT_BOX_FEATURE) { nms_cuda_ptr_.reset(new NMSCuda( NUM_THREADS, + NUM_BOX_CORNERS, nms_overlap_threshold)); } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index 479094cf0d2..97c84918210 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -114,11 +114,12 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_; // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_; // TODO Need to be modified after proper trining code + // Will be modified in ver 1.1 y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9; x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1; // sparse pillar map - sparse_pillar_map[y_coor * 512 + x_coor] = 1; + sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1; } int num = num_points_per_pillar[pillar_index]; if (num < MAX_NUM_POINTS_PER_PILLAR_) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu index 0f6504980a6..89ef5131d6e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu @@ -38,7 +38,8 @@ __global__ void make_pillar_histo_kernel( const float MIN_Z_RANGE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, - const float PILLAR_Z_SIZE + const float PILLAR_Z_SIZE, + const int NUM_BOX_CORNERS ) { int th_i = threadIdx.x + blockIdx.x * blockDim.x; @@ -46,9 +47,9 @@ __global__ void make_pillar_histo_kernel( { return; } - int y_coor = floor((dev_points[th_i*4 + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE); - int x_coor = floor((dev_points[th_i*4 + 0] - MIN_X_RANGE)/PILLAR_X_SIZE); - int z_coor = floor((dev_points[th_i*4 + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE); + int y_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE); + int x_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 0] - MIN_X_RANGE)/PILLAR_X_SIZE); + int z_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE); if(x_coor >= 0 && x_coor < GRID_X_SIZE && y_coor >= 0 && y_coor < GRID_Y_SIZE && @@ -58,10 +59,10 @@ __global__ void make_pillar_histo_kernel( if(count < max_points_per_pillar) { int ind = y_coor*GRID_X_SIZE*max_points_per_pillar + x_coor*max_points_per_pillar + count; - dev_pillar_x_in_coors[ind] = dev_points[th_i*4 + 0]; - dev_pillar_y_in_coors[ind] = dev_points[th_i*4 + 1]; - dev_pillar_z_in_coors[ind] = dev_points[th_i*4 + 2]; - dev_pillar_i_in_coors[ind] = dev_points[th_i*4 + 3]; + dev_pillar_x_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 0]; + dev_pillar_y_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 1]; + dev_pillar_z_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 2]; + dev_pillar_i_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 3]; } } } @@ -107,12 +108,12 @@ __global__ void make_pillar_index_kernel( dev_x_coors[count] = x; dev_y_coors[count] = y; - //TODO Need to be modified after making properly trained model + //TODO Need to be modified after making properly trained weight + // Will be modified in ver 1.1 // x_offset = self.vx / 2 + pc_range[0] // y_offset = self.vy / 2 + pc_range[1] // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset - dev_x_coors_for_sub[count] = x* PILLAR_X_SIZE + 0.1; dev_y_coors_for_sub[count] = y* PILLAR_Y_SIZE + -39.9; dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1; @@ -194,7 +195,8 @@ PreprocessPointsCuda::PreprocessPointsCuda(const int NUM_THREADS, const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, - const float MIN_Z_RANGE) + const float MIN_Z_RANGE, + const int NUM_BOX_CORNERS) : NUM_THREADS_(NUM_THREADS), MAX_NUM_PILLARS_(MAX_NUM_PILLARS), @@ -208,7 +210,8 @@ PILLAR_Y_SIZE_(PILLAR_Y_SIZE), PILLAR_Z_SIZE_(PILLAR_Z_SIZE), MIN_X_RANGE_(MIN_X_RANGE), MIN_Y_RANGE_(MIN_Y_RANGE), -MIN_Z_RANGE_(MIN_Z_RANGE) +MIN_Z_RANGE_(MIN_Z_RANGE), +NUM_BOX_CORNERS_(NUM_BOX_CORNERS) { GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float))); @@ -265,7 +268,8 @@ void PreprocessPointsCuda::doPreprocessPointsCuda(const float* dev_points, const MIN_Z_RANGE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, - PILLAR_Z_SIZE_); + PILLAR_Z_SIZE_, + NUM_BOX_CORNERS_); make_pillar_index_kernel<<>>( From c59fa712de081abd1a2d69b96546f4b9bcdfec89 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 16:14:13 +0900 Subject: [PATCH 33/37] Clarify float number --- .../nodes/point_pillars.cpp | 38 +++++++++---------- .../nodes/point_pillars_ros.cpp | 8 ++-- .../nodes/preprocess_points.cpp | 8 ++-- .../nodes/preprocess_points_cuda.cu | 4 +- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index b3f8d7afa68..72d30f772e8 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -44,22 +44,22 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t , RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_ * 7) , RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_) , RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_ * 2) - , PILLAR_X_SIZE_(0.16) - , PILLAR_Y_SIZE_(0.16) - , PILLAR_Z_SIZE_(4.0) - , MIN_X_RANGE_(0.0) - , MIN_Y_RANGE_(-39.68) - , MIN_Z_RANGE_(-3.0) - , MAX_X_RANGE_(69.12) - , MAX_Y_RANGE_(39.68) + , PILLAR_X_SIZE_(0.16f) + , PILLAR_Y_SIZE_(0.16f) + , PILLAR_Z_SIZE_(4.0f) + , MIN_X_RANGE_(0.0f) + , MIN_Y_RANGE_(-39.68f) + , MIN_Z_RANGE_(-3.0f) + , MAX_X_RANGE_(69.12f) + , MAX_Y_RANGE_(39.68f) , MAX_Z_RANGE_(1) , BATCH_SIZE_(1) , NUM_INDS_FOR_SCAN_(512) , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size - , SENSOR_HEIGHT_(1.73) - , ANCHOR_DX_SIZE_(1.6) - , ANCHOR_DY_SIZE_(3.9) - , ANCHOR_DZ_SIZE_(1.56) + , SENSOR_HEIGHT_(1.73f) + , ANCHOR_DX_SIZE_(1.6f) + , ANCHOR_DY_SIZE_(3.9f) + , ANCHOR_DZ_SIZE_(1.56f) , NUM_BOX_CORNERS_(4) , NUM_OUTPUT_BOX_FEATURE_(7) { @@ -232,7 +232,7 @@ void PointPillars::deviceMemoryMalloc() GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float))); // for scatter kernel - GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64 * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float))); + GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, NUM_THREADS_ * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float))); // for filter GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_ * sizeof(float))); @@ -292,8 +292,8 @@ void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float box_anchors_max_y_[i] = 0; } - float x_stride = PILLAR_X_SIZE_ * 2; - float y_stride = PILLAR_Y_SIZE_ * 2; + float x_stride = PILLAR_X_SIZE_ * 2.0f; + float y_stride = PILLAR_Y_SIZE_ * 2.0f; float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_; float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_; @@ -373,10 +373,10 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++) { int ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_ + r; - box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2; - box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2; - box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2; - box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2; + box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f; + box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f; + box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f; + box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f; } } } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp index 126ffc63a97..776ce59394a 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp @@ -35,8 +35,8 @@ PointPillarsROS::PointPillarsROS() , has_subscribed_baselink_(false) , NUM_POINT_FEATURE_(4) , OUTPUT_NUM_BOX_FEATURE_(7) - , TRAINED_SENSOR_HEIGHT_(1.73) - , NORMALIZING_INTENSITY_VALUE_(255.0) + , TRAINED_SENSOR_HEIGHT_(1.73f) + , NORMALIZING_INTENSITY_VALUE_(255.0f) , BASELINK_FRAME_("base_link") { //ros related param @@ -44,8 +44,8 @@ PointPillarsROS::PointPillarsROS() //algorithm related params private_nh_.param("reproduce_result_mode", reproduce_result_mode_, false); - private_nh_.param("score_threshold", score_threshold_, 0.5); - private_nh_.param("nms_overlap_threshold", nms_overlap_threshold_, 0.5); + private_nh_.param("score_threshold", score_threshold_, 0.5f); + private_nh_.param("nms_overlap_threshold", nms_overlap_threshold_, 0.5f); private_nh_.param("pfe_onnx_file", pfe_onnx_file_, ""); private_nh_.param("rpn_onnx_file", rpn_onnx_file_, ""); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index 97c84918210..3180f94d72b 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -115,8 +115,8 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_; // TODO Need to be modified after proper trining code // Will be modified in ver 1.1 - y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9; - x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1; + y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9f; + x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1f; // sparse pillar map sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1; @@ -143,11 +143,11 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point y_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = y; if (j < num_points_for_a_pillar) { - pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0; + pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0f; } else { - pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0; + pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0f; } } } diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu index 89ef5131d6e..343f30ccea1 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu @@ -114,8 +114,8 @@ __global__ void make_pillar_index_kernel( // y_offset = self.vy / 2 + pc_range[1] // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset - dev_x_coors_for_sub[count] = x* PILLAR_X_SIZE + 0.1; - dev_y_coors_for_sub[count] = y* PILLAR_Y_SIZE + -39.9; + dev_x_coors_for_sub[count] = x* PILLAR_X_SIZE + 0.1f; + dev_y_coors_for_sub[count] = y* PILLAR_Y_SIZE + -39.9f; dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1; } } From 7b9cd43487aa0699e73c4fff5b0f6d1ed656bfbb Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 16:22:57 +0900 Subject: [PATCH 34/37] Add roslib dependency --- .../packages/lidar_point_pillars/CMakeLists.txt | 2 ++ .../lidar_detector/packages/lidar_point_pillars/package.xml | 2 ++ .../lidar_point_pillars/test/src/test_point_pillars.cpp | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt index 8b0bee0e491..87d2a5728b8 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt @@ -53,6 +53,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) find_package(autoware_build_flags REQUIRED) find_package(catkin REQUIRED COMPONENTS roscpp + roslib pcl_ros autoware_msgs ) @@ -62,6 +63,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) catkin_package( CATKIN_DEPENDS roscpp + roslib pcl_ros autoware_msgs ) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml index 88ed828f0a4..8c937c49787 100755 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml @@ -11,11 +11,13 @@ autoware_build_flags roscpp + roslib pcl_ros autoware_msgs roscpp + roslib pcl_ros autoware_msgs diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index f11ec3d257f..babc6ecb35f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -21,14 +21,18 @@ * @date 2019/02/26 */ +// headers in ROS #include #include +// headers in gtest #include +//headers in PCL #include #include +//headers in local files #include "lidar_point_pillars/point_pillars_ros.h" #include "lidar_point_pillars/preprocess_points.h" From d67f2ecfcf32e99635649a614e9b8c9dfcd81659 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Thu, 7 Mar 2019 16:31:53 +0900 Subject: [PATCH 35/37] Fix typo --- .../include/lidar_point_pillars/nms_cuda.h | 2 +- .../include/lidar_point_pillars/postprocess_cuda.h | 6 +++--- .../include/lidar_point_pillars/preprocess_points_cuda.h | 2 +- .../lidar_point_pillars/test/src/test_point_pillars.cpp | 7 ++++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h index e3395227f2b..ea88b3602aa 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h @@ -41,7 +41,7 @@ class NMSCuda public: /** * @brief Constructor - * @param[in] NUM_THRESD Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads for launching cuda kernel * @param[in] NUM_BOX_CORNERS Number of corners for 2D box * @param[in] nms_overlap_threshold IOU threshold for NMS * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h index be32518be09..29de5f97141 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h @@ -56,11 +56,11 @@ class PostprocessCuda * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors * @param[in] score_threshold Score threshold for filtering output - * @param[in] NUM_THREDS Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads for launching cuda kernel * @param[in] nms_overlap_threshold IOU threshold for NMS * @param[in] NUM_BOX_CORNERS Number of box's corner - * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature - * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam + * @param[in] NUM_OUTPUT_BOX_FEATURE Number of output box's feature + * @details Captital variables never change after the compile, non-capital variables could be changed through rosparam */ PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, const float score_threshold, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h index cb79f0bc29c..92cc3b6eb1e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h @@ -58,7 +58,7 @@ class PreprocessPointsCuda public: /** * @brief Constructor - * @param[in] NUM_THREDS Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads for launching cuda kernel * @param[in] MAX_NUM_PILLARS Maximum number of pillars * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp index babc6ecb35f..70b3a4fa0ae 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp @@ -68,7 +68,8 @@ class TestClass const int NUM_INDS_FOR_SCAN_; const int NUM_BOX_CORNERS_; - void initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr); + // Make pointcloud for test + void makePointsForTest(pcl::PointCloud::Ptr in_pcl_pc_ptr); void pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc_ptr, float* out_points_array); void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors, float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i, @@ -142,7 +143,7 @@ void TestClass::pclToArray(const pcl::PointCloud::Ptr& in_pcl_pc } } -void TestClass::initPoints(pcl::PointCloud::Ptr in_pcl_pc_ptr) +void TestClass::makePointsForTest(pcl::PointCloud::Ptr in_pcl_pc_ptr) { pcl::PointXYZI point; point.x = 12.9892; @@ -233,7 +234,7 @@ TEST(TestSuite, CheckPreprocessPointsCPU) NUM_BOX_CORNERS); pcl::PointCloud::Ptr pcl_pc_ptr(new pcl::PointCloud); - test_obj.initPoints(pcl_pc_ptr); + test_obj.makePointsForTest(pcl_pc_ptr); float* points_array = new float[pcl_pc_ptr->size() * 4]; test_obj.pclToArray(pcl_pc_ptr, points_array); From 816ba2d17a101db17e928e39234bc4812cda03e0 Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Fri, 8 Mar 2019 10:47:38 +0900 Subject: [PATCH 36/37] Modify comment --- .../lidar_point_pillars/anchor_mask_cuda.h | 18 +++++----- .../include/lidar_point_pillars/nms_cuda.h | 8 ++--- .../lidar_point_pillars/point_pillars.h | 32 ++++++++--------- .../lidar_point_pillars/point_pillars_ros.h | 2 +- .../lidar_point_pillars/postprocess_cuda.h | 36 +++++++++---------- .../lidar_point_pillars/preprocess_points.h | 32 ++++++++--------- .../preprocess_points_cuda.h | 20 +++++------ .../lidar_point_pillars/nodes/nms_cuda.cu | 1 - .../nodes/point_pillars.cpp | 6 ++-- .../nodes/preprocess_points.cpp | 2 -- .../sensing/drivers/lidar/packages/robosense | 2 +- 11 files changed, 78 insertions(+), 81 deletions(-) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h index 6a8b555422c..255ac480bb5 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h @@ -60,15 +60,15 @@ class AnchorMaskCuda /** * @brief call cuda code for making anchor mask * @param[in] dev_sparse_pillar_map Grid map representation for pillar occupancy - * @param[in] dev_cumsum_along_x Cumsum dev_sparse_pillar_map along x axis - * @param[in] dev_cumsum_along_y Cumsum dev_cumsum_along_x along y axis - * @param[in] dev_box_anchors_min_x Array for storng min x value for each anchor - * @param[in] dev_box_anchors_min_y Array for storng min y value for each anchor - * @param[in] dev_box_anchors_max_x Array for storng max x value for each anchor - * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor - * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor - * @param[out] dev_anchor_mask Anchor mask for filtering output - * @details dev_* means device memory. Make a mask for activating pillar occupancy area + * @param[in] dev_cumsum_along_x Array for storing cumsum-ed dev_sparse_pillar_map values + * @param[in] dev_cumsum_along_y Array for storing cumsum-ed dev_cumsum_along_y values + * @param[in] dev_box_anchors_min_x Array for storing min x value for each anchor + * @param[in] dev_box_anchors_min_y Array for storing min y value for each anchor + * @param[in] dev_box_anchors_max_x Array for storing max x value for each anchor + * @param[in] dev_box_anchors_max_y Array for storing max y value for each anchor + * @param[in] dev_box_anchors_max_y Array for storing max y value for each anchor + * @param[out] dev_anchor_mask Anchor mask for filtering the network output + * @details dev_* means device memory. Make a mask for filtering pillar occupancy area */ void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y, const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h index ea88b3602aa..bf4c3501ba1 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h @@ -41,7 +41,7 @@ class NMSCuda public: /** * @brief Constructor - * @param[in] NUM_THREADS Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads when launching cuda kernel * @param[in] NUM_BOX_CORNERS Number of corners for 2D box * @param[in] nms_overlap_threshold IOU threshold for NMS * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam @@ -52,9 +52,9 @@ class NMSCuda * @brief GPU Non-Maximum Suppresion for network output * @param[in] host_filter_count Number of filtered output * @param[in] dev_sorted_box_for_nms Bounding box output sorted by score - * @param[out] out_keep_inds Indexes for selected bounding box - * @param[out] out_num_to_keep Number of keep bounding box - * @details Includes CUDA NMS and postprocessing for selecting box in CPU + * @param[out] out_keep_inds Indexes of selected bounding box + * @param[out] out_num_to_keep Number of kept bounding boxes + * @details NMS in GPU and postprocessing for selecting box in CPU */ void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep); }; diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h index ea9a0549201..249a8da4245 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h @@ -205,31 +205,31 @@ class PointPillars /** * @brief Memory allocation for device memory - * @details Call in the constructor + * @details Called in the constructor */ void deviceMemoryMalloc(); /** - * @brief Initialize anchor - * @details Call in the constructor + * @brief Initializing anchor + * @details Called in the constructor */ void initAnchors(); /** - * @brief Initialize TensorRT isntances - * @details Call in the constructor + * @brief Initializing TensorRT instances + * @details Called in the constructor */ void initTRT(); /** * @brief Generate anchors - * @param[in] anchors_px_ Represents x-coordinate value for corresponding anchor - * @param[in] anchors_py_ Represents y-coordinate value for corresponding anchor - * @param[in] anchors_pz_ Represents z-coordinate value for corresponding anchor - * @param[in] anchors_dx_ Represents x-dimension value for corresponding anchor - * @param[in] anchors_dy_ Represents y-dimension value for corresponding anchor - * @param[in] anchors_dz_ Represents z-dimension value for corresponding anchor - * @param[in] anchors_ro_ Represents rotation value for corresponding anchor + * @param[in] anchors_px_ Represents x-coordinate values for a corresponding anchor + * @param[in] anchors_py_ Represents y-coordinate values for a corresponding anchor + * @param[in] anchors_pz_ Represents z-coordinate values for a corresponding anchor + * @param[in] anchors_dx_ Represents x-dimension values for a corresponding anchor + * @param[in] anchors_dy_ Represents y-dimension values for a corresponding anchor + * @param[in] anchors_dz_ Represents z-dimension values for a corresponding anchor + * @param[in] anchors_ro_ Represents rotation values for a corresponding anchor * @details Generate anchors for each grid */ void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_, @@ -255,7 +255,7 @@ class PointPillars * @brief Preproces by CPU * @param[in] in_points_array pointcloud array * @param[in] in_num_points Number of points - * @details The output from CPU preprocess is reproducible + * @details The output from preprocessCPU is reproducible, while preprocessGPU is not */ void preprocessCPU(const float* in_points_array, const int in_num_points); @@ -263,7 +263,7 @@ class PointPillars * @brief Preproces by GPU * @param[in] in_points_array pointcloud array * @param[in] in_num_points Number of points - * @details Fast preprocess comapared with CPU preprocess + * @details Faster preprocess comapared with CPU preprocess */ void preprocessGPU(const float* in_points_array, const int in_num_points); @@ -305,10 +305,10 @@ class PointPillars /** * @brief Call PointPillars for the inference - * @param[in] in_points_array pointcloud array + * @param[in] in_points_array Pointcloud array * @param[in] in_num_points Number of points * @param[in] out_detections Network output bounding box - * @details This is interface for the algorithm + * @details This is an interface for the algorithm */ void doInference(const float* in_points_array, const int in_num_points, std::vector& out_detections); }; diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h index b2c18c4a36b..7484b10c36c 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h @@ -78,7 +78,7 @@ class PointPillarsROS /** * @brief Get base_link to lidar transformation - * @param[in] target_frameid target lidar frame_id + * @param[in] target_frameid Name of lidar frame_id to be targeted * @details Get transformation info */ void getBaselinkToLidarTF(const std::string& target_frameid); diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h index 29de5f97141..61f2985562f 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h @@ -50,13 +50,13 @@ class PostprocessCuda public: /** * @brief Constructor - * @param[in] FLOAT_MIN the lowest float value - * @param[in] FLOAT_MAX the maximum float value + * @param[in] FLOAT_MIN The lowest float value + * @param[in] FLOAT_MAX The maximum float value * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors * @param[in] score_threshold Score threshold for filtering output - * @param[in] NUM_THREADS Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads when launching cuda kernel * @param[in] nms_overlap_threshold IOU threshold for NMS * @param[in] NUM_BOX_CORNERS Number of box's corner * @param[in] NUM_OUTPUT_BOX_FEATURE Number of output box's feature @@ -69,21 +69,21 @@ class PostprocessCuda /** * @brief Postprocessing for the network output - * @param[in] rpn_box_output Box prediction from the network output - * @param[in] rpn_cls_output Class prediction from the network output - * @param[in] rpn_dir_output Direction prediction from the network output - * @param[in] dev_anchor_mask Anchor mask for filtering - * @param[in] dev_anchors_px X-coordinate value for corresponding anchor - * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor - * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor - * @param[in] dev_anchors_dx X-dimension value for corresponding anchor - * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor - * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor - * @param[in] dev_anchors_ro Rotation value for corresponding anchor - * @param[in] dev_filtered_box Filtered box prediction - * @param[in] dev_filtered_score Filtered score prediction - * @param[in] dev_filtered_dir Filtered direction prediction - * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms + * @param[in] rpn_box_output Box predictions from the network output + * @param[in] rpn_cls_output Class predictions from the network output + * @param[in] rpn_dir_output Direction predictions from the network output + * @param[in] dev_anchor_mask Anchor mask for filtering the network output + * @param[in] dev_anchors_px X-coordinate values for corresponding anchors + * @param[in] dev_anchors_py Y-coordinate values for corresponding anchors + * @param[in] dev_anchors_pz Z-coordinate values for corresponding anchors + * @param[in] dev_anchors_dx X-dimension values for corresponding anchors + * @param[in] dev_anchors_dy Y-dimension values for corresponding anchors + * @param[in] dev_anchors_dz Z-dimension values for corresponding anchors + * @param[in] dev_anchors_ro Rotation values for corresponding anchors + * @param[in] dev_filtered_box Filtered box predictions + * @param[in] dev_filtered_score Filtered score predictions + * @param[in] dev_filtered_dir Filtered direction predictions + * @param[in] dev_box_for_nms Decoded boxes in min_x min_y max_x max_y represenation from pose and dimension * @param[in] dev_filter_count The number of filtered output * @param[out] out_detection Output bounding boxes * @details dev_* represents device memory allocated variables diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h index b8b361db22e..b09aea1e42e 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h @@ -72,14 +72,14 @@ class PreprocessPoints * @param[in] x_coors X-coordinate indexes for corresponding pillars * @param[in] y_coors Y-coordinate indexes for corresponding pillars * @param[in] num_points_per_pillar Number of points in corresponding pillars - * @param[in] pillar_x X-coordinate value for points in each pillar - * @param[in] pillar_y Y-coordinate value for points in each pillar - * @param[in] pillar_z Z-coordinate value for points in each pillar - * @param[in] pillar_i Intensity value for points in each pillar - * @param[in] x_coors_for_sub_shaped Used for x substraction in the network - * @param[in] y_coors_for_sub_shaped Used for y substraction in the network - * @param[in] pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars - * @param[in] sparse_pillar_map Grid map represents occupancy pillars + * @param[in] pillar_x X-coordinate values for points in each pillar + * @param[in] pillar_y Y-coordinate values for points in each pillar + * @param[in] pillar_z Z-coordinate values for points in each pillar + * @param[in] pillar_i Intensity values for points in each pillar + * @param[in] x_coors_for_sub_shaped Array for x substraction in the network + * @param[in] y_coors_for_sub_shaped Array for y substraction in the network + * @param[in] pillar_feature_mask Mask to make pillars' feature zero where no points in the pillars + * @param[in] sparse_pillar_map Grid map representation for pillar-occupancy * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud * @details Convert pointcloud to pillar representation */ @@ -90,14 +90,14 @@ class PreprocessPoints /** * @brief Initializing variables for preprocessing - * @param[in] coor_to_pillaridx Map for converting from one set of coordinate to a pillar - * @param[in] sparse_pillar_map Grid map represents occupancy pillars - * @param[in] pillar_x X-coordinate value for points in each pillar - * @param[in] pillar_y Y-coordinate value for points in each pillar - * @param[in] pillar_z Z-coordinate value for points in each pillar - * @param[in] pillar_i Intensity value for points in each pillar - * @param[in] x_coors_for_sub_shaped Used for x substraction in the network - * @param[in] y_coors_for_sub_shaped Used for y substraction in the network + * @param[in] coor_to_pillaridx Map for converting one set of coordinate to a pillar + * @param[in] sparse_pillar_map Grid map representation for pillar-occupancy + * @param[in] pillar_x X-coordinate values for points in each pillar + * @param[in] pillar_y Y-coordinate values for points in each pillar + * @param[in] pillar_z Z-coordinate values for points in each pillar + * @param[in] pillar_i Intensity values for points in each pillar + * @param[in] x_coors_for_sub_shaped Array for x substraction in the network + * @param[in] y_coors_for_sub_shaped Array for y substraction in the network * @details Initializeing input arguments with certain values */ void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x, float* pillar_y, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h index 92cc3b6eb1e..67df868e3f9 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h @@ -58,7 +58,7 @@ class PreprocessPointsCuda public: /** * @brief Constructor - * @param[in] NUM_THREADS Number of threads for launching cuda kernel + * @param[in] NUM_THREADS Number of threads when launching cuda kernel * @param[in] MAX_NUM_PILLARS Maximum number of pillars * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum) @@ -87,15 +87,15 @@ class PreprocessPointsCuda * @param[in] dev_x_coors X-coordinate indexes for corresponding pillars * @param[in] dev_y_coors Y-coordinate indexes for corresponding pillars * @param[in] dev_num_points_per_pillar Number of points in corresponding pillars - * @param[in] dev_pillar_x X-coordinate value for points in each pillar - * @param[in] dev_pillar_y Y-coordinate value for points in each pillar - * @param[in] dev_pillar_z Z-coordinate value for points in each pillar - * @param[in] dev_pillar_i Intensity value for points in each pillar - * @param[in] dev_x_coors_for_sub_shaped Used for x substraction in the network - * @param[in] dev_y_coors_for_sub_shaped Used for y substraction in the network - * @param[in] dev_pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars - * @param[in] dev_sparse_pillar_map Grid map represents occupancy pillars - * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud + * @param[in] dev_pillar_x X-coordinate values for points in each pillar + * @param[in] dev_pillar_y Y-coordinate values for points in each pillar + * @param[in] dev_pillar_z Z-coordinate values for points in each pillar + * @param[in] dev_pillar_i Intensity values for points in each pillar + * @param[in] dev_x_coors_for_sub_shaped Array for x substraction in the network + * @param[in] dev_y_coors_for_sub_shaped Array for y substraction in the network + * @param[in] dev_pillar_feature_mask Mask to make pillars' feature zero where no points in the pillars + * @param[in] dev_sparse_pillar_map Grid map representation for pillar-occupancy + * @param[in] host_pillar_count The numnber of valid pillars for an input pointcloud * @details Convert pointcloud to pillar representation */ void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors, diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu index 3f52973e810..4bb01ecac3d 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu @@ -27,7 +27,6 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, const int row_start = blockIdx.y; const int col_start = blockIdx.x; - // if (row_start > col_start) return; const int block_threads = blockDim.x; const int row_size = diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp index 72d30f772e8..52fa314bf85 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp @@ -55,7 +55,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t , MAX_Z_RANGE_(1) , BATCH_SIZE_(1) , NUM_INDS_FOR_SCAN_(512) - , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size + , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify NUM_THREADS_MACRO in common.h , SENSOR_HEIGHT_(1.73f) , ANCHOR_DX_SIZE_(1.6f) , ANCHOR_DY_SIZE_(3.9f) @@ -263,7 +263,7 @@ void PointPillars::initAnchors() box_anchors_min_y_ = new float[NUM_ANCHOR_]; box_anchors_max_x_ = new float[NUM_ANCHOR_]; box_anchors_max_y_ = new float[NUM_ANCHOR_]; - // deallocate these memory in deconstructor + // deallocate these memories in deconstructor generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_); @@ -352,7 +352,7 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p float* box_anchors_min_x_, float* box_anchors_min_y_, float* box_anchors_max_x_, float* box_anchors_max_y_) { - // flip box's dimension when the third axis == 1 + // flipping box's dimension float flipped_anchors_dx[NUM_ANCHOR_] = { 0 }; float flipped_anchors_dy[NUM_ANCHOR_] = { 0 }; for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++) diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp index 3180f94d72b..82b6fd46d68 100644 --- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp +++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp @@ -107,7 +107,6 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point pillar_count += 1; coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor] = pillar_index; - // reverse index y_coors[pillar_index] = std::floor(y_coor); x_coors[pillar_index] = std::floor(x_coor); @@ -118,7 +117,6 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9f; x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1f; - // sparse pillar map sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1; } int num = num_points_per_pillar[pillar_index]; diff --git a/ros/src/sensing/drivers/lidar/packages/robosense b/ros/src/sensing/drivers/lidar/packages/robosense index 4a284cc1ba5..038e5f65243 160000 --- a/ros/src/sensing/drivers/lidar/packages/robosense +++ b/ros/src/sensing/drivers/lidar/packages/robosense @@ -1 +1 @@ -Subproject commit 4a284cc1ba595a7ad0679c2e5978975189a33d51 +Subproject commit 038e5f6524305d0d287805fdc9029872627ac7de From 4b6b0ac618e583d579afb415d563ce19d0d6824b Mon Sep 17 00:00:00 2001 From: Kosuke Murakami Date: Fri, 8 Mar 2019 11:20:29 +0900 Subject: [PATCH 37/37] revert robosense submodule --- ros/src/sensing/drivers/lidar/packages/robosense | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ros/src/sensing/drivers/lidar/packages/robosense b/ros/src/sensing/drivers/lidar/packages/robosense index 038e5f65243..4a284cc1ba5 160000 --- a/ros/src/sensing/drivers/lidar/packages/robosense +++ b/ros/src/sensing/drivers/lidar/packages/robosense @@ -1 +1 @@ -Subproject commit 038e5f6524305d0d287805fdc9029872627ac7de +Subproject commit 4a284cc1ba595a7ad0679c2e5978975189a33d51