From 1b5a90bac811cf27df9cea9611195f2776a5b055 Mon Sep 17 00:00:00 2001
From: cirpue49 <cirpue49@yahoo.co.jp>
Date: Tue, 26 Feb 2019 15:42:25 +0900
Subject: [PATCH 01/37] Add PointPillars

---
 .../lidar_point_pillars/CMakeLists.txt        | 119 ++++
 .../include/anchor_mask_cuda.h                |  84 +++
 .../lidar_point_pillars/include/common.h      |  45 ++
 .../lidar_point_pillars/include/nms_cuda.h    |  60 ++
 .../include/point_pillars.h                   | 290 ++++++++
 .../include/point_pillars_ros.h               | 141 ++++
 .../include/postprocess_cuda.h                |  94 +++
 .../include/preprocess_points.h               | 118 ++++
 .../include/preprocess_points_cuda.h          | 116 ++++
 .../include/scatter_cuda.h                    |  58 ++
 .../launch/lidar_point_pillars.launch         |  24 +
 .../nodes/anchor_mask_cuda.cu                 | 203 ++++++
 .../nodes/lidar_point_pillars_node.cpp        |  31 +
 .../lidar_point_pillars/nodes/nms_cuda.cu     | 110 ++++
 .../nodes/point_pillars.cpp                   | 621 ++++++++++++++++++
 .../nodes/point_pillars_ros.cpp               | 181 +++++
 .../nodes/postprocess_cuda.cu                 | 219 ++++++
 .../nodes/preprocess_points.cpp               | 165 +++++
 .../nodes/preprocess_points_cuda.cu           | 313 +++++++++
 .../lidar_point_pillars/nodes/scatter_cuda.cu |  43 ++
 .../packages/lidar_point_pillars/package.xml  |  24 +
 .../test/src/test_point_pillars.cpp           | 229 +++++++
 .../runtime_manager/scripts/computing.yaml    |  68 +-
 23 files changed, 3355 insertions(+), 1 deletion(-)
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
 create mode 100755 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
new file mode 100755
index 00000000000..fad7f1a04c6
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -0,0 +1,119 @@
+cmake_minimum_required(VERSION 2.8.3)
+project(lidar_point_pillars)
+
+# set flags for CUDA and TensorRT availability
+option(CUDA_AVAIL "CUDA available" OFF)
+find_package(CUDA)
+if (CUDA_FOUND)
+  message("CUDA is available!")
+  message("CUDA Libs: ${CUDA_LIBRARIES}")
+  message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  set(CUDA_AVAIL ON)
+else()
+  message("CUDA NOT FOUND")
+  set(CUDA_AVAIL OFF)
+endif (CUDA_FOUND)
+
+# try to find the tensorRT modules
+option(TRT_AVAIL "TensorRT available" OFF)
+find_library(NVINFER NAMES nvinfer)
+find_library(NVPARSERS NAMES nvparsers)
+if(NVINFER AND NVPARSERS)
+  message("TensorRT is available!")
+  message("NVINFER: ${NVINFER}")
+  message("NVPARSERS: ${NVPARSERS}")
+  set(TRT_AVAIL ON)
+  add_definitions(-DTRT_AVAIL)
+else()
+  message("TensorRT is NOT Available")
+  set(TRT_AVAIL OFF)
+endif()
+
+if(TRT_AVAIL AND CUDA_AVAIL)
+
+  find_package(autoware_build_flags REQUIRED)
+  find_package(catkin REQUIRED COMPONENTS
+   roscpp
+   pcl_ros
+   autoware_msgs
+   )
+  set(CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+  catkin_package(
+   CATKIN_DEPENDS
+   roscpp
+   pcl_ros
+   autoware_msgs
+   )
+
+  include_directories(
+   include
+   ${catkin_INCLUDE_DIRS}
+  )
+  set(SOURCE_FILES
+   nodes/lidar_point_pillars_node.cpp
+   nodes/point_pillars_ros.cpp
+   )
+
+  add_executable(lidar_point_pillars
+         ${SOURCE_FILES})
+
+  add_dependencies(lidar_point_pillars
+         ${catkin_EXPORTED_TARGETS}
+         )
+
+
+  cuda_add_library(gpu_point_pillars_lib
+         nodes/preprocess_points_cuda.cu
+         nodes/anchor_mask_cuda.cu
+         nodes/scatter_cuda.cu
+         nodes/postprocess_cuda.cu
+         nodes/nms_cuda.cu
+         )
+
+  target_link_libraries(gpu_point_pillars_lib
+   ${CUDA_LIBRARIES}
+  )
+
+  add_library(point_pillars_lib
+         nodes/point_pillars.cpp
+         nodes/preprocess_points.cpp
+         )
+  target_link_libraries(point_pillars_lib
+         ${catkin_LIBRARIES}
+         nvinfer
+         nvonnxparser
+         gpu_point_pillars_lib
+         )
+
+  target_link_libraries(lidar_point_pillars
+         ${catkin_LIBRARIES}
+         point_pillars_lib
+         )
+
+
+  install(TARGETS
+         lidar_point_pillars
+         ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+         LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
+         RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
+  )
+
+  install(DIRECTORY launch/
+         DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch
+         PATTERN ".svn" EXCLUDE)
+
+  install(DIRECTORY include/
+         DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
+         PATTERN ".svn" EXCLUDE
+         )
+
+  if (CATKIN_ENABLE_TESTING)
+    find_package(rostest REQUIRED)
+    catkin_add_gtest(test-point_pillars test/src/test_point_pillars.cpp)
+    target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib)
+  endif()
+ELSE()
+  message("PointPillars won't be built, CUDA and/or TensorRT were not found.")
+ENDIF ()
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
new file mode 100644
index 00000000000..ccca5cb6b6d
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file anchor_mask_cuda.h
+ * @brief Make anchor mask for filtering output
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef   ANCHOR_MASK_CUDA_H
+#define   ANCHOR_MASK_CUDA_H
+
+class AnchorMaskCuda
+{
+private:
+  const int NUM_INDS_FOR_SCAN_;
+  const int NUM_ANCHOR_X_INDS_;
+  const int NUM_ANCHOR_Y_INDS_;
+  const int NUM_ANCHOR_R_INDS_;
+  const float MIN_X_RANGE_;
+  const float MIN_Y_RANGE_;
+  const float PILLAR_X_SIZE_;
+  const float PILLAR_Y_SIZE_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+public:
+  /**
+  * @brief Constructor
+  * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
+  * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors
+  * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors
+  * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors
+  * @param[in] MIN_X_RANGE Minimum x value for pointcloud
+  * @param[in] MIN_Y_RANGE Minimum y value for pointcloud
+  * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar
+  * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar
+  * @param[in] GRID_X_SIZE Number of pillars in x-coordinate
+  * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate
+  * @details Captital variables never change after the compile
+  */
+  AnchorMaskCuda(const int NUM_INDS_FOR_SCAN,
+                 const int NUM_ANCHOR_X_INDS,
+                 const int NUM_ANCHOR_Y_INDS,
+                 const int NUM_ANCHOR_R_INDS,
+                 const float MIN_X_RANGE,
+                 const float MIN_Y_RANGE,
+                 const float PILLAR_X_SIZE,
+                 const float PILLAR_Y_SIZE,
+                 const int GRID_X_SIZE,
+                 const int GRID_Y_SIZE);
+
+  /**
+  * @brief call cuda code for making anchor mask
+  * @param[in] dev_sparse_pillar_map Grid map representation for pillar occupancy
+  * @param[in] dev_cumsum_along_x Cumsum dev_sparse_pillar_map along x axis
+  * @param[in] dev_cumsum_along_y Cumsum dev_cumsum_along_x along y axis
+  * @param[in] dev_box_anchors_min_x Array for storng min x value for each anchor
+  * @param[in] dev_box_anchors_min_y Array for storng min y value for each anchor
+  * @param[in] dev_box_anchors_max_x Array for storng max x value for each anchor
+  * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor
+  * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor
+  * @param[out] dev_anchor_mask Anchor mask for filtering output
+  * @details dev_* means device memory. Make a mask for activating pillar occupancy area
+  */
+  void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y,
+    const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
+    const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask);
+};
+
+#endif  // ANCHOR_MASK_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
new file mode 100644
index 00000000000..7cf02333b4d
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file common.h
+ * @brief MACRO for CUDA codes
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef   COMMON_H
+#define   COMMON_H
+
+//headers in STL
+#include <stdio.h>
+
+//headers in CUDA
+#include <cuda_runtime_api.h>
+
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
+
+#define GPU_CHECK(ans) { GPUAssert((ans), __FILE__, __LINE__); }
+inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+   if (code != cudaSuccess)
+   {
+      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+      if (abort) exit(code);
+   }
+}
+
+#endif  // COMMON_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
new file mode 100644
index 00000000000..7a34abf498b
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+* @file nms_cuda.h
+* @brief Non-maximum suppresion for network output
+* @author Modified by Kosuke Murakami
+* @date 2019/02/26
+*/
+
+#ifndef   NMS_CUDA_H
+#define   NMS_CUDA_H
+
+//heders in STL
+#include <vector>
+#include <iostream>
+
+//headers in local files
+#include "common.h"
+
+class NMSCuda
+{
+private:
+  const int NUM_THREADS_;
+  const float nms_overlap_threshold_;
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] NUM_THRESD Number of threads for launching cuda kernel
+  * @param[in] nms_overlap_threshold IOU threshold for NMS
+  * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam
+  */
+  NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold);
+
+  /**
+  * @brief GPU Non-Maximum Suppresion for network output
+  * @param[in] host_filter_count Number of filtered output
+  * @param[in] dev_sorted_box_for_nms Bounding box output sorted by score
+  * @param[out] out_keep_inds Indexes for selected bounding box
+  * @param[out] out_num_to_keep Number of keep bounding box
+  * @details Includes CUDA NMS and postprocessing for selecting box in CPU
+  */
+  void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds,  int& out_num_to_keep);
+};
+
+#endif  // NMS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
new file mode 100755
index 00000000000..c58fc5e58d7
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file point_pillars.h
+ * @brief Algorithm for PointPillars
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+
+#ifndef POINTS_PILLAR_H
+#define POINTS_PILLAR_H
+
+//headers in STL
+#include <vector>
+#include <iomanip>
+#include <string>
+#include <map>
+#include <cmath>
+#include <algorithm>
+#include <limits>
+
+//headers in TensorRT
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+//headers in local files
+#include "common.h"
+#include "preprocess_points.h"
+#include "preprocess_points_cuda.h"
+#include "anchor_mask_cuda.h"
+#include "scatter_cuda.h"
+#include "postprocess_cuda.h"
+
+
+// Logger for TensorRT info/warning/errors
+class Logger : public nvinfer1::ILogger
+{
+public:
+    Logger(Severity severity = Severity::kWARNING)
+        : reportableSeverity(severity)
+    {
+    }
+
+    void log(Severity severity, const char* msg) override
+    {
+        // suppress messages with severity enum value greater than the reportable
+        if (severity > reportableSeverity)
+            return;
+
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
+        case Severity::kERROR: std::cerr << "ERROR: "; break;
+        case Severity::kWARNING: std::cerr << "WARNING: "; break;
+        case Severity::kINFO: std::cerr << "INFO: "; break;
+        default: std::cerr << "UNKNOWN: "; break;
+        }
+        std::cerr << msg << std::endl;
+    }
+
+    Severity reportableSeverity;
+};
+
+class PointPillars
+{
+private:
+  //initize in initializer list
+  const bool reproduce_result_mode_;
+  const float score_threshold_;
+  const float nms_overlap_threshold_;
+  const std::string pfe_onnx_file_;
+  const std::string rpn_onnx_file_;
+  const int MAX_NUM_PILLARS_;
+  const int MAX_NUM_POINTS_PER_PILLAR_;
+  const int PFE_OUTPUT_SIZE_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+  const int GRID_Z_SIZE_;
+  const int RPN_INPUT_SIZE_;
+  const int NUM_ANCHOR_X_INDS_;
+  const int NUM_ANCHOR_Y_INDS_;
+  const int NUM_ANCHOR_R_INDS_;
+  const int NUM_ANCHOR_;
+  const int RPN_BOX_OUTPUT_SIZE_;
+  const int RPN_CLS_OUTPUT_SIZE_;
+  const int RPN_DIR_OUTPUT_SIZE_;
+  const float PILLAR_X_SIZE_;
+  const float PILLAR_Y_SIZE_;
+  const float PILLAR_Z_SIZE_;
+  const float MIN_X_RANGE_;
+  const float MIN_Y_RANGE_;
+  const float MIN_Z_RANGE_;
+  const float MAX_X_RANGE_;
+  const float MAX_Y_RANGE_;
+  const float MAX_Z_RANGE_;
+  const int BATCH_SIZE_;
+  const int NUM_INDS_FOR_SCAN_;
+  const int NUM_THREADS_;
+  const float SENSOR_HEIGHT_;
+  const float ANCHOR_DX_SIZE_;
+  const float ANCHOR_DY_SIZE_;
+  const float ANCHOR_DZ_SIZE_;
+  //end initializer list
+
+
+  int host_pillar_count_[1];
+
+  float* anchors_px_;
+  float* anchors_py_;
+  float* anchors_pz_;
+  float* anchors_dx_;
+  float* anchors_dy_;
+  float* anchors_dz_;
+  float* anchors_ro_;
+
+  float* box_anchors_min_x_;
+  float* box_anchors_min_y_;
+  float* box_anchors_max_x_;
+  float* box_anchors_max_y_;
+
+  // cuda malloc
+  float* dev_pillar_x_in_coors_;
+  float* dev_pillar_y_in_coors_;
+  float* dev_pillar_z_in_coors_;
+  float* dev_pillar_i_in_coors_;
+  int* dev_pillar_count_histo_;
+
+  int* dev_x_coors_;
+  int* dev_y_coors_;
+  float* dev_num_points_per_pillar_;
+  int* dev_sparse_pillar_map_;
+  int* dev_cumsum_along_x_;
+  int* dev_cumsum_along_y_;
+
+  float* dev_pillar_x_;
+  float* dev_pillar_y_;
+  float* dev_pillar_z_;
+  float* dev_pillar_i_;
+
+  float* dev_x_coors_for_sub_shaped_;
+  float* dev_y_coors_for_sub_shaped_;
+  float* dev_pillar_feature_mask_;
+
+  float* dev_box_anchors_min_x_;
+  float* dev_box_anchors_min_y_;
+  float* dev_box_anchors_max_x_;
+  float* dev_box_anchors_max_y_;
+  int* dev_anchor_mask_;
+
+  void* pfe_buffers_[9];
+  void* rpn_buffers_[4];
+
+  float* dev_scattered_feature_;
+
+  float* dev_anchors_px_;
+  float* dev_anchors_py_;
+  float* dev_anchors_pz_;
+  float* dev_anchors_dx_;
+  float* dev_anchors_dy_;
+  float* dev_anchors_dz_;
+  float* dev_anchors_ro_;
+  float* dev_filtered_box_;
+  float* dev_filtered_score_;
+  int* dev_filtered_dir_;
+  float* dev_box_for_nms_;
+  int* dev_filter_count_;
+
+
+  std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
+  std::unique_ptr<PreprocessPointsCuda> preprocess_points_cuda_ptr_;
+  std::unique_ptr<AnchorMaskCuda> anchor_mask_cuda_ptr_;
+  std::unique_ptr<ScatterCuda> scatter_cuda_ptr_;
+  std::unique_ptr<PostprocessCuda> postprocess_cuda_ptr_;
+
+  Logger g_logger_;
+  nvinfer1::IExecutionContext* pfe_context_;
+  nvinfer1::IExecutionContext* rpn_context_;
+  nvinfer1::IRuntime* pfe_runtime_;
+  nvinfer1::IRuntime* rpn_runtime_;
+  nvinfer1::ICudaEngine* pfe_engine_;
+  nvinfer1::ICudaEngine* rpn_engine_;
+
+  /**
+  * @brief Memory allocation for device memory
+  * @details Call in the constructor
+  */
+  void deviceMemoryMalloc();
+
+  /**
+  * @brief Initialize anchor
+  * @details Call in the constructor
+  */
+  void initAnchors();
+
+  /**
+  * @brief Initialize TensorRT isntances
+  * @details Call in the constructor
+  */
+  void initTRT();
+
+  /**
+  * @brief Convert ONNX to TensorRT model
+  * @param[in] model_file ONNX model file path
+  * @param[out] trt_model_stream TensorRT model made out of ONNX model
+  * @details Load ONNX model, and convert it to TensorRT model
+  */
+  void onnxToTRTModel(const std::string& model_file, nvinfer1::IHostMemory*& trt_model_stream);
+
+  /**
+  * @brief Preproces points
+  * @param[in] in_points_array pointcloud array
+  * @param[in] in_num_points Number of points
+  * @details Call CPU or GPU preprocess
+  */
+  void preprocess(const float* in_points_array, const int in_num_points);
+
+  /**
+  * @brief Preproces by CPU
+  * @param[in] in_points_array pointcloud array
+  * @param[in] in_num_points Number of points
+  * @details The output from CPU preprocess is reproducible
+  */
+  void preprocessCPU(const float* in_points_array, const int in_num_points);
+
+  /**
+  * @brief Preproces by GPU
+  * @param[in] in_points_array pointcloud array
+  * @param[in] in_num_points Number of points
+  * @details Fast preprocess comapared with CPU preprocess
+  */
+  void preprocessGPU(const float* in_points_array, const int in_num_points);
+
+  /**
+  * @brief Convert anchors to box form like min_x, min_y, max_x, max_y anchors
+  * @param[in] anchors_px Represents x-coordinate value for corresponding anchor
+  * @param[in] anchors_py Represents y-coordinate value for corresponding anchor
+  * @param[in] anchors_dx Represents x-dimension value for corresponding anchor
+  * @param[in] anchors_dy Represents y-dimension value for corresponding anchor
+  * @details Make box anchors for nms
+  */
+  void convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy);
+
+  /**
+  * @brief Memory allocation for anchors
+  * @details Memory allocation for anchors
+  */
+  void putAnchorsInDeviceMemory();
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] reproduce_result_mode Boolean, if true, the output is reproducible for the same input
+  * @param[in] score_threshold Score threshold for filtering output
+  * @param[in] nms_overlap_threshold IOU threshold for NMS
+  * @param[in] pfe_onnx_file Pillar Feature Extractor ONNX file path
+  * @param[in] rpn_onnx_file Region Proposal Network ONNX file path
+  * @details Variables could be chaned through rosparam
+  */
+  PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold,
+               const std::string pfe_onnx_file, const std::string rpn_onnx_file);
+  ~PointPillars();
+
+  /**
+  * @brief Call PointPillars for the inference
+  * @param[in] in_points_array pointcloud array
+  * @param[in] in_num_points Number of points
+  * @param[in] out_detections Network output bounding box
+  * @param[in] out_num_objects Number of output bounding box
+  * @details This is interface for the algorithm
+  */
+  void doInference(const float* in_points_array, const int in_num_points,
+    std::vector<float>& out_detection, int& out_num_objects);
+};
+
+#endif  // POINTS_PILLAR_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
new file mode 100755
index 00000000000..e0b1e1bd53c
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file point_pillars_ros.h
+ * @brief ROS interface for PointPillars
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef POINTS_PILLAR_ROS_H
+#define POINTS_PILLAR_ROS_H
+
+//headers in STL
+#include <memory>
+#include <vector>
+
+//headers in ROS
+#include <ros/ros.h>
+#include <sensor_msgs/PointCloud2.h>
+#include <tf/transform_listener.h>
+
+//headers in PCL
+#include <pcl/io/pcd_io.h>
+
+//headers in local files
+#include "point_pillars.h"
+
+
+class PointPillarsROS
+{
+private:
+  friend class TestClass;
+
+  //initializer list
+  ros::NodeHandle private_nh_;
+  bool has_subscribed_baselink_;
+  const int NUM_POINT_FEATURE_;
+  const int OUTPUT_NUM_BOX_FEATURE_;
+  const float TRAINED_SENSOR_HEIGHT_;
+  const float NORMALIZING_INTENSITY_VALUE_;
+  const std::string BASELINK_FRAME_;
+  //end initializer list
+
+  //rosparam
+  bool baselink_support_;
+  bool reproduce_result_mode_;
+  float score_threshold_;
+  float nms_overlap_threshold_;
+  std::string pfe_onnx_file_;
+  std::string rpn_onnx_file_;
+  //end rosparam
+
+  ros::NodeHandle nh_;
+  ros::Subscriber sub_points_;
+  ros::Publisher pub_objects_;
+
+  tf::TransformListener tf_listener_;
+  tf::StampedTransform baselink2lidar_;
+  tf::Transform angle_transform_;
+  tf::Transform angle_transform_inversed_;
+
+
+
+  float offset_z_from_trained_data_;
+
+  std::unique_ptr<PointPillars> point_pillars_ptr_;
+
+  /**
+  * @brief Get base_link to lidar transformation
+  * @param[in] target_frameid target lidar frame_id
+  * @details Get transformation info
+  */
+  void getBaselinkToLidarTF(const std::string& target_frameid);
+
+  /**
+  * @brief Analyze tf information
+  * @param[in] lidar2baselink transofomation info
+  * @details Calculate z offset compared with trained sensor height and get rotation matrix
+  */
+  void getTFInfo(tf::StampedTransform lidar2baselink);
+
+  /**
+  * @brief Transform pose based on tf stamp info
+  * @param[in] in_pose Target pose to be transformed
+  * @param[in] tf TF stamp contains rotation matrix and translation matrix
+  * @return geometry_msgs::Pose Transformed pose
+  * @details Calculate transformed pose
+  */
+  geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose,
+                                         const tf::Transform& tf);
+
+  /**
+  * @brief callback for pointcloud
+  * @param[in] input pointcloud from lidar sensor
+  * @details Call point_pillars to get 3D bounding box
+  */
+  void pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& input);
+
+  /**
+  * @brief convert pcl point to c++ array
+  * @param[in] in_pcl_pc_ptr pointcloud in pcl format
+  * @param[out] out_points_array converted pointcloud in c++ array format
+  * @param[in] offset_z (default: 1.0) when using baselink_support, offset height based on current sensor configuration
+  * @details convert pcl points to c++ array, plus offset z if it is necessary
+  */
+  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z = 0);
+
+  /**
+  * @brief publish DetectedObject
+  * @param[in] detections Network output bounding box
+  * @param[in] num_objects Number of objects
+  * @param[in] pc_header Header from pointcloud
+  * @details Convert std::vector to DetectedObject, and publish them
+  */
+  void pubDetectedObject(const std::vector<float>& detections, const int num_objects, const std_msgs::Header& pc_header);
+
+public:
+  PointPillarsROS();
+
+  /**
+  * @brief Create ROS pub/sub obejct
+  * @details Create/Initializing ros pub/sub object
+  */
+  void createROSPubSub();
+};
+
+#endif  // POINTS_PILLAR_ROS_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
new file mode 100644
index 00000000000..261cdd19ed1
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file postprocess_cuda.h
+ * @brief Postprocess for network output
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef   POSTPROCESS_CUDA_H
+#define   POSTPROCESS_CUDA_H
+
+//headers in STL
+#include <memory>
+#include <iostream>
+
+//headers in local files
+#include "nms_cuda.h"
+
+class PostprocessCuda
+{
+private:
+  const float FLOAT_MIN_;
+  const float FLOAT_MAX_;
+  const int NUM_ANCHOR_X_INDS_;
+  const int NUM_ANCHOR_Y_INDS_;
+  const int NUM_ANCHOR_R_INDS_;
+  const float score_threshold_;
+  const int NUM_THREADS_;
+  const float nms_overlap_threshold_;
+
+  std::unique_ptr<NMSCuda> nms_cuda_ptr_;
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] FLOAT_MIN the lowest float value
+  * @param[in] FLOAT_MAX the maximum float value
+  * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors
+  * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors
+  * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors
+  * @param[in] score_threshold Score threshold for filtering output
+  * @param[in] NUM_THREDS Number of threads for launching cuda kernel
+  * @param[in] nms_overlap_threshold IOU threshold for NMS
+  * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam
+  */
+  PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX,
+    const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS,
+    const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold);
+
+    /**
+    * @brief Postprocessing for the network output
+    * @param[in] rpn_box_output Box prediction from the network output
+    * @param[in] rpn_cls_output Class prediction from the network output
+    * @param[in] rpn_dir_output Direction prediction from the network output
+    * @param[in] dev_anchor_mask Anchor mask for filtering
+    * @param[in] dev_anchors_px X-coordinate value for corresponding anchor
+    * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor
+    * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor
+    * @param[in] dev_anchors_dx X-dimension value for corresponding anchor
+    * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor
+    * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor
+    * @param[in] dev_anchors_ro Rotation value for corresponding anchor
+    * @param[in] dev_filtered_box Filtered box prediction
+    * @param[in] dev_filtered_score Filtered score prediction
+    * @param[in] dev_filtered_dir Filtered direction prediction
+    * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms
+    * @param[in] dev_filter_count The number of filtered output
+    * @param[out] out_detection Output bounding boxes
+    * @param[out] out_num_objects The number of output bounding boxes
+    * @details dev_* represents device memory allocated variables
+    */
+  void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output,
+          int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
+          const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
+          float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count,
+          std::vector<float>& out_detection, int& out_num_objects);
+};
+
+#endif  // POSTPROCESS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
new file mode 100644
index 00000000000..bd0b54720f2
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file preprocess_points.h
+ * @brief CPU version of preprocess points
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef PREPROCESS_POINTS_H
+#define PREPROCESS_POINTS_H
+
+
+
+class PreprocessPoints
+{
+private:
+  friend class TestClass;
+  const int MAX_NUM_PILLARS_;
+  const int MAX_NUM_POINTS_PER_PILLAR_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+  const int GRID_Z_SIZE_;
+  const float PILLAR_X_SIZE_;
+  const float PILLAR_Y_SIZE_;
+  const float PILLAR_Z_SIZE_;
+  const float MIN_X_RANGE_;
+  const float MIN_Y_RANGE_;
+  const float MIN_Z_RANGE_;
+  const int NUM_INDS_FOR_SCAN_;
+
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] MAX_NUM_PILLARS Maximum number of pillars
+  * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar
+  * @param[in] GRID_X_SIZE Number of pillars in x-coordinate
+  * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate
+  * @param[in] GRID_Z_SIZE Number of pillars in z-coordinate
+  * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar
+  * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar
+  * @param[in] PILLAR_Z_SIZE Size of z-dimension for a pillar
+  * @param[in] MIN_X_RANGE Minimum x value for pointcloud
+  * @param[in] MIN_Y_RANGE Minimum y value for pointcloud
+  * @param[in] MIN_Z_RANGE Minimum z value for pointcloud
+  * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
+  * @details Captital variables never change after the compile
+  */
+  PreprocessPoints(const int MAX_NUM_PILLARS,
+                   const int MAX_POINTS_PER_PILLAR,
+                   const int GRID_X_SIZE,
+                   const int GRID_Y_SIZE,
+                   const int GRID_Z_SIZE,
+                   const float PILLAR_X_SIZE,
+                   const float PILLAR_Y_SIZE,
+                   const float PILLAR_Z_SIZE,
+                   const float MIN_X_RANGE,
+                   const float MIN_Y_RANGE,
+                   const float MIN_Z_RANGE,
+                   const int NUM_INDS_FOR_SCAN);
+
+  /**
+  * @brief CPU preprocessing for input pointcloud
+  * @param[in] in_points_array Pointcloud array
+  * @param[in] in_num_points The number of points
+  * @param[in] x_coors X-coordinate indexes for corresponding pillars
+  * @param[in] y_coors Y-coordinate indexes for corresponding pillars
+  * @param[in] num_points_per_pillar Number of points in corresponding pillars
+  * @param[in] pillar_x X-coordinate value for points in each pillar
+  * @param[in] pillar_y Y-coordinate value for points in each pillar
+  * @param[in] pillar_z Z-coordinate value for points in each pillar
+  * @param[in] pillar_i Intensity value for points in each pillar
+  * @param[in] x_coors_for_sub_shaped Used for x substraction in the network
+  * @param[in] y_coors_for_sub_shaped Used for y substraction in the network
+  * @param[in] pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars
+  * @param[in] sparse_pillar_map Grid map represents occupancy pillars
+  * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud
+  * @details Convert pointcloud to pillar representation
+  */
+  void preprocess(const float* in_points_array, int in_num_points,
+                  int* x_coors, int* y_coors, float* num_points_per_pillar,
+                  float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                  float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+                  float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count);
+
+  /**
+  * @brief Initializing variables for preprocessing
+  * @param[in] coor_to_pillaridx Map for converting from one set of coordinate to a pillar
+  * @param[in] sparse_pillar_map Grid map represents occupancy pillars
+  * @param[in] pillar_x X-coordinate value for points in each pillar
+  * @param[in] pillar_y Y-coordinate value for points in each pillar
+  * @param[in] pillar_z Z-coordinate value for points in each pillar
+  * @param[in] pillar_i Intensity value for points in each pillar
+  * @param[in] x_coors_for_sub_shaped Used for x substraction in the network
+  * @param[in] y_coors_for_sub_shaped Used for y substraction in the network
+  * @details Initializeing input arguments with certain values
+  */
+  void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map,
+                           float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                           float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped);
+};
+
+#endif  // PREPROCESS_POINTS_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
new file mode 100644
index 00000000000..7a4dd8420d2
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file preprocess_points_cuda.h
+ * @brief GPU version of preprocess points
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+
+#ifndef   PREPROCESS_POINTS_CUDA_H
+#define   PREPROCESS_POINTS_CUDA_H
+
+
+class PreprocessPointsCuda
+{
+private:
+  //initialzer list
+  const int NUM_THREADS_;
+  const int MAX_NUM_PILLARS_;
+  const int MAX_NUM_POINTS_PER_PILLAR_;
+  const int NUM_INDS_FOR_SCAN_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+  const int GRID_Z_SIZE_;
+  const float PILLAR_X_SIZE_;
+  const float PILLAR_Y_SIZE_;
+  const float PILLAR_Z_SIZE_;
+  const float MIN_X_RANGE_;
+  const float MIN_Y_RANGE_;
+  const float MIN_Z_RANGE_;
+  //end initalizer list
+
+  float* dev_pillar_x_in_coors_;
+  float* dev_pillar_y_in_coors_;
+  float* dev_pillar_z_in_coors_;
+  float* dev_pillar_i_in_coors_;
+  int* dev_pillar_count_histo_;
+
+  int* dev_counter_;
+  int* dev_pillar_count_;
+  float* dev_x_coors_for_sub_;
+  float* dev_y_coors_for_sub_;
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] NUM_THREDS Number of threads for launching cuda kernel
+  * @param[in] MAX_NUM_PILLARS Maximum number of pillars
+  * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar
+  * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
+  * @param[in] GRID_X_SIZE Number of pillars in x-coordinate
+  * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate
+  * @param[in] GRID_Z_SIZE Number of pillars in z-coordinate
+  * @param[in] PILLAR_X_SIZE Size of x-dimension for a pillar
+  * @param[in] PILLAR_Y_SIZE Size of y-dimension for a pillar
+  * @param[in] PILLAR_Z_SIZE Size of z-dimension for a pillar
+  * @param[in] MIN_X_RANGE Minimum x value for pointcloud
+  * @param[in] MIN_Y_RANGE Minimum y value for pointcloud
+  * @param[in] MIN_Z_RANGE Minimum z value for pointcloud
+  * @details Captital variables never change after the compile
+  */
+  PreprocessPointsCuda(const int NUM_THREADS,
+                       const int MAX_NUM_PILLARS,
+                       const int MAX_POINTS_PER_PILLAR,
+                       const int NUM_INDS_FOR_SCAN,
+                       const int GRID_X_SIZE,
+                       const int GRID_Y_SIZE,
+                       const int GRID_Z_SIZE,
+                       const float PILLAR_X_SIZE,
+                       const float PILLAR_Y_SIZE,
+                       const float PILLAR_Z_SIZE,
+                       const float MIN_X_RANGE,
+                       const float MIN_Y_RANGE,
+                       const float MIN_Z_RANGE);
+  ~PreprocessPointsCuda();
+
+  /**
+  * @brief CUDA preprocessing for input pointcloud
+  * @param[in] dev_points Pointcloud array
+  * @param[in] in_num_points The number of points
+  * @param[in] dev_x_coors X-coordinate indexes for corresponding pillars
+  * @param[in] dev_y_coors Y-coordinate indexes for corresponding pillars
+  * @param[in] dev_num_points_per_pillar Number of points in corresponding pillars
+  * @param[in] dev_pillar_x X-coordinate value for points in each pillar
+  * @param[in] dev_pillar_y Y-coordinate value for points in each pillar
+  * @param[in] dev_pillar_z Z-coordinate value for points in each pillar
+  * @param[in] dev_pillar_i Intensity value for points in each pillar
+  * @param[in] dev_x_coors_for_sub_shaped Used for x substraction in the network
+  * @param[in] dev_y_coors_for_sub_shaped Used for y substraction in the network
+  * @param[in] dev_pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars
+  * @param[in] dev_sparse_pillar_map Grid map represents occupancy pillars
+  * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud
+  * @details Convert pointcloud to pillar representation
+  */
+  void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors,
+                      float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i,
+                      float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped,
+                      float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count);
+};
+
+#endif  // PREPROCESS_POINTS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
new file mode 100644
index 00000000000..24e4f0b79dc
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file scatter_cuda.h
+ * @brief CUDA code for scatter operation
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#ifndef   SCATTERCUDA_H
+#define   SCATTERCUDA_H
+
+class ScatterCuda
+{
+private:
+  const int NUM_THREADS_;
+  const int MAX_NUM_PILLARS_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+
+public:
+  /**
+  * @brief Constructor
+  * @param[in] NUM_THREADS The number of threads to launch cuda kernel
+  * @param[in] MAX_NUM_PILLARS Maximum number of pillars
+  * @param[in] GRID_X_SIZE Number of pillars in x-coordinate
+  * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate
+  * @details Captital variables never change after the compile
+  */
+  ScatterCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int GRID_X_SIZE, const int GRID_Y_SIZE);
+
+  /**
+  * @brief Call scatter cuda kernel
+  * @param[in] pillar_count The valid number of pillars
+  * @param[in] x_coors X-coordinate indexes for corresponding pillars
+  * @param[in] y_coors Y-coordinate indexes for corresponding pillars
+  * @param[in] pfe_output Output from Pillar Feature Extractor
+  * @param[out] scattered_feature Gridmap representation for pillars' feature
+  * @details Allocate pillars in gridmap based on index(coordinates) information
+  */
+  void doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature);
+};
+
+#endif  // SCATTERCUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
new file mode 100755
index 00000000000..4a7618eacc8
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
@@ -0,0 +1,24 @@
+<launch>
+  <arg name="input_topic" default="/points_raw" />
+  <arg name="baselink_support" default="true" />
+  <arg name="reproduce_result_mode" default="true" />
+  <arg name="score_threshold" default="0.5" />
+  <arg name="nms_overlap_threshold" default="0.5" />
+  <arg name="pfe_onnx_file" default="" />
+  <arg name="rpn_onnx_file" default="" />
+  <node pkg="lidar_point_pillars" type="lidar_point_pillars"
+    name="lidar_point_pillars" output="screen" >
+    <remap from="/points_raw" to="$(arg input_topic)"/>
+
+    <param name="baselink_support"            value="$(arg baselink_support)" />
+    <param name="reproduce_result_mode"        value="$(arg reproduce_result_mode)" />
+    <param name="score_threshold"   value="$(arg score_threshold)" />
+    <param name="nms_overlap_threshold"         value="$(arg nms_overlap_threshold)" />
+    <param name="pfe_onnx_file"   value="$(arg pfe_onnx_file)" />
+    <param name="rpn_onnx_file"   value="$(arg rpn_onnx_file)" />
+  </node>
+
+  <node pkg="detected_objects_visualizer" type="visualize_detected_objects" name="point_pillars_visualization_01"
+        output="screen" ns="/detection/lidar_detector" />
+
+</launch>
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
new file mode 100644
index 00000000000..4383c1ae63c
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in local files
+#include "common.h"
+#include "anchor_mask_cuda.h"
+
+__global__ void scan_x(int *g_odata, int *g_idata, int n)
+{
+  extern __shared__ int temp[];  // allocated on invocation
+  int thid = threadIdx.x;
+  int bid = blockIdx.x;
+  int bdim = blockDim.x;
+  int offset = 1;
+  temp[2*thid]   = g_idata[bid*bdim*2 + 2*thid]; // load input into shared memory
+  temp[2*thid+1] = g_idata[bid*bdim*2 + 2*thid+1];
+  for (int d = n>>1; d > 0; d >>= 1)                    // build sum in place up the tree
+  {
+    __syncthreads();
+    if (thid < d)
+    {
+      int ai = offset*(2*thid+1)-1;
+      int bi = offset*(2*thid+2)-1;
+      temp[bi] += temp[ai];
+    }
+    offset *= 2;
+  }
+  if (thid == 0) { temp[n - 1] = 0; } // clear the last element
+  for (int d = 1; d < n; d *= 2) // traverse down tree & build scan
+    {
+      offset >>= 1;
+      __syncthreads();
+      if (thid < d)
+      {
+         int ai = offset*(2*thid+1)-1;
+         int bi = offset*(2*thid+2)-1;
+         int t = temp[ai];
+         temp[ai] = temp[bi];
+         temp[bi] += t;
+      }
+    }
+  __syncthreads();
+  g_odata[bid*bdim*2 + 2*thid] = temp[2*thid+1]; // write results to device memory
+  int second_ind = 2*thid+2;
+  if(second_ind == bdim*2 )
+  {
+    g_odata[bid*bdim*2 + 2*thid+1] = temp[2*thid+1] + g_idata[bid*bdim*2 + 2*thid+1];
+  }
+  else
+  {
+    g_odata[bid*bdim*2 + 2*thid+1] = temp[2*thid+2];
+  }
+}
+
+__global__ void scan_y(int *g_odata, int *g_idata, int n)
+{
+  extern __shared__ int temp[];  // allocated on invocation
+  int thid = threadIdx.x;
+  int bid = blockIdx.x;
+  int bdim = blockDim.x;
+  int gdim = gridDim.x;
+  int offset = 1;
+  temp[2*thid]   = g_idata[bid + 2*thid*gdim]; // load input into shared memory
+  temp[2*thid+1] = g_idata[bid + 2*thid*gdim+ gdim];
+  for (int d = n>>1; d > 0; d >>= 1)                    // build sum in place up the tree
+  {
+    __syncthreads();
+    if (thid < d)
+    {
+      int ai = offset*(2*thid+1)-1;
+      int bi = offset*(2*thid+2)-1;
+      temp[bi] += temp[ai];
+    }
+    offset *= 2;
+  }
+  if (thid == 0) { temp[n - 1] = 0; } // clear the last element
+  for (int d = 1; d < n; d *= 2) // traverse down tree & build scan
+    {
+      offset >>= 1;
+      __syncthreads();
+      if (thid < d)
+      {
+         int ai = offset*(2*thid+1)-1;
+         int bi = offset*(2*thid+2)-1;
+         int t = temp[ai];
+         temp[ai] = temp[bi];
+         temp[bi] += t;
+      }
+    }
+  __syncthreads();
+  g_odata[bid + 2*thid*gdim] = temp[2*thid+1]; // write results to device memory
+  int second_ind = 2*thid+2;
+  if(second_ind == bdim*2 )
+  {
+    g_odata[bid + 2*thid*gdim + gdim] = temp[2*thid+1] + g_idata[bid + 2*thid*gdim + gdim];
+  }
+  else
+  {
+    g_odata[bid + 2*thid*gdim + gdim] = temp[2*thid+2];
+  }
+}
+
+__global__ void make_anchor_mask_kernel(const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
+                                        const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y,
+                                        int* dev_sparse_pillar_map, int* dev_anchor_mask,
+                                        const float MIN_X_RANGE,
+                                        const float MIN_Y_RANGE,
+                                        const float PILLAR_X_SIZE,
+                                        const float PILLAR_Y_SIZE,
+                                        const int GRID_X_SIZE,
+                                        const int GRID_Y_SIZE,
+                                        const int NUM_INDS_FOR_SCAN)
+{
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  int anchor_coor[4] = {0};
+  const int GRID_X_SIZE_1 = GRID_X_SIZE - 1;//grid_x_size - 1
+  const int GRID_Y_SIZE_1 = GRID_Y_SIZE - 1;//grid_y_size - 1
+
+  anchor_coor[0] = floor((dev_box_anchors_min_x[tid] - MIN_X_RANGE) / PILLAR_X_SIZE);
+  anchor_coor[1] = floor((dev_box_anchors_min_y[tid] - MIN_Y_RANGE) / PILLAR_Y_SIZE);
+  anchor_coor[2] = floor((dev_box_anchors_max_x[tid] - MIN_X_RANGE) / PILLAR_X_SIZE);
+  anchor_coor[3] = floor((dev_box_anchors_max_y[tid] - MIN_Y_RANGE) / PILLAR_Y_SIZE);
+  anchor_coor[0] = max(anchor_coor[0], 0);
+  anchor_coor[1] = max(anchor_coor[1], 0);
+  anchor_coor[2] = min(anchor_coor[2], GRID_X_SIZE_1);
+  anchor_coor[3] = min(anchor_coor[3], GRID_Y_SIZE_1);
+
+  int right_top    = dev_sparse_pillar_map[anchor_coor[3]*NUM_INDS_FOR_SCAN + anchor_coor[2]];
+  int left_bottom  = dev_sparse_pillar_map[anchor_coor[1]*NUM_INDS_FOR_SCAN + anchor_coor[0]];
+  int left_top     = dev_sparse_pillar_map[anchor_coor[3]*NUM_INDS_FOR_SCAN + anchor_coor[0]];
+  int right_bottom = dev_sparse_pillar_map[anchor_coor[1]*NUM_INDS_FOR_SCAN + anchor_coor[2]];
+
+  int area = right_top -left_top - right_bottom + left_bottom;
+  if(area > 1)
+  {
+    dev_anchor_mask[tid] = 1;
+  }
+  else
+  {
+    dev_anchor_mask[tid] = 0;
+  }
+}
+
+
+AnchorMaskCuda::AnchorMaskCuda(const int NUM_INDS_FOR_SCAN,
+                               const int NUM_ANCHOR_X_INDS,
+                               const int NUM_ANCHOR_Y_INDS,
+                               const int NUM_ANCHOR_R_INDS,
+                               const float MIN_X_RANGE,
+                               const float MIN_Y_RANGE,
+                               const float PILLAR_X_SIZE,
+                               const float PILLAR_Y_SIZE,
+                               const int GRID_X_SIZE,
+                               const int GRID_Y_SIZE):
+NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN),
+NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS),
+NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS),
+NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS),
+MIN_X_RANGE_(MIN_X_RANGE),
+MIN_Y_RANGE_(MIN_Y_RANGE),
+PILLAR_X_SIZE_(PILLAR_X_SIZE),
+PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
+GRID_X_SIZE_(GRID_X_SIZE),
+GRID_Y_SIZE_(GRID_Y_SIZE)
+{
+
+}
+
+void AnchorMaskCuda::doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y,
+  const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
+  const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask)
+{
+  scan_x<<<NUM_INDS_FOR_SCAN_, NUM_INDS_FOR_SCAN_/2, NUM_INDS_FOR_SCAN_*sizeof(int)>>>(dev_cumsum_along_x, dev_sparse_pillar_map, NUM_INDS_FOR_SCAN_);
+  scan_y<<<NUM_INDS_FOR_SCAN_, NUM_INDS_FOR_SCAN_/2, NUM_INDS_FOR_SCAN_*sizeof(int)>>>(dev_cumsum_along_y, dev_cumsum_along_x,    NUM_INDS_FOR_SCAN_);
+  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map, dev_cumsum_along_y,  NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int), cudaMemcpyDeviceToDevice ) );
+  make_anchor_mask_kernel<<<NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_, NUM_ANCHOR_Y_INDS_>>>(
+                                          dev_box_anchors_min_x,
+                                          dev_box_anchors_min_y,
+                                          dev_box_anchors_max_x,
+                                          dev_box_anchors_max_y,
+                                          dev_sparse_pillar_map,
+                                          dev_anchor_mask,
+                                          MIN_X_RANGE_,
+                                          MIN_Y_RANGE_,
+                                          PILLAR_X_SIZE_,
+                                          PILLAR_Y_SIZE_,
+                                          GRID_X_SIZE_,
+                                          GRID_Y_SIZE_,
+                                          NUM_INDS_FOR_SCAN_);
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
new file mode 100755
index 00000000000..36617dd472b
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in STL
+#include <iostream>
+
+//headers in local files
+#include "point_pillars_ros.h"
+
+int main(int argc, char** argv)
+{
+  ros::init(argc, argv, "lidar_point_pillars");
+  PointPillarsROS app;
+  app.createROSPubSub();
+  ros::spin();
+
+  return 0;
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
new file mode 100644
index 00000000000..7680a44ad87
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
@@ -0,0 +1,110 @@
+// ------------------------------------------------------------------
+// Copyright (c) 2015 Microsoft
+// Licensed under The MIT License
+// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
+// ------------------------------------------------------------------
+
+
+//headers in local files
+#include "nms_cuda.h"
+
+
+__device__ inline float devIoU(float const *const a, float const *const b)
+{
+  float left = max(a[0], b[0]), right = min(a[2], b[2]);
+  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+  float interS = width * height;
+  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+  return interS / (Sa + Sb - interS);
+}
+
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+                           const float *dev_boxes, unsigned long long *dev_mask)
+{
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  // if (row_start > col_start) return;
+  const int block_threads = blockDim.x;
+
+  const int row_size =
+      min(n_boxes - row_start * block_threads, block_threads);
+  const int col_size =
+      min(n_boxes - col_start * block_threads, block_threads);
+  __shared__ float block_boxes[64 * 4];
+  if (threadIdx.x < col_size)
+  {
+    block_boxes[threadIdx.x * 4 + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 0];
+    block_boxes[threadIdx.x * 4 + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 1];
+    block_boxes[threadIdx.x * 4 + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 2];
+    block_boxes[threadIdx.x * 4 + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 3];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size)
+  {
+    const int cur_box_idx = block_threads * row_start + threadIdx.x;
+    const float cur_box[4] = {dev_boxes[cur_box_idx*4 + 0],
+                              dev_boxes[cur_box_idx*4 + 1],
+                              dev_boxes[cur_box_idx*4 + 2],
+                              dev_boxes[cur_box_idx*4 + 3]};
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start)
+    {
+      start = threadIdx.x + 1;
+    }
+    for (int i = start; i < col_size; i++)
+    {
+      if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh)
+      {
+        t |= 1ULL << i;
+      }
+    }
+    const int col_blocks = DIVUP(n_boxes, block_threads);
+    dev_mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+NMSCuda::NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold):
+NUM_THREADS_(NUM_THREADS),
+nms_overlap_threshold_(nms_overlap_threshold)
+{
+}
+
+void NMSCuda::doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep)
+{
+  const int col_blocks = DIVUP(host_filter_count, NUM_THREADS_);
+  dim3 blocks(DIVUP(host_filter_count, NUM_THREADS_),DIVUP(host_filter_count, NUM_THREADS_));
+  dim3 threads(NUM_THREADS_);
+
+  unsigned long long *dev_mask = NULL;
+  GPU_CHECK(cudaMalloc(&dev_mask, host_filter_count * col_blocks * sizeof(unsigned long long)));
+
+  nms_kernel<<<blocks, threads>>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask);
+
+  // postprocess for nms output
+  std::vector<unsigned long long> host_mask(host_filter_count * col_blocks);
+  GPU_CHECK(cudaMemcpy(&host_mask[0],dev_mask, sizeof(unsigned long long) * host_filter_count * col_blocks,cudaMemcpyDeviceToHost));
+  std::vector<unsigned long long> remv(col_blocks);
+  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
+
+  for (int i = 0; i < host_filter_count; i++)
+  {
+    int nblock = i /  NUM_THREADS_;
+    int inblock = i % NUM_THREADS_;
+
+    if (!(remv[nblock] & (1ULL << inblock)))
+    {
+      out_keep_inds[out_num_to_keep++] = i;
+      unsigned long long *p = &host_mask[0] + i * col_blocks;
+      for (int j = nblock; j < col_blocks; j++)
+      {
+        remv[j] |= p[j];
+      }
+    }
+  }
+  GPU_CHECK(cudaFree(dev_mask));
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
new file mode 100755
index 00000000000..4035f399ffa
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -0,0 +1,621 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+//headers in STL
+#include <chrono>
+#include <iostream>
+
+//headers in ROS
+#include <ros/console.h>
+
+//headers in local files
+#include "point_pillars.h"
+
+
+PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold,
+                           const std::string pfe_onnx_file, const std::string rpn_onnx_file):
+reproduce_result_mode_(reproduce_result_mode),
+score_threshold_(score_threshold),
+nms_overlap_threshold_(nms_overlap_threshold),
+pfe_onnx_file_(pfe_onnx_file),
+rpn_onnx_file_(rpn_onnx_file),
+MAX_NUM_PILLARS_(12000),
+MAX_NUM_POINTS_PER_PILLAR_(100),
+PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_*64),
+GRID_X_SIZE_(432),
+GRID_Y_SIZE_(496),
+GRID_Z_SIZE_(1),
+RPN_INPUT_SIZE_(64*GRID_X_SIZE_*GRID_Y_SIZE_),
+NUM_ANCHOR_X_INDS_(GRID_X_SIZE_*0.5),
+NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_*0.5),
+NUM_ANCHOR_R_INDS_(2),
+NUM_ANCHOR_(NUM_ANCHOR_X_INDS_*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_),
+RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_*7) ,
+RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_),
+RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_*2),
+PILLAR_X_SIZE_(0.16),
+PILLAR_Y_SIZE_(0.16),
+PILLAR_Z_SIZE_(4.0),
+MIN_X_RANGE_(0.0),
+MIN_Y_RANGE_(-39.68),
+MIN_Z_RANGE_(-3.0),
+MAX_X_RANGE_(69.12),
+MAX_Y_RANGE_(39.68),
+MAX_Z_RANGE_(1),
+BATCH_SIZE_(1),
+NUM_INDS_FOR_SCAN_(512),
+NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
+SENSOR_HEIGHT_(1.73),
+ANCHOR_DX_SIZE_(1.6),
+ANCHOR_DY_SIZE_(3.9),
+ANCHOR_DZ_SIZE_(1.56)
+{
+  anchors_px_ = new float[NUM_ANCHOR_];
+  anchors_py_ = new float[NUM_ANCHOR_];
+  anchors_pz_ = new float[NUM_ANCHOR_];
+  anchors_dx_ = new float[NUM_ANCHOR_];
+  anchors_dy_ = new float[NUM_ANCHOR_];
+  anchors_dz_ = new float[NUM_ANCHOR_];
+  anchors_ro_ = new float[NUM_ANCHOR_];
+  box_anchors_min_x_ = new float[NUM_ANCHOR_];
+  box_anchors_min_y_ = new float[NUM_ANCHOR_];
+  box_anchors_max_x_ = new float[NUM_ANCHOR_];
+  box_anchors_max_y_ = new float[NUM_ANCHOR_];
+
+
+  if(reproduce_result_mode_)
+  {
+    preprocess_points_ptr_.reset(new PreprocessPoints(
+      MAX_NUM_PILLARS_,
+      MAX_NUM_POINTS_PER_PILLAR_,
+      GRID_X_SIZE_,
+      GRID_Y_SIZE_,
+      GRID_Z_SIZE_,
+      PILLAR_X_SIZE_,
+      PILLAR_Y_SIZE_,
+      PILLAR_Z_SIZE_,
+      MIN_X_RANGE_,
+      MIN_Y_RANGE_,
+      MIN_Z_RANGE_,
+      NUM_INDS_FOR_SCAN_));
+  }
+  else
+  {
+    preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda(
+      NUM_THREADS_,
+      MAX_NUM_PILLARS_,
+      MAX_NUM_POINTS_PER_PILLAR_,
+      NUM_INDS_FOR_SCAN_,
+      GRID_X_SIZE_,
+      GRID_Y_SIZE_,
+      GRID_Z_SIZE_,
+      PILLAR_X_SIZE_,
+      PILLAR_Y_SIZE_,
+      PILLAR_Z_SIZE_,
+      MIN_X_RANGE_,
+      MIN_Y_RANGE_,
+      MIN_Z_RANGE_));
+  }
+
+  anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(
+    NUM_INDS_FOR_SCAN_,
+    NUM_ANCHOR_X_INDS_,
+    NUM_ANCHOR_Y_INDS_,
+    NUM_ANCHOR_R_INDS_,
+    MIN_X_RANGE_,
+    MIN_Y_RANGE_,
+    PILLAR_X_SIZE_,
+    PILLAR_Y_SIZE_,
+    GRID_X_SIZE_,
+    GRID_Y_SIZE_
+  ));
+
+  scatter_cuda_ptr_.reset(new ScatterCuda(
+    NUM_THREADS_,
+    MAX_NUM_PILLARS_,
+    GRID_X_SIZE_,
+    GRID_Y_SIZE_
+  ));
+
+  const float FLOAT_MIN = std::numeric_limits<float>::lowest();
+  const float FLOAT_MAX = std::numeric_limits<float>::max();
+  postprocess_cuda_ptr_.reset(new PostprocessCuda(
+    FLOAT_MIN,
+    FLOAT_MAX,
+    NUM_ANCHOR_X_INDS_,
+    NUM_ANCHOR_Y_INDS_,
+    NUM_ANCHOR_R_INDS_,
+    score_threshold_,
+    NUM_THREADS_,
+    nms_overlap_threshold_
+  ));
+
+  deviceMemoryMalloc();
+  initTRT();
+  initAnchors();
+}
+
+PointPillars::~PointPillars()
+{
+
+  delete[] anchors_px_;
+  delete[] anchors_py_;
+  delete[] anchors_pz_;
+  delete[] anchors_dx_;
+  delete[] anchors_dy_;
+  delete[] anchors_dz_;
+  delete[] anchors_ro_;
+  delete[] box_anchors_min_x_;
+  delete[] box_anchors_min_y_;
+  delete[] box_anchors_max_x_;
+  delete[] box_anchors_max_y_;
+
+  GPU_CHECK(cudaFree(dev_pillar_x_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_y_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_z_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_i_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_count_histo_));
+
+  GPU_CHECK(cudaFree(dev_x_coors_));
+  GPU_CHECK(cudaFree(dev_y_coors_));
+  GPU_CHECK(cudaFree(dev_num_points_per_pillar_));
+  GPU_CHECK(cudaFree(dev_sparse_pillar_map_));
+
+  GPU_CHECK(cudaFree(dev_pillar_x_));
+  GPU_CHECK(cudaFree(dev_pillar_y_));
+  GPU_CHECK(cudaFree(dev_pillar_z_));
+  GPU_CHECK(cudaFree(dev_pillar_i_));
+
+  GPU_CHECK(cudaFree( dev_x_coors_for_sub_shaped_ ));
+  GPU_CHECK(cudaFree( dev_y_coors_for_sub_shaped_ ));
+  GPU_CHECK(cudaFree( dev_pillar_feature_mask_ ));
+
+  GPU_CHECK(cudaFree(dev_cumsum_along_x_));
+  GPU_CHECK(cudaFree(dev_cumsum_along_y_));
+
+  GPU_CHECK(cudaFree(dev_box_anchors_min_x_));
+  GPU_CHECK(cudaFree(dev_box_anchors_min_y_));
+  GPU_CHECK(cudaFree(dev_box_anchors_max_x_));
+  GPU_CHECK(cudaFree(dev_box_anchors_max_y_));
+  GPU_CHECK(cudaFree(dev_anchor_mask_));
+
+  GPU_CHECK(cudaFree(pfe_buffers_[0]));
+  GPU_CHECK(cudaFree(pfe_buffers_[1]));
+  GPU_CHECK(cudaFree(pfe_buffers_[2]));
+  GPU_CHECK(cudaFree(pfe_buffers_[3]));
+  GPU_CHECK(cudaFree(pfe_buffers_[4]));
+  GPU_CHECK(cudaFree(pfe_buffers_[5]));
+  GPU_CHECK(cudaFree(pfe_buffers_[6]));
+  GPU_CHECK(cudaFree(pfe_buffers_[7]));
+  GPU_CHECK(cudaFree(pfe_buffers_[8]));
+
+
+  GPU_CHECK(cudaFree(rpn_buffers_[0]));
+  GPU_CHECK(cudaFree(rpn_buffers_[1]));
+  GPU_CHECK(cudaFree(rpn_buffers_[2]));
+  GPU_CHECK(cudaFree(rpn_buffers_[3]));
+
+  GPU_CHECK(cudaFree(dev_scattered_feature_));
+
+  GPU_CHECK(cudaFree(dev_anchors_px_));
+  GPU_CHECK(cudaFree(dev_anchors_py_));
+  GPU_CHECK(cudaFree(dev_anchors_pz_));
+  GPU_CHECK(cudaFree(dev_anchors_dx_));
+  GPU_CHECK(cudaFree(dev_anchors_dy_));
+  GPU_CHECK(cudaFree(dev_anchors_dz_));
+  GPU_CHECK(cudaFree(dev_anchors_ro_));
+  GPU_CHECK(cudaFree(dev_filtered_box_));
+  GPU_CHECK(cudaFree(dev_filtered_score_));
+  GPU_CHECK(cudaFree(dev_filtered_dir_));
+  GPU_CHECK(cudaFree(dev_box_for_nms_));
+  GPU_CHECK(cudaFree(dev_filter_count_));
+
+
+
+  pfe_context_->destroy();
+  rpn_context_->destroy();
+
+  pfe_runtime_->destroy();
+  rpn_runtime_->destroy();
+  pfe_engine_->destroy();
+  rpn_engine_->destroy();
+}
+
+void PointPillars::deviceMemoryMalloc()
+{
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
+
+  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_* sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_* sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_,NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
+
+
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+
+
+  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+
+  //cumsum kernel
+  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
+
+
+  // for make anchor mask kernel
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_*sizeof(int)));
+
+
+  // for trt inference
+  // create GPU buffers and a stream
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[0], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[1], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[2], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[3], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[4], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[5], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[6], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[7], MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&pfe_buffers_[8], PFE_OUTPUT_SIZE_ * sizeof(float)));
+
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[0], RPN_INPUT_SIZE_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[1], RPN_BOX_OUTPUT_SIZE_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[2], RPN_CLS_OUTPUT_SIZE_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float)));
+
+  // for scatter kernel
+  GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_,     64*GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(float) ) );
+
+  //for filter
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_*7*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_,  NUM_ANCHOR_*sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_*4*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int)));
+}
+
+void PointPillars::initAnchors()
+{
+  // zero clear
+  for(size_t i = 0; i < NUM_ANCHOR_; i++)
+  {
+    anchors_px_[i] = 0;
+    anchors_py_[i] = 0;
+    anchors_pz_[i] = 0;
+    anchors_dx_[i] = 0;
+    anchors_dy_[i] = 0;
+    anchors_dz_[i] = 0;
+    anchors_ro_[i] = 0;
+    box_anchors_min_x_[i] = 0;
+    box_anchors_min_y_[i] = 0;
+    box_anchors_max_x_[i] = 0;
+    box_anchors_max_y_[i] = 0;
+  }
+
+  float x_stride = PILLAR_X_SIZE_*2;
+  float y_stride = PILLAR_Y_SIZE_*2;
+  float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_;
+  float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_;
+
+  float anchor_x_count[NUM_ANCHOR_X_INDS_] = {0};
+  for(size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++)
+  {
+    anchor_x_count[i] = float(i)*x_stride + x_offset;
+  }
+  float anchor_y_count[NUM_ANCHOR_Y_INDS_] = {0};
+  for(size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++)
+  {
+    anchor_y_count[i] = float(i)*y_stride + y_offset;
+  }
+
+  float anchor_r_count[NUM_ANCHOR_R_INDS_] = {0, M_PI/2};
+
+  // np.meshgrid
+  for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+  {
+    for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+    {
+      for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++)
+      {
+        int ind = y*NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_ + x*NUM_ANCHOR_R_INDS_ + r;
+        anchors_px_[ind] = anchor_x_count[x];
+        anchors_py_[ind] = anchor_y_count[y];
+        anchors_ro_[ind] = anchor_r_count[r];
+        anchors_pz_[ind] = -1*SENSOR_HEIGHT_;
+        anchors_dx_[ind] = ANCHOR_DX_SIZE_;
+        anchors_dy_[ind] = ANCHOR_DY_SIZE_;
+        anchors_dz_[ind] = ANCHOR_DZ_SIZE_;
+      }
+    }
+  }
+  convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_);
+
+  putAnchorsInDeviceMemory();
+}
+
+void PointPillars::putAnchorsInDeviceMemory()
+{
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
+
+  GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
+
+}
+
+void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py,
+                                             float* anchors_dx, float* anchors_dy)
+{
+  // flip box's dimension when the at the third axis == 1
+  float flipped_anchors_dx[NUM_ANCHOR_] = {0};
+  float flipped_anchors_dy[NUM_ANCHOR_] = {0};
+  for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+  {
+    for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+    {
+      int base_ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_;
+      flipped_anchors_dx[base_ind + 0] = ANCHOR_DX_SIZE_;
+      flipped_anchors_dy[base_ind + 0] = ANCHOR_DY_SIZE_;
+      flipped_anchors_dx[base_ind + 1] = ANCHOR_DY_SIZE_;
+      flipped_anchors_dy[base_ind + 1] = ANCHOR_DX_SIZE_;
+    }
+  }
+  for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+  {
+    for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+    {
+      for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++)
+      {
+        int ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_ + r;
+        box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind]/2;
+        box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind]/2;
+        box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind]/2;
+        box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind]/2;
+      }
+    }
+  }
+}
+
+void PointPillars::initTRT()
+{
+  // create a TensorRT model from the onnx model and serialize it to a stream
+  nvinfer1::IHostMemory* pfe_trt_model_stream{nullptr};
+  nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr};
+  onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream);
+  onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream);
+  if(pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr)
+  {
+    ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str());
+  }
+
+  // deserialize the engine
+  pfe_runtime_ = nvinfer1::createInferRuntime(g_logger_);
+  rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
+  if(pfe_runtime_ == nullptr || rpn_runtime_ == nullptr)
+  {
+    ROS_ERROR("Failed to create TensorRT Runtime object.");
+  }
+  pfe_engine_ = pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr);
+  rpn_engine_ = rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
+  if(pfe_engine_ == nullptr || rpn_engine_ == nullptr)
+  {
+    ROS_ERROR("Failed to create TensorRT Engine.");
+  }
+  pfe_trt_model_stream->destroy();
+  rpn_trt_model_stream->destroy();
+  pfe_context_ = pfe_engine_->createExecutionContext();
+  rpn_context_ = rpn_engine_->createExecutionContext();
+  if(pfe_context_ == nullptr || rpn_context_ == nullptr)
+  {
+    ROS_ERROR("Failed to create TensorRT Execution Context.");
+  }
+}
+
+void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the onnx model
+                    nvinfer1::IHostMemory*& trt_model_stream) // output buffer for the TensorRT model
+{
+  int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
+
+  // create the builder
+  nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_);
+  nvinfer1::INetworkDefinition* network = builder->createNetwork();
+
+  auto parser = nvonnxparser::createParser(*network, g_logger_);
+
+  if (!parser->parseFromFile(model_file.c_str(), verbosity))
+  {
+      std::string msg("failed to parse onnx file");
+      g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+      exit(EXIT_FAILURE);
+  }
+
+  // Build the engine
+  builder->setMaxBatchSize(BATCH_SIZE_);
+  builder->setMaxWorkspaceSize(1 << 20);
+
+  nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
+
+  parser->destroy();
+
+  // serialize the engine, then close everything down
+  trt_model_stream = engine->serialize();
+  engine->destroy();
+  network->destroy();
+  builder->destroy();
+}
+
+void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_points)
+{
+  int x_coors[MAX_NUM_PILLARS_]= {0};
+  int y_coors[MAX_NUM_PILLARS_]= {0};
+  float num_points_per_pillar[MAX_NUM_PILLARS_] = {0};
+  float * pillar_x = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_y = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_z = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_i = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+
+  float * x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+  float * y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_feature_mask = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
+
+  float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_];
+
+  preprocess_points_ptr_->preprocess(in_points_array, in_num_points,
+                                    x_coors, y_coors, num_points_per_pillar,
+                                    pillar_x, pillar_y, pillar_z, pillar_i,
+                                    x_coors_for_sub_shaped, y_coors_for_sub_shaped,
+                                    pillar_feature_mask, sparse_pillar_map, host_pillar_count_);
+
+  GPU_CHECK( cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
+  GPU_CHECK( cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
+  GPU_CHECK( cudaMemset(dev_pillar_x_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK( cudaMemset(dev_pillar_y_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK( cudaMemset(dev_pillar_z_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK( cudaMemset(dev_pillar_i_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK( cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) );
+  GPU_CHECK( cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) );
+  GPU_CHECK( cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) );
+  GPU_CHECK( cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) );
+
+  GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_,num_points_per_pillar,MAX_NUM_PILLARS_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
+  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(float), cudaMemcpyHostToDevice ));
+
+  delete[] pillar_x;
+  delete[] pillar_y;
+  delete[] pillar_z;
+  delete[] pillar_i;
+  delete[] x_coors_for_sub_shaped;
+  delete[] y_coors_for_sub_shaped;
+  delete[] pillar_feature_mask;
+  delete[] sparse_pillar_map;
+}
+
+void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points)
+{
+  float * dev_points;
+  GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points*4*sizeof(float)));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points*4*sizeof(float), cudaMemcpyHostToDevice ) );
+
+
+  GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) );
+  GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
+  GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
+  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) );
+  GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_*sizeof(int)));
+
+  preprocess_points_cuda_ptr_->doPreprocessPointsCuda(dev_points, in_num_points, dev_x_coors_, dev_y_coors_,
+            dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_, dev_pillar_z_, dev_pillar_i_,
+            dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_,
+            dev_pillar_feature_mask_, dev_sparse_pillar_map_, host_pillar_count_);
+
+  GPU_CHECK(cudaFree(dev_points));
+}
+
+void PointPillars::preprocess(const float* in_points_array, const int in_num_points)
+{
+  if(reproduce_result_mode_)
+  {
+    preprocessCPU(in_points_array, in_num_points);
+  }
+  else
+  {
+    preprocessGPU(in_points_array, in_num_points);
+  }
+}
+
+void PointPillars::doInference(const float* in_points_array, const int in_num_points,
+  std::vector<float>& out_detection, int& out_num_objects)
+{
+  preprocess(in_points_array, in_num_points);
+
+  anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_, dev_box_anchors_min_x_,
+                  dev_box_anchors_min_y_, dev_box_anchors_max_x_, dev_box_anchors_max_y_, dev_anchor_mask_);
+
+
+  cudaStream_t stream;
+  GPU_CHECK(cudaStreamCreate(&stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_           * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_,   BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr);
+
+
+
+  GPU_CHECK( cudaMemset( dev_scattered_feature_, 0, RPN_INPUT_SIZE_*sizeof(float) ) );
+  scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_,
+                                  (float *)pfe_buffers_[8], dev_scattered_feature_);
+
+
+  GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_,   BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr);
+
+
+  GPU_CHECK(cudaMemset( dev_filter_count_, 0,  sizeof( int )));
+  postprocess_cuda_ptr_->doPostprocessCuda((float *)rpn_buffers_[1], (float *)rpn_buffers_[2],(float *)rpn_buffers_[3] , dev_anchor_mask_,
+                  dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_,
+                  dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
+                  dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_,
+                  dev_filter_count_, out_detection, out_num_objects);
+
+  // release the stream and the buffers
+  cudaStreamDestroy(stream);
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
new file mode 100755
index 00000000000..a7287b3f60e
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in STL
+#include <chrono>
+#include <cmath>
+
+//headers in PCL
+#include <pcl_conversions/pcl_conversions.h>
+#include <pcl/PCLPointCloud2.h>
+#include <pcl_ros/transforms.h>
+
+//headers in ROS
+#include <tf/transform_datatypes.h>
+
+//headers in local files
+#include "autoware_msgs/DetectedObjectArray.h"
+#include "point_pillars_ros.h"
+
+PointPillarsROS::PointPillarsROS():
+private_nh_("~"),
+has_subscribed_baselink_(false),
+NUM_POINT_FEATURE_(4),
+OUTPUT_NUM_BOX_FEATURE_(7),
+TRAINED_SENSOR_HEIGHT_(1.73),
+NORMALIZING_INTENSITY_VALUE_(255.0),
+BASELINK_FRAME_("base_link")
+{
+  private_nh_.param<bool>("baselink_support", baselink_support_, true);
+  private_nh_.param<bool>("reproduce_result_mode", reproduce_result_mode_, false);
+  private_nh_.param<float>("score_threshold", score_threshold_, 0.5);
+  private_nh_.param<float>("nms_overlap_threshold", nms_overlap_threshold_, 0.5);
+  private_nh_.param<std::string>("pfe_onnx_file", pfe_onnx_file_, "");
+  private_nh_.param<std::string>("rpn_onnx_file", rpn_onnx_file_, "");
+
+  point_pillars_ptr_.reset(new PointPillars(
+    reproduce_result_mode_,
+    score_threshold_,
+    nms_overlap_threshold_,
+    pfe_onnx_file_,
+    rpn_onnx_file_));
+}
+
+void PointPillarsROS::createROSPubSub()
+{
+  sub_points_  = nh_.subscribe<sensor_msgs::PointCloud2>("/points_raw", 1, &PointPillarsROS::pointsCallback, this);
+  pub_objects_  = nh_.advertise<autoware_msgs::DetectedObjectArray>("/detection/lidar_detector/objects", 1);
+}
+
+
+geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose,
+                                                        const tf::Transform& tf)
+{
+  tf::Transform transform;
+  geometry_msgs::PoseStamped out_pose;
+  transform.setOrigin(tf::Vector3(in_pose.position.x, in_pose.position.y, in_pose.position.z));
+  transform.setRotation(tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w));
+  geometry_msgs::PoseStamped pose_out;
+  tf::poseTFToMsg(tf*transform, out_pose.pose);
+  return out_pose.pose;
+}
+
+void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const int num_objects, const std_msgs::Header& pc_header)
+{
+  autoware_msgs::DetectedObjectArray objects;
+  objects.header = pc_header;
+  for (size_t i = 0; i < num_objects; i++)
+  {
+    autoware_msgs::DetectedObject object;
+    object.header = pc_header;
+    object.valid = true;
+    object.pose_reliable = true;
+
+    object.pose.position.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+0];
+    object.pose.position.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+1];
+    object.pose.position.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+2];
+
+
+    float yaw = detections[i*OUTPUT_NUM_BOX_FEATURE_+6];
+    yaw = std::atan2(std::sin(yaw), std::cos(yaw));
+    geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw);
+    object.pose.orientation = q;
+
+    if(baselink_support_)
+    {
+      object.pose = getTransformedPose(object.pose, angle_transform_inversed_);
+    }
+
+    object.dimensions.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+3];
+    object.dimensions.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+4];
+    object.dimensions.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+5];
+
+    objects.objects.push_back(object);
+  }
+  pub_objects_.publish(objects);
+}
+
+void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid)
+{
+  try
+  {
+    tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0));
+    tf_listener_.lookupTransform( BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_);
+    getTFInfo(baselink2lidar_);
+    has_subscribed_baselink_ = true;
+  }
+  catch (tf::TransformException ex)
+  {
+    ROS_ERROR("%s", ex.what());
+  }
+}
+
+void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar)
+{
+  tf::Vector3 v = baselink2lidar.getOrigin();
+  offset_z_from_trained_data_ = v.getZ() - TRAINED_SENSOR_HEIGHT_;
+
+  tf::Quaternion q = baselink2lidar_.getRotation();
+  angle_transform_ = tf::Transform(q);
+  angle_transform_inversed_ = angle_transform_.inverse();
+
+}
+
+void PointPillarsROS::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z)
+{
+  for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
+  {
+    pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
+    out_points_array[i*NUM_POINT_FEATURE_ + 0] = point.x;
+    out_points_array[i*NUM_POINT_FEATURE_ + 1] = point.y;
+    out_points_array[i*NUM_POINT_FEATURE_ + 2] = point.z + offset_z;
+    out_points_array[i*NUM_POINT_FEATURE_ + 3] = float(point.intensity/NORMALIZING_INTENSITY_VALUE_);
+  }
+}
+
+
+void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& msg)
+{
+
+  pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
+  pcl::fromROSMsg(*msg, *pcl_pc_ptr);
+
+  if(baselink_support_)
+  {
+    if(!has_subscribed_baselink_)
+    {
+      getBaselinkToLidarTF(msg->header.frame_id);
+    }
+    pcl_ros::transformPointCloud(*pcl_pc_ptr, *pcl_pc_ptr, angle_transform_);
+  }
+
+  float * points_array = new float[pcl_pc_ptr->size()*NUM_POINT_FEATURE_];
+  if(baselink_support_ && has_subscribed_baselink_)
+  {
+    pclToArray(pcl_pc_ptr, points_array, offset_z_from_trained_data_);
+  }
+  else
+  {
+    pclToArray(pcl_pc_ptr, points_array);
+  }
+
+  int out_num_objects = 0;
+  std::vector<float> out_detection;
+  point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection, out_num_objects);
+
+  delete[] points_array;
+  pubDetectedObject(out_detection, out_num_objects, msg->header);
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
new file mode 100644
index 00000000000..48862d2256e
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+//headers in CUDA
+#include <thrust/sort.h>
+
+//headers in local files
+#include "postprocess_cuda.h"
+
+__global__ void filter_kernel(const float* box_preds, const float* cls_preds, const float* dir_preds, const int* anchor_mask,
+                              const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
+                              const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
+                              float* filtered_box, float* filtered_score, int* filtered_dir, float* box_for_nms, int* filter_count,
+                              const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold)
+{
+  // boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  //sigmoid funciton
+  float score = 1/(1+expf(-cls_preds[tid]));
+  if(anchor_mask[tid] == 1 && score > score_threshold)
+  {
+    int counter = atomicAdd(filter_count, 1);
+    float za = dev_anchors_pz[tid] + dev_anchors_dz[tid]/2;
+
+    //decode network output
+    float diagonal = sqrtf(dev_anchors_dx[tid]*dev_anchors_dx[tid] + dev_anchors_dy[tid]*dev_anchors_dy[tid]);
+    float box_px = box_preds[tid*7 + 0] * diagonal + dev_anchors_px[tid];
+    float box_py = box_preds[tid*7 + 1] * diagonal + dev_anchors_py[tid];
+    float box_pz = box_preds[tid*7 + 2] * dev_anchors_dz[tid] + za;
+    float box_dx = expf(box_preds[tid*7 + 3]) * dev_anchors_dx[tid];
+    float box_dy = expf(box_preds[tid*7 + 4]) * dev_anchors_dy[tid];
+    float box_dz = expf(box_preds[tid*7 + 5]) * dev_anchors_dz[tid];
+    float box_ro = box_preds[tid*7 + 6] + dev_anchors_ro[tid];
+
+    box_pz = box_pz - box_dz/2;
+
+    filtered_box[counter*7 + 0] = box_px;
+    filtered_box[counter*7 + 1] = box_py;
+    filtered_box[counter*7 + 2] = box_pz;
+    filtered_box[counter*7 + 3] = box_dx;
+    filtered_box[counter*7 + 4] = box_dy;
+    filtered_box[counter*7 + 5] = box_dz;
+    filtered_box[counter*7 + 6] = box_ro;
+    filtered_score[counter] = score;
+
+    int direction_label;
+    if(dir_preds[tid*2 + 0] < dir_preds[tid*2 + 1])
+    {
+      direction_label = 1;
+    }
+    else
+    {
+      direction_label = 0;
+    }
+    filtered_dir[counter] = direction_label;
+
+    //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation
+    //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1)
+    float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy),
+                          float(-0.5*box_dx), float( 0.5*box_dy),
+                          float( 0.5*box_dx), float( 0.5*box_dy),
+                          float( 0.5*box_dx), float(-0.5*box_dy)};
+
+    //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax)
+    float rotated_corners[2*4];
+    float offset_corners[2*4];
+    float sin_yaw = sinf(box_ro);
+    float cos_yaw = cosf(box_ro);
+    float xmin = FLOAT_MAX;
+    float ymin = FLOAT_MAX;
+    float xmax = FLOAT_MIN;
+    float ymax = FLOAT_MIN;
+    for(size_t i = 0; i < 4; i++)
+    {
+      rotated_corners[i*2 + 0] = cos_yaw*corners[i*2 + 0] - sin_yaw*corners[i*2 + 1];
+      rotated_corners[i*2 + 1] = sin_yaw*corners[i*2 + 0] + cos_yaw*corners[i*2 + 1];
+
+      offset_corners[i*2 + 0] = rotated_corners[i*2 + 0] + box_px;
+      offset_corners[i*2 + 1] = rotated_corners[i*2 + 1] + box_py;
+
+      xmin = fminf(xmin, offset_corners[i*2 + 0]);
+      ymin = fminf(ymin, offset_corners[i*2 + 1]);
+      xmax = fmaxf(xmin, offset_corners[i*2 + 0]);
+      ymax = fmaxf(ymax, offset_corners[i*2 + 1]);
+    }
+    // box_for_nms(num_box, 4)
+    box_for_nms[counter*4 + 0] = xmin;
+    box_for_nms[counter*4 + 1] = ymin;
+    box_for_nms[counter*4 + 2] = xmax;
+    box_for_nms[counter*4 + 3] = ymax;
+
+  }
+}
+
+__global__ void sort_boxes_by_indexes_kernel(float* filtered_box, int* filtered_dir, float* box_for_nms, int* indexes, int filter_count,
+                                             float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms)
+{
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  if(tid < filter_count)
+  {
+    int sort_index = indexes[tid];
+    sorted_filtered_boxes[tid*7 + 0] = filtered_box[sort_index*7 + 0];
+    sorted_filtered_boxes[tid*7 + 1] = filtered_box[sort_index*7 + 1];
+    sorted_filtered_boxes[tid*7 + 2] = filtered_box[sort_index*7 + 2];
+    sorted_filtered_boxes[tid*7 + 3] = filtered_box[sort_index*7 + 3];
+    sorted_filtered_boxes[tid*7 + 4] = filtered_box[sort_index*7 + 4];
+    sorted_filtered_boxes[tid*7 + 5] = filtered_box[sort_index*7 + 5];
+    sorted_filtered_boxes[tid*7 + 6] = filtered_box[sort_index*7 + 6];
+
+    sorted_filtered_dir[tid] = filtered_dir[sort_index];
+
+
+    sorted_box_for_nms[tid*4 + 0] = box_for_nms[sort_index*4 + 0];
+    sorted_box_for_nms[tid*4 + 1] = box_for_nms[sort_index*4 + 1];
+    sorted_box_for_nms[tid*4 + 2] = box_for_nms[sort_index*4 + 2];
+    sorted_box_for_nms[tid*4 + 3] = box_for_nms[sort_index*4 + 3];
+  }
+}
+
+PostprocessCuda::PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX,
+  const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS,
+  const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold):
+FLOAT_MIN_(FLOAT_MIN),
+FLOAT_MAX_(FLOAT_MAX),
+NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS),
+NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS),
+NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS),
+score_threshold_(score_threshold),
+NUM_THREADS_(NUM_THREADS),
+nms_overlap_threshold_(nms_overlap_threshold)
+{
+  nms_cuda_ptr_.reset(new NMSCuda(
+    NUM_THREADS,
+    nms_overlap_threshold));
+}
+
+void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output,
+        int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
+        const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
+        float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count,
+        std::vector<float>& out_detection, int& out_num_objects)
+{
+
+  filter_kernel<<<NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_, NUM_ANCHOR_Y_INDS_>>>
+                                (rpn_box_output, rpn_cls_output, rpn_dir_output, dev_anchor_mask,
+                                dev_anchors_px, dev_anchors_py, dev_anchors_pz,
+                                dev_anchors_dx, dev_anchors_dy, dev_anchors_dz, dev_anchors_ro,
+                                dev_filtered_box, dev_filtered_score, dev_filtered_dir, dev_box_for_nms, dev_filter_count,
+                                FLOAT_MIN_, FLOAT_MAX_, score_threshold_);
+
+  int host_filter_count[1];
+  GPU_CHECK( cudaMemcpy(host_filter_count, dev_filter_count, sizeof(int), cudaMemcpyDeviceToHost ) );
+  if(host_filter_count[0] == 0)
+  {
+    return;
+  }
+
+  int* dev_indexes;
+  float* dev_sorted_filtered_box,  *dev_sorted_box_for_nms;
+  int* dev_sorted_filtered_dir;
+  GPU_CHECK(cudaMalloc((void**)&dev_indexes, host_filter_count[0]*sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, 7*host_filter_count[0]*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_dir, host_filter_count[0]*sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, 4*host_filter_count[0]*sizeof(float)));
+  thrust::sequence(thrust::device, dev_indexes, dev_indexes + host_filter_count[0]);
+  thrust::sort_by_key(thrust::device, dev_filtered_score, dev_filtered_score + size_t(host_filter_count[0]), dev_indexes, thrust::greater<float>());
+
+
+  const int num_blocks = DIVUP(host_filter_count[0], NUM_THREADS_);
+  sort_boxes_by_indexes_kernel<<<num_blocks, NUM_THREADS_>>>(dev_filtered_box, dev_filtered_dir, dev_box_for_nms, dev_indexes, host_filter_count[0],
+                                                              dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms);
+
+  int keep_inds[host_filter_count[0]] = {0};
+  nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects);
+
+
+  float host_filtered_box[host_filter_count[0]*7];
+  int host_filtered_dir[host_filter_count[0]];
+  GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, 7*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) );
+  GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir,   host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) );
+  for (size_t i = 0; i < out_num_objects; i++)
+  {
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+0]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+1]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+2]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+3]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+4]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*7+5]);
+
+    if(host_filtered_dir[keep_inds[i]] == 0)
+    {
+      out_detection.push_back(host_filtered_box[keep_inds[i]*7+6] + M_PI);
+    }
+    else
+    {
+      out_detection.push_back(host_filtered_box[keep_inds[i]*7+6]);
+    }
+  }
+
+  GPU_CHECK(cudaFree(dev_indexes));
+  GPU_CHECK(cudaFree(dev_sorted_filtered_box));
+  GPU_CHECK(cudaFree(dev_sorted_filtered_dir));
+  GPU_CHECK(cudaFree(dev_sorted_box_for_nms));
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
new file mode 100644
index 00000000000..9ea42f9190d
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in STL
+#include <cmath>
+#include <iostream>
+
+//headers in local files
+#include "preprocess_points.h"
+
+PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS,
+                                   const int MAX_POINTS_PER_PILLAR,
+                                   const int GRID_X_SIZE,
+                                   const int GRID_Y_SIZE,
+                                   const int GRID_Z_SIZE,
+                                   const float PILLAR_X_SIZE,
+                                   const float PILLAR_Y_SIZE,
+                                   const float PILLAR_Z_SIZE,
+                                   const float MIN_X_RANGE,
+                                   const float MIN_Y_RANGE,
+                                   const float MIN_Z_RANGE,
+                                   const int NUM_INDS_FOR_SCAN)
+:
+MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
+MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR),
+GRID_X_SIZE_(GRID_X_SIZE),
+GRID_Y_SIZE_(GRID_Y_SIZE),
+GRID_Z_SIZE_(GRID_Z_SIZE),
+PILLAR_X_SIZE_(PILLAR_X_SIZE),
+PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
+PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
+MIN_X_RANGE_(MIN_X_RANGE),
+MIN_Y_RANGE_(MIN_Y_RANGE),
+MIN_Z_RANGE_(MIN_Z_RANGE),
+NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+{
+}
+
+void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map,
+                                         float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                                         float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped)
+{
+  for (int i = 0; i < GRID_Y_SIZE_; i++)
+  {
+    for (int j = 0; j < GRID_X_SIZE_; j++)
+    {
+      coor_to_pillaridx[i*GRID_X_SIZE_ + j] = -1;
+    }
+  }
+
+  for (int i = 0; i < NUM_INDS_FOR_SCAN_; i++)
+  {
+    for (int j = 0; j < NUM_INDS_FOR_SCAN_; j++)
+    {
+      sparse_pillar_map[i*NUM_INDS_FOR_SCAN_+j] = 0;
+    }
+  }
+
+  for(int i = 0; i < MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_; i++)
+  {
+    pillar_x[i] = 0;
+    pillar_y[i] = 0;
+    pillar_z[i] = 0;
+    pillar_i[i] = 0;
+    x_coors_for_sub_shaped[i] = 0;
+    y_coors_for_sub_shaped[i] = 0;
+  }
+}
+
+void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points,
+                int* x_coors, int* y_coors, float* num_points_per_pillar,
+                float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+                float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
+{
+  int pillar_count = 0;
+  float x_coors_for_sub[MAX_NUM_PILLARS_]= {0};
+  float y_coors_for_sub[MAX_NUM_PILLARS_]= {0};
+  //init variables
+  int coor_to_pillaridx[GRID_Y_SIZE_*GRID_X_SIZE_];
+  initializeVariables(coor_to_pillaridx, sparse_pillar_map,
+                     pillar_x, pillar_y, pillar_z, pillar_i,
+                     x_coors_for_sub_shaped, y_coors_for_sub_shaped);
+  for(int i = 0; i < in_num_points; i++)
+  {
+    int x_coor = std::floor((in_points_array[i*4 + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_);
+    int y_coor = std::floor((in_points_array[i*4 + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_);
+    int z_coor = std::floor((in_points_array[i*4 + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_);
+    if(x_coor < 0 || x_coor >= GRID_X_SIZE_ ||
+       y_coor < 0 || y_coor >= GRID_Y_SIZE_ ||
+       z_coor < 0 || z_coor >= GRID_Z_SIZE_)
+    {
+      continue;
+    }
+    //reverse index
+    int pillar_index = coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor];
+    if(pillar_index == -1)
+    {
+      pillar_index = pillar_count;
+      if(pillar_count >= MAX_NUM_PILLARS_)
+      {
+        break;
+      }
+      pillar_count += 1;
+      coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor] = pillar_index;
+
+      //reverse index
+      y_coors[pillar_index] = std::floor(y_coor);
+      x_coors[pillar_index] = std::floor(x_coor);
+
+      // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_;
+      // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_;
+      // TODO Need to be modified after proper trining code
+      y_coors_for_sub[pillar_index] = std::floor(y_coor)* PILLAR_Y_SIZE_ + -39.9;
+      x_coors_for_sub[pillar_index] = std::floor(x_coor)* PILLAR_X_SIZE_ + 0.1;
+
+      //sparse pillar map
+      sparse_pillar_map[y_coor*512 + x_coor] = 1;
+    }
+    int num = num_points_per_pillar[pillar_index];
+    if (num < MAX_NUM_POINTS_PER_PILLAR_)
+    {
+      pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 0];
+      pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 1];
+      pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 2];
+      pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 3];
+      num_points_per_pillar[pillar_index] += 1;
+    }
+  }
+
+  for (int i = 0; i < MAX_NUM_PILLARS_; i++)
+  {
+    float x = x_coors_for_sub[i];
+    float y = y_coors_for_sub[i];
+    int num_points_for_a_pillar = num_points_per_pillar[i];
+    for(int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++)
+    {
+       x_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = x;
+       y_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = y;
+       if(j < num_points_for_a_pillar)
+       {
+         pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0;
+       }
+       else
+       {
+         pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0;
+       }
+    }
+  }
+  host_pillar_count[0] = pillar_count;
+
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
new file mode 100644
index 00000000000..0ee9bba397e
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in STL
+#include <iostream>
+
+//headers in local files
+#include "common.h"
+#include "preprocess_points_cuda.h"
+
+__global__ void make_pillar_histo_kernel(
+                                     const float* dev_points,
+                                     float* dev_pillar_x_in_coors,
+                                     float* dev_pillar_y_in_coors,
+                                     float* dev_pillar_z_in_coors,
+                                     float* dev_pillar_i_in_coors,
+                                     int* pillar_count_histo,
+                                     const int num_points,
+                                     const int max_points_per_pillar,
+                                     const int GRID_X_SIZE,
+                                     const int GRID_Y_SIZE,
+                                     const int GRID_Z_SIZE,
+                                     const float MIN_X_RANGE,
+                                     const float MIN_Y_RANGE,
+                                     const float MIN_Z_RANGE,
+                                     const float PILLAR_X_SIZE,
+                                     const float PILLAR_Y_SIZE,
+                                     const float PILLAR_Z_SIZE
+                                   )
+{
+  int th_i = threadIdx.x + blockIdx.x * blockDim.x;
+  if(th_i >= num_points)
+  {
+    return;
+  }
+  int y_coor = floor((dev_points[th_i*4 + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE);
+  int x_coor = floor((dev_points[th_i*4 + 0] - MIN_X_RANGE)/PILLAR_X_SIZE);
+  int z_coor = floor((dev_points[th_i*4 + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE);
+
+  if(x_coor >= 0 && x_coor < GRID_X_SIZE &&
+     y_coor >= 0 && y_coor < GRID_Y_SIZE &&
+     z_coor >= 0 && z_coor < GRID_Z_SIZE)
+  {
+    int count = atomicAdd(&pillar_count_histo[y_coor*GRID_X_SIZE + x_coor], 1);
+    if(count < max_points_per_pillar)
+    {
+      int ind = y_coor*GRID_X_SIZE*max_points_per_pillar + x_coor*max_points_per_pillar + count;
+      dev_pillar_x_in_coors[ind] = dev_points[th_i*4 + 0];
+      dev_pillar_y_in_coors[ind] = dev_points[th_i*4 + 1];
+      dev_pillar_z_in_coors[ind] = dev_points[th_i*4 + 2];
+      dev_pillar_i_in_coors[ind] = dev_points[th_i*4 + 3];
+    }
+  }
+}
+
+
+__global__ void make_pillar_index_kernel(
+                                               int* dev_pillar_count_histo,
+                                               int* dev_counter,
+                                               int* dev_pillar_count,
+                                               int* dev_x_coors,
+                                               int* dev_y_coors,
+                                               float* dev_x_coors_for_sub,
+                                               float* dev_y_coors_for_sub,
+                                               float* dev_num_points_per_pillar,
+                                               int* dev_sparse_pillar_map,
+                                               const int max_pillars,
+                                               const int max_points_per_pillar,
+                                               const int GRID_X_SIZE,
+                                               const float PILLAR_X_SIZE,
+                                               const float PILLAR_Y_SIZE,
+                                               const int NUM_INDS_FOR_SCAN)
+{
+  int x = blockIdx.x;
+  int y = threadIdx.x;
+  int num_points_at_this_pillar = dev_pillar_count_histo[y*GRID_X_SIZE + x];
+  if(num_points_at_this_pillar == 0)
+  {
+    return;
+  }
+
+  int count = atomicAdd(dev_counter, 1);
+  if(count < max_pillars)
+  {
+    atomicAdd(dev_pillar_count, 1);
+    if(num_points_at_this_pillar >= max_points_per_pillar)
+    {
+      dev_num_points_per_pillar[count] = max_points_per_pillar;
+    }
+    else
+    {
+      dev_num_points_per_pillar[count] = num_points_at_this_pillar;
+    }
+    dev_x_coors[count] = x;
+    dev_y_coors[count] = y;
+
+    //TODO Need to be modified after making properly training model
+    // x_offset = self.vx / 2 + pc_range[0]
+    // y_offset = self.vy / 2 + pc_range[1]
+    // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset
+    // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset
+
+    dev_x_coors_for_sub[count] =  x*  PILLAR_X_SIZE + 0.1;
+    dev_y_coors_for_sub[count] =  y*  PILLAR_Y_SIZE + -39.9;
+    dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1;
+  }
+}
+
+
+__global__ void make_pillar_feature_kernel(
+                                          float* dev_pillar_x_in_coors,
+                                          float* dev_pillar_y_in_coors,
+                                          float* dev_pillar_z_in_coors,
+                                          float* dev_pillar_i_in_coors,
+                                          float* dev_pillar_x,
+                                          float* dev_pillar_y,
+                                          float* dev_pillar_z,
+                                          float* dev_pillar_i,
+                                          int* dev_x_coors,
+                                          int* dev_y_coors,
+                                          float* dev_num_points_per_pillar,
+                                          const int max_points,
+                                          const int GRID_X_SIZE)
+{
+  int ith_pillar = blockIdx.x;
+  int num_points_at_this_pillar = dev_num_points_per_pillar[ith_pillar];
+  int ith_point = threadIdx.x;
+  if(ith_point >= num_points_at_this_pillar)
+  {
+    return;
+  }
+  int x_ind = dev_x_coors[ith_pillar];
+  int y_ind = dev_y_coors[ith_pillar];
+  int pillar_ind = ith_pillar*max_points + ith_point;
+  int coors_ind = y_ind*GRID_X_SIZE*max_points + x_ind*max_points + ith_point;
+  dev_pillar_x[pillar_ind] = dev_pillar_x_in_coors[coors_ind];
+  dev_pillar_y[pillar_ind] = dev_pillar_y_in_coors[coors_ind];
+  dev_pillar_z[pillar_ind] = dev_pillar_z_in_coors[coors_ind];
+  dev_pillar_i[pillar_ind] = dev_pillar_i_in_coors[coors_ind];
+}
+
+__global__ void make_extra_network_input_kernel(float* dev_x_coors_for_sub,
+                                               float* dev_y_coors_for_sub,
+                                               float* dev_num_points_per_pillar,
+                                               float* dev_x_coors_for_sub_shaped,
+                                               float* dev_y_coors_for_sub_shaped,
+                                               float* dev_pillar_feature_mask,
+                                               const int MAX_NUM_POINTS_PER_PILLAR)
+{
+  int ith_pillar = blockIdx.x;
+  int ith_point = threadIdx.x;
+  float x = dev_x_coors_for_sub[ith_pillar];
+  float y = dev_y_coors_for_sub[ith_pillar];
+  int num_points_for_a_pillar = dev_num_points_per_pillar[ith_pillar];
+  int ind = ith_pillar*MAX_NUM_POINTS_PER_PILLAR + ith_point;
+  dev_x_coors_for_sub_shaped[ind] = x;
+  dev_y_coors_for_sub_shaped[ind] = y;
+
+  if(ith_point < num_points_for_a_pillar)
+  {
+    dev_pillar_feature_mask[ind] = 1.0;
+  }
+  else
+  {
+    dev_pillar_feature_mask[ind] = 0.0;
+  }
+}
+
+
+
+
+PreprocessPointsCuda::PreprocessPointsCuda(const int NUM_THREADS,
+                                           const int MAX_NUM_PILLARS,
+                                           const int MAX_POINTS_PER_PILLAR,
+                                           const int NUM_INDS_FOR_SCAN,
+                                           const int GRID_X_SIZE,
+                                           const int GRID_Y_SIZE,
+                                           const int GRID_Z_SIZE,
+                                           const float PILLAR_X_SIZE,
+                                           const float PILLAR_Y_SIZE,
+                                           const float PILLAR_Z_SIZE,
+                                           const float MIN_X_RANGE,
+                                           const float MIN_Y_RANGE,
+                                           const float MIN_Z_RANGE)
+:
+NUM_THREADS_(NUM_THREADS),
+MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
+MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR),
+NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN),
+GRID_X_SIZE_(GRID_X_SIZE),
+GRID_Y_SIZE_(GRID_Y_SIZE),
+GRID_Z_SIZE_(GRID_Z_SIZE),
+PILLAR_X_SIZE_(PILLAR_X_SIZE),
+PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
+PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
+MIN_X_RANGE_(MIN_X_RANGE),
+MIN_Y_RANGE_(MIN_Y_RANGE),
+MIN_Z_RANGE_(MIN_Z_RANGE)
+{
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
+
+  GPU_CHECK(cudaMalloc((void**)&dev_counter_, sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_, sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_, MAX_NUM_PILLARS_* sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_, MAX_NUM_PILLARS_* sizeof(float)));
+}
+
+PreprocessPointsCuda::~PreprocessPointsCuda()
+{
+  GPU_CHECK(cudaFree(dev_pillar_x_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_y_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_z_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_i_in_coors_));
+  GPU_CHECK(cudaFree(dev_pillar_count_histo_));
+
+  GPU_CHECK(cudaFree(dev_counter_));
+  GPU_CHECK(cudaFree(dev_pillar_count_));
+  GPU_CHECK(cudaFree(dev_x_coors_for_sub_));
+  GPU_CHECK(cudaFree(dev_y_coors_for_sub_));
+
+}
+
+void PreprocessPointsCuda::doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors,
+                    float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i,
+                    float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped,
+                    float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count)
+{
+
+  GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_counter_, 0, sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_pillar_count_, 0, sizeof(int)));
+
+  int num_block = DIVUP(in_num_points, NUM_THREADS_);
+  make_pillar_histo_kernel<<<num_block, NUM_THREADS_>>>(
+                                        dev_points,
+                                        dev_pillar_x_in_coors_,
+                                        dev_pillar_y_in_coors_,
+                                        dev_pillar_z_in_coors_,
+                                        dev_pillar_i_in_coors_,
+                                        dev_pillar_count_histo_,
+                                        in_num_points,
+                                        MAX_NUM_POINTS_PER_PILLAR_,
+                                        GRID_X_SIZE_,
+                                        GRID_Y_SIZE_,
+                                        GRID_Z_SIZE_,
+                                        MIN_X_RANGE_,
+                                        MIN_Y_RANGE_,
+                                        MIN_Z_RANGE_,
+                                        PILLAR_X_SIZE_,
+                                        PILLAR_Y_SIZE_,
+                                        PILLAR_Z_SIZE_);
+
+
+  make_pillar_index_kernel<<<GRID_X_SIZE_, GRID_Y_SIZE_>>>(
+                                         dev_pillar_count_histo_,
+                                         dev_counter_,
+                                         dev_pillar_count_,
+                                         dev_x_coors,
+                                         dev_y_coors,
+                                         dev_x_coors_for_sub_,
+                                         dev_y_coors_for_sub_,
+                                         dev_num_points_per_pillar,
+                                         dev_sparse_pillar_map,
+                                         MAX_NUM_PILLARS_,
+                                         MAX_NUM_POINTS_PER_PILLAR_,
+                                         GRID_X_SIZE_,
+                                         PILLAR_X_SIZE_,
+                                         PILLAR_Y_SIZE_,
+                                         NUM_INDS_FOR_SCAN_);
+
+  GPU_CHECK(cudaMemcpy(host_pillar_count, dev_pillar_count_, sizeof(int), cudaMemcpyDeviceToHost ) );
+  make_pillar_feature_kernel<<<host_pillar_count[0], MAX_NUM_POINTS_PER_PILLAR_>>>(
+                                                                 dev_pillar_x_in_coors_,
+                                                                 dev_pillar_y_in_coors_,
+                                                                 dev_pillar_z_in_coors_,
+                                                                 dev_pillar_i_in_coors_,
+                                                                 dev_pillar_x,
+                                                                 dev_pillar_y,
+                                                                 dev_pillar_z,
+                                                                 dev_pillar_i,
+                                                                 dev_x_coors,
+                                                                 dev_y_coors,
+                                                                 dev_num_points_per_pillar,
+                                                                 MAX_NUM_POINTS_PER_PILLAR_,
+                                                                 GRID_X_SIZE_);
+
+  make_extra_network_input_kernel<<<MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_>>>(
+                                dev_x_coors_for_sub_,
+                                dev_y_coors_for_sub_,
+                                dev_num_points_per_pillar,
+                                dev_x_coors_for_sub_shaped,
+                                dev_y_coors_for_sub_shaped,
+                                dev_pillar_feature_mask,
+                                MAX_NUM_POINTS_PER_PILLAR_);
+
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
new file mode 100644
index 00000000000..6933f1da93e
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//headers in local files
+#include "scatter_cuda.h"
+
+__global__ void scatter_kernel( int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature,
+                                const int MAX_NUM_PILLARS_, const int GRID_X_SIZE, const int GRID_Y_SIZE)
+{
+    int i_pillar = blockIdx.x;
+    int i_feature = threadIdx.x;
+    int x_ind = x_coors[i_pillar];
+    int y_ind = y_coors[i_pillar];
+    float feature = pfe_output[i_feature*MAX_NUM_PILLARS_ + i_pillar];
+    scattered_feature[i_feature*GRID_Y_SIZE*GRID_X_SIZE + y_ind * GRID_X_SIZE + x_ind] = feature;
+}
+
+ScatterCuda::ScatterCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int GRID_X_SIZE, const int GRID_Y_SIZE):
+NUM_THREADS_(NUM_THREADS),
+MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
+GRID_X_SIZE_(GRID_X_SIZE),
+GRID_Y_SIZE_(GRID_Y_SIZE)
+{
+}
+
+void ScatterCuda::doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature)
+{
+  scatter_kernel<<<pillar_count, NUM_THREADS_>>>(x_coors, y_coors, pfe_output, scattered_feature,
+                                                MAX_NUM_PILLARS_, GRID_X_SIZE_, GRID_Y_SIZE_);
+}
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
new file mode 100755
index 00000000000..2c79d3dbb70
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<package>
+    <name>lidar_point_pillars</name>
+    <version>1.9.1</version>
+    <description>lidar_point_pillars</description>
+
+    <maintainer email="cirpue49@yahoo.co.jp">Kosuke Murakami</maintainer>
+    <license>BSD</license>
+
+    <buildtool_depend>catkin</buildtool_depend>
+
+    <build_depend>roscpp</build_depend>
+    <build_depend>pcl_ros</build_depend>
+    <build_depend>cuda-toolkit-10-0</build_depend>
+    <build_depend>autoware_msgs</build_depend>
+
+
+    <run_depend>roscpp</run_depend>
+    <run_depend>pcl_ros</run_depend>
+    <run_depend>cuda-toolkit-10-0</run_depend>
+    <run_depend>autoware_msgs</run_depend>
+
+    <export></export>
+</package>
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
new file mode 100644
index 00000000000..0b209a92471
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2018-2019 Autoware Foundation. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * @file test_point_pillars.cpp
+ * @brief unit test file
+ * @author Kosuke Murakami
+ * @date 2019/02/26
+ */
+
+#include <ros/ros.h>
+#include <ros/package.h>
+
+#include <gtest/gtest.h>
+
+#include <pcl/io/pcd_io.h>
+#include <pcl/point_types.h>
+
+#include "point_pillars_ros.h"
+#include "preprocess_points.h"
+
+class TestSuite : public ::testing::Test
+{
+public:
+  TestSuite()
+  {
+  }
+  ~TestSuite()
+  {
+  }
+};
+
+class TestClass
+{
+public:
+  TestClass(const int MAX_NUM_PILLARS,
+            const int MAX_NUM_POINTS_PER_PILLAR,
+            const int GRID_X_SIZE,
+            const int GRID_Y_SIZE,
+            const int GRID_Z_SIZE,
+            const float PILLAR_X_SIZE,
+            const float PILLAR_Y_SIZE,
+            const float PILLAR_Z_SIZE,
+            const float MIN_X_RANGE,
+            const float MIN_Y_RANGE,
+            const float MIN_Z_RANGE,
+            const int NUM_INDS_FOR_SCAN);
+  const int MAX_NUM_PILLARS_;
+  const int MAX_NUM_POINTS_PER_PILLAR_;
+  const int GRID_X_SIZE_;
+  const int GRID_Y_SIZE_;
+  const int GRID_Z_SIZE_;
+  const float PILLAR_X_SIZE_;
+  const float PILLAR_Y_SIZE_;
+  const float PILLAR_Z_SIZE_;
+  const float MIN_X_RANGE_;
+  const float MIN_Y_RANGE_;
+  const float MIN_Z_RANGE_;
+  const int NUM_INDS_FOR_SCAN_;
+  void loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr,
+    const std::string& in_file );
+  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array);
+  void preprocess(const float* in_points_array, int in_num_points,
+    int* x_coors, int* y_coors, float* num_points_per_pillar,
+    float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+    float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+    float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count);
+private:
+
+  std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
+};
+
+TestClass::TestClass(
+          const int MAX_NUM_PILLARS,
+          const int MAX_NUM_POINTS_PER_PILLAR,
+          const int GRID_X_SIZE,
+          const int GRID_Y_SIZE,
+          const int GRID_Z_SIZE,
+          const float PILLAR_X_SIZE,
+          const float PILLAR_Y_SIZE,
+          const float PILLAR_Z_SIZE,
+          const float MIN_X_RANGE,
+          const float MIN_Y_RANGE,
+          const float MIN_Z_RANGE,
+          const int NUM_INDS_FOR_SCAN):
+MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
+MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR),
+GRID_X_SIZE_(GRID_X_SIZE),
+GRID_Y_SIZE_(GRID_Y_SIZE),
+GRID_Z_SIZE_(GRID_Z_SIZE),
+PILLAR_X_SIZE_(PILLAR_X_SIZE),
+PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
+PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
+MIN_X_RANGE_(MIN_X_RANGE),
+MIN_Y_RANGE_(MIN_Y_RANGE),
+MIN_Z_RANGE_(MIN_Z_RANGE),
+NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+{
+  preprocess_points_ptr_.reset(new PreprocessPoints(
+    MAX_NUM_PILLARS_,
+    MAX_NUM_POINTS_PER_PILLAR_,
+    GRID_X_SIZE_,
+    GRID_Y_SIZE_,
+    GRID_Z_SIZE_,
+    PILLAR_X_SIZE_,
+    PILLAR_Y_SIZE_,
+    PILLAR_Z_SIZE_,
+    MIN_X_RANGE_,
+    MIN_Y_RANGE_,
+    MIN_Z_RANGE_,
+    NUM_INDS_FOR_SCAN_
+    ));
+};
+
+void TestClass::preprocess(const float* in_points_array, int in_num_points,
+                int* x_coors, int* y_coors, float* num_points_per_pillar,
+                float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+                float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
+{
+  preprocess_points_ptr_->preprocess(in_points_array, in_num_points,
+                                    x_coors, y_coors, num_points_per_pillar,
+                                    pillar_x, pillar_y, pillar_z, pillar_i,
+                                    x_coors_for_sub_shaped, y_coors_for_sub_shaped,
+                                    pillar_feature_mask, sparse_pillar_map, host_pillar_count);
+}
+
+void TestClass::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array)
+{
+  for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
+  {
+    pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
+    out_points_array[i*4 + 0] = point.x;
+    out_points_array[i*4 + 1] = point.y;
+    out_points_array[i*4 + 2] = point.z;
+    out_points_array[i*4 + 3] = point.intensity;
+  }
+}
+
+void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr,
+                           const std::string& in_file )
+{
+  if (pcl::io::loadPCDFile<pcl::PointXYZI> (in_file, *in_pcl_pc_ptr) == -1)
+  {
+    ROS_ERROR("Couldn't read test pcd file \n");
+  }
+}
+
+TEST(TestSuite, CheckPreprocessPointsCPU)
+{
+  TestClass test_obj(
+    12000,
+    100,
+    432,
+    496,
+    1,
+    0.16,
+    0.16,
+    4.0,
+    0,
+    -39.68,
+    -3.0,
+    512);
+
+  pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
+  std::string package_path = ros::package::getPath("lidar_point_pillars");
+  std::string in_file = package_path + "/test/data/1527836009720148.pcd";
+  test_obj.loadPoints(pcl_pc_ptr, in_file);
+
+  float * points_array = new float[pcl_pc_ptr->size()*4];
+  test_obj.pclToArray(pcl_pc_ptr, points_array);
+
+
+  int x_coors[test_obj.MAX_NUM_PILLARS_]= {0};
+  int y_coors[test_obj.MAX_NUM_PILLARS_]= {0};
+  float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = {0};
+  float * pillar_x = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_y = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_z = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_i = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+
+  float * x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float * y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float * pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+
+  float* sparse_pillar_map = new float[512*512];
+
+  int host_pillar_count[1] = {0};
+  test_obj.preprocess(points_array, pcl_pc_ptr->size(),
+                      x_coors, y_coors, num_points_per_pillar,
+                      pillar_x, pillar_y, pillar_z, pillar_i,
+                      x_coors_for_sub_shaped, y_coors_for_sub_shaped,
+                      pillar_feature_mask, sparse_pillar_map, host_pillar_count);
+  EXPECT_EQ(15, num_points_per_pillar[0]);
+  EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]);
+  EXPECT_EQ(71, x_coors[100]);
+  EXPECT_EQ(177, y_coors[100]);
+  EXPECT_EQ(1, sparse_pillar_map[177*512 + 71]);
+  EXPECT_EQ(6270, host_pillar_count[0]);
+  delete[] points_array;
+  delete[] pillar_x;
+  delete[] pillar_y;
+  delete[] pillar_z;
+  delete[] pillar_i;
+  delete[] x_coors_for_sub_shaped;
+  delete[] y_coors_for_sub_shaped;
+  delete[] pillar_feature_mask;
+  delete[] sparse_pillar_map;
+}
+
+int main(int argc, char** argv)
+{
+  testing::InitGoogleTest(&argc, argv);
+  ros::init(argc, argv, "TestNode");
+  return RUN_ALL_TESTS();
+}
diff --git a/ros/src/util/packages/runtime_manager/scripts/computing.yaml b/ros/src/util/packages/runtime_manager/scripts/computing.yaml
index 61a12437e33..2aa7f0b80e9 100755
--- a/ros/src/util/packages/runtime_manager/scripts/computing.yaml
+++ b/ros/src/util/packages/runtime_manager/scripts/computing.yaml
@@ -193,6 +193,11 @@ subs :
         cmd  : roslaunch lidar_apollo_cnn_seg_detect lidar_apollo_cnn_seg_detect.launch
         param: lidar_cnn_baidu_params
 
+      - name : lidar_point_pillars
+        desc : lidar_point_pillars. Requires CUDA and TensorRT
+        cmd  : roslaunch lidar_point_pillars lidar_point_pillars.launch
+        param: lidar_point_pillars_params
+
       - name : lidar_naive_l_shape_detect
         desc : lidar_naive_l_shape_detect desc sample
         cmd  : roslaunch lidar_naive_l_shape_detect lidar_naive_l_shape_detect.launch
@@ -5406,4 +5411,65 @@ params :
       v         : 0
       cmd_param :
         dash      : ''
-        delim     : ':='
\ No newline at end of file
+        delim     : ':='
+  - name  : lidar_point_pillars_params
+    vars  :
+    - name      : input_topic
+      desc      : PointCloud topic to be fed to the network for detection.
+      label     : Input topic
+      kind  : topic
+      topic_type : sensor_msgs/PointCloud2
+      v         : /points_raw
+      cmd_param :
+        dash      : ''
+        delim     : ':='
+    - name      : baselink_support
+      desc      : Whether to use baselink to adujust current sensor setup.
+      label     : Baselink support
+      kind      : checkbox
+      v         : True
+      cmd_param :
+        dash      : ''
+        delim     : ':='
+    - name      : reproduce_result_mode
+      desc      : Whether to enable reproducible result at the cost of the runtime.
+      label     : reproduce_result_mode
+      kind      : checkbox
+      v         : False
+      cmd_param :
+        dash      : ''
+        delim     : ':='
+    - name  : score_threshold
+      desc  : Detections with a confidence value larger than this value will be displayed.
+      label : Score threshold
+      min   : 0.0
+      max   : 1.0
+      v     : 0.5
+      cmd_param :
+        dash      : ''
+        delim     : ':='
+    - name  : nms_overlap_threshold
+      desc  : Lower IOU than this value will be filtered.
+      label : NMS overlap threshold
+      min   : 0.0
+      max   : 1.0
+      v     : 0.5
+      cmd_param :
+        dash      : ''
+        delim     : ':='
+    - name     : pfe_onnx_file
+      desc     : Pillar Feature Extractor onnx file.
+      label    : 'PFE ONNX file'
+      kind     : path
+      v        : ''
+      cmd_param:
+        dash     : ''
+        delim    : ':='
+    - name     : rpn_onnx_file
+      desc     : Region Proposal Network onnx file.
+      label    : 'RPN ONNX file'
+      kind     : path
+      v        : ''
+      cmd_param:
+        dash     : ''
+        delim    : ':='

From 6b1f65ceec127fc1c4bb8af9e7ab2ed46e1d0bce Mon Sep 17 00:00:00 2001
From: cirpue49 <cirpue49@yahoo.co.jp>
Date: Tue, 26 Feb 2019 17:14:30 +0900
Subject: [PATCH 02/37] add readme.md

---
 .../packages/lidar_point_pillars/README.md    | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
new file mode 100644
index 00000000000..ffae0b6c011
--- /dev/null
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -0,0 +1,43 @@
+# Point Pillars for 3D Object Detection
+
+Autoware package for Point Pillars.  ([reference paper](https://arxiv.org/abs/1812.05784))
+
+## Pre requisites
+
+CUDA Toolkit v9.0 or 10.0
+
+TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
+
+
+## How to launch
+
+Using launch file:
+`roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw`
+
+
+## Parameters
+
+|Parameter| Type| Description|Default|
+----------|-----|--------|----|
+|`input_topic`|*String*|Input topic Pointcloud. Default.|`/points_raw`|
+|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. Default.|`True`|
+|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. Default.|`False`|
+|`score_threshold`|*Float*|Minimum score required to include the result (0.1)|0.5|
+|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS (0.1)|0.5|
+|`pfe_onnx_file`|*String* |Path to the PFE onnx file||
+|`rpn_onnx_file`|*String* |Path to the RPN onnx file||
+
+## Outputs
+
+|Topic|Type|Description|
+|---|---|---|
+|`/detection/lidar_detector/objects`|`autoware_msgs/DetectedObjetArray`|Array of Detected Objects in Autoware format|
+
+## Notes
+
+* To display the results in Rviz `objects_visualizer` is required.
+(Launch file launches automatically this node).
+
+* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp).Notice that this model is under `BY-NC-SA 3.0` license.
+
+* If trained model comes from KITTI data, users might not be allowed to use the model for Commercial purpose.

From a9a8cf9b9af94faba149260a9bf11b9d6a5fc049 Mon Sep 17 00:00:00 2001
From: cirpue49 <cirpue49@yahoo.co.jp>
Date: Tue, 26 Feb 2019 19:01:25 +0900
Subject: [PATCH 03/37] Refactor codes

---
 .../include/point_pillars.h                   |   2 +
 .../include/postprocess_cuda.h                |   7 +-
 .../include/preprocess_points.h               |   5 +-
 .../nodes/anchor_mask_cuda.cu                 |   2 +
 .../lidar_point_pillars/nodes/nms_cuda.cu     |   1 +
 .../nodes/point_pillars.cpp                   |  11 +-
 .../nodes/postprocess_cuda.cu                 | 104 ++++++++++--------
 .../nodes/preprocess_points.cpp               |  20 ++--
 8 files changed, 90 insertions(+), 62 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index c58fc5e58d7..b0c69147ec9 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -115,6 +115,8 @@ class PointPillars
   const float ANCHOR_DX_SIZE_;
   const float ANCHOR_DY_SIZE_;
   const float ANCHOR_DZ_SIZE_;
+  const int NUM_BOX_CORNERS_;
+  const int NUM_OUTPUT_BOX_FEATURE_;
   //end initializer list
 
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
index 261cdd19ed1..3768c040499 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
@@ -42,6 +42,8 @@ class PostprocessCuda
   const float score_threshold_;
   const int NUM_THREADS_;
   const float nms_overlap_threshold_;
+  const int NUM_BOX_CORNERS_;
+  const int NUM_OUTPUT_BOX_FEATURE_;
 
   std::unique_ptr<NMSCuda> nms_cuda_ptr_;
 
@@ -56,11 +58,14 @@ class PostprocessCuda
   * @param[in] score_threshold Score threshold for filtering output
   * @param[in] NUM_THREDS Number of threads for launching cuda kernel
   * @param[in] nms_overlap_threshold IOU threshold for NMS
+  * @param[in] NUM_BOX_CORNERS Number of box's corner
+  * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature
   * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam
   */
   PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX,
     const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS,
-    const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold);
+    const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold,
+    const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE);
 
     /**
     * @brief Postprocessing for the network output
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
index bd0b54720f2..426dd50e714 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
@@ -42,6 +42,7 @@ class PreprocessPoints
   const float MIN_Y_RANGE_;
   const float MIN_Z_RANGE_;
   const int NUM_INDS_FOR_SCAN_;
+  const int NUM_BOX_CORNERS_;
 
 
 public:
@@ -59,6 +60,7 @@ class PreprocessPoints
   * @param[in] MIN_Y_RANGE Minimum y value for pointcloud
   * @param[in] MIN_Z_RANGE Minimum z value for pointcloud
   * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
+  * @param[in] NUM_BOX_CORNERS Number of box's corner
   * @details Captital variables never change after the compile
   */
   PreprocessPoints(const int MAX_NUM_PILLARS,
@@ -72,7 +74,8 @@ class PreprocessPoints
                    const float MIN_X_RANGE,
                    const float MIN_Y_RANGE,
                    const float MIN_Z_RANGE,
-                   const int NUM_INDS_FOR_SCAN);
+                   const int NUM_INDS_FOR_SCAN,
+                   const int NUM_BOX_CORNERS);
 
   /**
   * @brief CPU preprocessing for input pointcloud
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
index 4383c1ae63c..72c7556f0c8 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
@@ -18,6 +18,7 @@
 #include "common.h"
 #include "anchor_mask_cuda.h"
 
+//modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf
 __global__ void scan_x(int *g_odata, int *g_idata, int n)
 {
   extern __shared__ int temp[];  // allocated on invocation
@@ -65,6 +66,7 @@ __global__ void scan_x(int *g_odata, int *g_idata, int n)
   }
 }
 
+//modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf
 __global__ void scan_y(int *g_odata, int *g_idata, int n)
 {
   extern __shared__ int temp[];  // allocated on invocation
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
index 7680a44ad87..d675f9e32f8 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
@@ -33,6 +33,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       min(n_boxes - row_start * block_threads, block_threads);
   const int col_size =
       min(n_boxes - col_start * block_threads, block_threads);
+  //Only can initialize with figures, not by variables
   __shared__ float block_boxes[64 * 4];
   if (threadIdx.x < col_size)
   {
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 4035f399ffa..767008e2788 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -62,7 +62,9 @@ NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shar
 SENSOR_HEIGHT_(1.73),
 ANCHOR_DX_SIZE_(1.6),
 ANCHOR_DY_SIZE_(3.9),
-ANCHOR_DZ_SIZE_(1.56)
+ANCHOR_DZ_SIZE_(1.56),
+NUM_BOX_CORNERS_(4),
+NUM_OUTPUT_BOX_FEATURE_(7)
 {
   anchors_px_ = new float[NUM_ANCHOR_];
   anchors_py_ = new float[NUM_ANCHOR_];
@@ -91,7 +93,8 @@ ANCHOR_DZ_SIZE_(1.56)
       MIN_X_RANGE_,
       MIN_Y_RANGE_,
       MIN_Z_RANGE_,
-      NUM_INDS_FOR_SCAN_));
+      NUM_INDS_FOR_SCAN_,
+      NUM_BOX_CORNERS_));
   }
   else
   {
@@ -141,7 +144,9 @@ ANCHOR_DZ_SIZE_(1.56)
     NUM_ANCHOR_R_INDS_,
     score_threshold_,
     NUM_THREADS_,
-    nms_overlap_threshold_
+    nms_overlap_threshold_,
+    NUM_BOX_CORNERS_,
+    NUM_OUTPUT_BOX_FEATURE_
   ));
 
   deviceMemoryMalloc();
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
index 48862d2256e..2b5501e3541 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
@@ -26,7 +26,8 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
                               const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
                               const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
                               float* filtered_box, float* filtered_score, int* filtered_dir, float* box_for_nms, int* filter_count,
-                              const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold)
+                              const float FLOAT_MIN, const float FLOAT_MAX, const float score_threshold,
+                              const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE)
 {
   // boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -39,23 +40,23 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
 
     //decode network output
     float diagonal = sqrtf(dev_anchors_dx[tid]*dev_anchors_dx[tid] + dev_anchors_dy[tid]*dev_anchors_dy[tid]);
-    float box_px = box_preds[tid*7 + 0] * diagonal + dev_anchors_px[tid];
-    float box_py = box_preds[tid*7 + 1] * diagonal + dev_anchors_py[tid];
-    float box_pz = box_preds[tid*7 + 2] * dev_anchors_dz[tid] + za;
-    float box_dx = expf(box_preds[tid*7 + 3]) * dev_anchors_dx[tid];
-    float box_dy = expf(box_preds[tid*7 + 4]) * dev_anchors_dy[tid];
-    float box_dz = expf(box_preds[tid*7 + 5]) * dev_anchors_dz[tid];
-    float box_ro = box_preds[tid*7 + 6] + dev_anchors_ro[tid];
+    float box_px = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 0] * diagonal + dev_anchors_px[tid];
+    float box_py = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 1] * diagonal + dev_anchors_py[tid];
+    float box_pz = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 2] * dev_anchors_dz[tid] + za;
+    float box_dx = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 3]) * dev_anchors_dx[tid];
+    float box_dy = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 4]) * dev_anchors_dy[tid];
+    float box_dz = expf(box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 5]) * dev_anchors_dz[tid];
+    float box_ro = box_preds[tid*NUM_OUTPUT_BOX_FEATURE + 6] + dev_anchors_ro[tid];
 
     box_pz = box_pz - box_dz/2;
 
-    filtered_box[counter*7 + 0] = box_px;
-    filtered_box[counter*7 + 1] = box_py;
-    filtered_box[counter*7 + 2] = box_pz;
-    filtered_box[counter*7 + 3] = box_dx;
-    filtered_box[counter*7 + 4] = box_dy;
-    filtered_box[counter*7 + 5] = box_dz;
-    filtered_box[counter*7 + 6] = box_ro;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 0] = box_px;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 1] = box_py;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 2] = box_pz;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 3] = box_dx;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 4] = box_dy;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 5] = box_dz;
+    filtered_box[counter*NUM_OUTPUT_BOX_FEATURE + 6] = box_ro;
     filtered_score[counter] = score;
 
     int direction_label;
@@ -77,6 +78,7 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
                           float( 0.5*box_dx), float(-0.5*box_dy)};
 
     //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax)
+    //cannot use variable initialization since "error: expression must have a constant value"
     float rotated_corners[2*4];
     float offset_corners[2*4];
     float sin_yaw = sinf(box_ro);
@@ -85,7 +87,7 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
     float ymin = FLOAT_MAX;
     float xmax = FLOAT_MIN;
     float ymax = FLOAT_MIN;
-    for(size_t i = 0; i < 4; i++)
+    for(size_t i = 0; i < NUM_BOX_CORNERS; i++)
     {
       rotated_corners[i*2 + 0] = cos_yaw*corners[i*2 + 0] - sin_yaw*corners[i*2 + 1];
       rotated_corners[i*2 + 1] = sin_yaw*corners[i*2 + 0] + cos_yaw*corners[i*2 + 1];
@@ -99,42 +101,44 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
       ymax = fmaxf(ymax, offset_corners[i*2 + 1]);
     }
     // box_for_nms(num_box, 4)
-    box_for_nms[counter*4 + 0] = xmin;
-    box_for_nms[counter*4 + 1] = ymin;
-    box_for_nms[counter*4 + 2] = xmax;
-    box_for_nms[counter*4 + 3] = ymax;
+    box_for_nms[counter*NUM_BOX_CORNERS + 0] = xmin;
+    box_for_nms[counter*NUM_BOX_CORNERS + 1] = ymin;
+    box_for_nms[counter*NUM_BOX_CORNERS + 2] = xmax;
+    box_for_nms[counter*NUM_BOX_CORNERS + 3] = ymax;
 
   }
 }
 
 __global__ void sort_boxes_by_indexes_kernel(float* filtered_box, int* filtered_dir, float* box_for_nms, int* indexes, int filter_count,
-                                             float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms)
+                                             float* sorted_filtered_boxes, int* sorted_filtered_dir, float* sorted_box_for_nms,
+                                             const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE)
 {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
   if(tid < filter_count)
   {
     int sort_index = indexes[tid];
-    sorted_filtered_boxes[tid*7 + 0] = filtered_box[sort_index*7 + 0];
-    sorted_filtered_boxes[tid*7 + 1] = filtered_box[sort_index*7 + 1];
-    sorted_filtered_boxes[tid*7 + 2] = filtered_box[sort_index*7 + 2];
-    sorted_filtered_boxes[tid*7 + 3] = filtered_box[sort_index*7 + 3];
-    sorted_filtered_boxes[tid*7 + 4] = filtered_box[sort_index*7 + 4];
-    sorted_filtered_boxes[tid*7 + 5] = filtered_box[sort_index*7 + 5];
-    sorted_filtered_boxes[tid*7 + 6] = filtered_box[sort_index*7 + 6];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 0] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 0];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 1] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 1];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 2] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 2];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 3] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 3];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 4] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 4];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 5] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 5];
+    sorted_filtered_boxes[tid*NUM_OUTPUT_BOX_FEATURE + 6] = filtered_box[sort_index*NUM_OUTPUT_BOX_FEATURE + 6];
 
     sorted_filtered_dir[tid] = filtered_dir[sort_index];
 
 
-    sorted_box_for_nms[tid*4 + 0] = box_for_nms[sort_index*4 + 0];
-    sorted_box_for_nms[tid*4 + 1] = box_for_nms[sort_index*4 + 1];
-    sorted_box_for_nms[tid*4 + 2] = box_for_nms[sort_index*4 + 2];
-    sorted_box_for_nms[tid*4 + 3] = box_for_nms[sort_index*4 + 3];
+    sorted_box_for_nms[tid*NUM_BOX_CORNERS + 0] = box_for_nms[sort_index*NUM_BOX_CORNERS + 0];
+    sorted_box_for_nms[tid*NUM_BOX_CORNERS + 1] = box_for_nms[sort_index*NUM_BOX_CORNERS + 1];
+    sorted_box_for_nms[tid*NUM_BOX_CORNERS + 2] = box_for_nms[sort_index*NUM_BOX_CORNERS + 2];
+    sorted_box_for_nms[tid*NUM_BOX_CORNERS + 3] = box_for_nms[sort_index*NUM_BOX_CORNERS + 3];
   }
 }
 
 PostprocessCuda::PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX,
   const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS,
-  const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold):
+  const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold,
+  const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE):
 FLOAT_MIN_(FLOAT_MIN),
 FLOAT_MAX_(FLOAT_MAX),
 NUM_ANCHOR_X_INDS_(NUM_ANCHOR_X_INDS),
@@ -142,7 +146,9 @@ NUM_ANCHOR_Y_INDS_(NUM_ANCHOR_Y_INDS),
 NUM_ANCHOR_R_INDS_(NUM_ANCHOR_R_INDS),
 score_threshold_(score_threshold),
 NUM_THREADS_(NUM_THREADS),
-nms_overlap_threshold_(nms_overlap_threshold)
+nms_overlap_threshold_(nms_overlap_threshold),
+NUM_BOX_CORNERS_(NUM_BOX_CORNERS),
+NUM_OUTPUT_BOX_FEATURE_(NUM_OUTPUT_BOX_FEATURE)
 {
   nms_cuda_ptr_.reset(new NMSCuda(
     NUM_THREADS,
@@ -161,7 +167,8 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float
                                 dev_anchors_px, dev_anchors_py, dev_anchors_pz,
                                 dev_anchors_dx, dev_anchors_dy, dev_anchors_dz, dev_anchors_ro,
                                 dev_filtered_box, dev_filtered_score, dev_filtered_dir, dev_box_for_nms, dev_filter_count,
-                                FLOAT_MIN_, FLOAT_MAX_, score_threshold_);
+                                FLOAT_MIN_, FLOAT_MAX_, score_threshold_,
+                                NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_);
 
   int host_filter_count[1];
   GPU_CHECK( cudaMemcpy(host_filter_count, dev_filter_count, sizeof(int), cudaMemcpyDeviceToHost ) );
@@ -174,16 +181,17 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float
   float* dev_sorted_filtered_box,  *dev_sorted_box_for_nms;
   int* dev_sorted_filtered_dir;
   GPU_CHECK(cudaMalloc((void**)&dev_indexes, host_filter_count[0]*sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, 7*host_filter_count[0]*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0]*sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_sorted_filtered_dir, host_filter_count[0]*sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, 4*host_filter_count[0]*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sorted_box_for_nms, NUM_BOX_CORNERS_*host_filter_count[0]*sizeof(float)));
   thrust::sequence(thrust::device, dev_indexes, dev_indexes + host_filter_count[0]);
   thrust::sort_by_key(thrust::device, dev_filtered_score, dev_filtered_score + size_t(host_filter_count[0]), dev_indexes, thrust::greater<float>());
 
 
   const int num_blocks = DIVUP(host_filter_count[0], NUM_THREADS_);
   sort_boxes_by_indexes_kernel<<<num_blocks, NUM_THREADS_>>>(dev_filtered_box, dev_filtered_dir, dev_box_for_nms, dev_indexes, host_filter_count[0],
-                                                              dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms);
+                                                              dev_sorted_filtered_box, dev_sorted_filtered_dir, dev_sorted_box_for_nms,
+                                                              NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_);
 
   int keep_inds[host_filter_count[0]] = {0};
   nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects);
@@ -191,24 +199,24 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float
 
   float host_filtered_box[host_filter_count[0]*7];
   int host_filtered_dir[host_filter_count[0]];
-  GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, 7*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) );
-  GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir,   host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) );
+  GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) );
+  GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir,                        host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) );
   for (size_t i = 0; i < out_num_objects; i++)
   {
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+0]);
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+1]);
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+2]);
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+3]);
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+4]);
-    out_detection.push_back(host_filtered_box[keep_inds[i]*7+5]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+0]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+1]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+2]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+3]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+4]);
+    out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+5]);
 
     if(host_filtered_dir[keep_inds[i]] == 0)
     {
-      out_detection.push_back(host_filtered_box[keep_inds[i]*7+6] + M_PI);
+      out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+6] + M_PI);
     }
     else
     {
-      out_detection.push_back(host_filtered_box[keep_inds[i]*7+6]);
+      out_detection.push_back(host_filtered_box[keep_inds[i]*NUM_OUTPUT_BOX_FEATURE_+6]);
     }
   }
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index 9ea42f9190d..51fe2a07884 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -32,7 +32,8 @@ PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS,
                                    const float MIN_X_RANGE,
                                    const float MIN_Y_RANGE,
                                    const float MIN_Z_RANGE,
-                                   const int NUM_INDS_FOR_SCAN)
+                                   const int NUM_INDS_FOR_SCAN,
+                                   const int NUM_BOX_CORNERS)
 :
 MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
 MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR),
@@ -45,7 +46,8 @@ PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
 MIN_X_RANGE_(MIN_X_RANGE),
 MIN_Y_RANGE_(MIN_Y_RANGE),
 MIN_Z_RANGE_(MIN_Z_RANGE),
-NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN),
+NUM_BOX_CORNERS_(NUM_BOX_CORNERS)
 {
 }
 
@@ -96,9 +98,9 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
                      x_coors_for_sub_shaped, y_coors_for_sub_shaped);
   for(int i = 0; i < in_num_points; i++)
   {
-    int x_coor = std::floor((in_points_array[i*4 + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_);
-    int y_coor = std::floor((in_points_array[i*4 + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_);
-    int z_coor = std::floor((in_points_array[i*4 + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_);
+    int x_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_);
+    int y_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_);
+    int z_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_);
     if(x_coor < 0 || x_coor >= GRID_X_SIZE_ ||
        y_coor < 0 || y_coor >= GRID_Y_SIZE_ ||
        z_coor < 0 || z_coor >= GRID_Z_SIZE_)
@@ -133,10 +135,10 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
     int num = num_points_per_pillar[pillar_index];
     if (num < MAX_NUM_POINTS_PER_PILLAR_)
     {
-      pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 0];
-      pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 1];
-      pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 2];
-      pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*4 + 3];
+      pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 0];
+      pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 1];
+      pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 2];
+      pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 3];
       num_points_per_pillar[pillar_index] += 1;
     }
   }

From 49f6fba3d4b0d006aaecd561ba5d2b70f0e11f1a Mon Sep 17 00:00:00 2001
From: cirpue49 <cirpue49@yahoo.co.jp>
Date: Tue, 26 Feb 2019 19:06:52 +0900
Subject: [PATCH 04/37] apply ros clang

---
 .../include/anchor_mask_cuda.h                |  34 +-
 .../lidar_point_pillars/include/common.h      |  38 +-
 .../lidar_point_pillars/include/nms_cuda.h    |  10 +-
 .../include/point_pillars.h                   |  77 +--
 .../include/point_pillars_ros.h               |  40 +-
 .../include/postprocess_cuda.h                |  81 +--
 .../include/preprocess_points.h               |  47 +-
 .../include/preprocess_points_cuda.h          |  46 +-
 .../include/scatter_cuda.h                    |  18 +-
 .../nodes/lidar_point_pillars_node.cpp        |   4 +-
 .../nodes/point_pillars.cpp                   | 584 +++++++++---------
 .../nodes/point_pillars_ros.cpp               |  91 ++-
 .../nodes/preprocess_points.cpp               | 145 ++---
 .../test/src/test_point_pillars.cpp           | 183 ++----
 14 files changed, 642 insertions(+), 756 deletions(-)
 mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
 mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
 mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
 mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
 mode change 100755 => 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
index ccca5cb6b6d..6a8b555422c 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
- /**
- * @file anchor_mask_cuda.h
- * @brief Make anchor mask for filtering output
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file anchor_mask_cuda.h
+* @brief Make anchor mask for filtering output
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
-#ifndef   ANCHOR_MASK_CUDA_H
-#define   ANCHOR_MASK_CUDA_H
+#ifndef ANCHOR_MASK_CUDA_H
+#define ANCHOR_MASK_CUDA_H
 
 class AnchorMaskCuda
 {
@@ -37,6 +37,7 @@ class AnchorMaskCuda
   const float PILLAR_Y_SIZE_;
   const int GRID_X_SIZE_;
   const int GRID_Y_SIZE_;
+
 public:
   /**
   * @brief Constructor
@@ -52,16 +53,9 @@ class AnchorMaskCuda
   * @param[in] GRID_Y_SIZE Number of pillars in y-coordinate
   * @details Captital variables never change after the compile
   */
-  AnchorMaskCuda(const int NUM_INDS_FOR_SCAN,
-                 const int NUM_ANCHOR_X_INDS,
-                 const int NUM_ANCHOR_Y_INDS,
-                 const int NUM_ANCHOR_R_INDS,
-                 const float MIN_X_RANGE,
-                 const float MIN_Y_RANGE,
-                 const float PILLAR_X_SIZE,
-                 const float PILLAR_Y_SIZE,
-                 const int GRID_X_SIZE,
-                 const int GRID_Y_SIZE);
+  AnchorMaskCuda(const int NUM_INDS_FOR_SCAN, const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS,
+                 const int NUM_ANCHOR_R_INDS, const float MIN_X_RANGE, const float MIN_Y_RANGE,
+                 const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const int GRID_X_SIZE, const int GRID_Y_SIZE);
 
   /**
   * @brief call cuda code for making anchor mask
@@ -77,8 +71,8 @@ class AnchorMaskCuda
   * @details dev_* means device memory. Make a mask for activating pillar occupancy area
   */
   void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y,
-    const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
-    const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask);
+                        const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
+                        const float* dev_box_anchors_max_x, const float* dev_box_anchors_max_y, int* dev_anchor_mask);
 };
 
 #endif  // ANCHOR_MASK_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
index 7cf02333b4d..46e816db735 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
@@ -14,32 +14,36 @@
  * limitations under the License.
  */
 
- /**
- * @file common.h
- * @brief MACRO for CUDA codes
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file common.h
+* @brief MACRO for CUDA codes
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
-#ifndef   COMMON_H
-#define   COMMON_H
+#ifndef COMMON_H
+#define COMMON_H
 
-//headers in STL
+// headers in STL
 #include <stdio.h>
 
-//headers in CUDA
+// headers in CUDA
 #include <cuda_runtime_api.h>
 
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
 
-#define GPU_CHECK(ans) { GPUAssert((ans), __FILE__, __LINE__); }
-inline void GPUAssert(cudaError_t code, const char *file, int line, bool abort=true)
+#define GPU_CHECK(ans)                                                                                                 \
+  {                                                                                                                    \
+    GPUAssert((ans), __FILE__, __LINE__);                                                                              \
+  }
+inline void GPUAssert(cudaError_t code, const char* file, int line, bool abort = true)
 {
-   if (code != cudaSuccess)
-   {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
+  if (code != cudaSuccess)
+  {
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    if (abort)
+      exit(code);
+  }
 }
 
 #endif  // COMMON_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
index 7a34abf498b..30f2a997062 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
@@ -21,14 +21,14 @@
 * @date 2019/02/26
 */
 
-#ifndef   NMS_CUDA_H
-#define   NMS_CUDA_H
+#ifndef NMS_CUDA_H
+#define NMS_CUDA_H
 
-//heders in STL
+// heders in STL
 #include <vector>
 #include <iostream>
 
-//headers in local files
+// headers in local files
 #include "common.h"
 
 class NMSCuda
@@ -54,7 +54,7 @@ class NMSCuda
   * @param[out] out_num_to_keep Number of keep bounding box
   * @details Includes CUDA NMS and postprocessing for selecting box in CPU
   */
-  void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds,  int& out_num_to_keep);
+  void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep);
 };
 
 #endif  // NMS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
old mode 100755
new mode 100644
index b0c69147ec9..e65a5c02433
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -14,18 +14,17 @@
  * limitations under the License.
  */
 
- /**
- * @file point_pillars.h
- * @brief Algorithm for PointPillars
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
-
+/**
+* @file point_pillars.h
+* @brief Algorithm for PointPillars
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
 #ifndef POINTS_PILLAR_H
 #define POINTS_PILLAR_H
 
-//headers in STL
+// headers in STL
 #include <vector>
 #include <iomanip>
 #include <string>
@@ -34,11 +33,11 @@
 #include <algorithm>
 #include <limits>
 
-//headers in TensorRT
+// headers in TensorRT
 #include "NvInfer.h"
 #include "NvOnnxParser.h"
 
-//headers in local files
+// headers in local files
 #include "common.h"
 #include "preprocess_points.h"
 #include "preprocess_points_cuda.h"
@@ -46,40 +45,48 @@
 #include "scatter_cuda.h"
 #include "postprocess_cuda.h"
 
-
 // Logger for TensorRT info/warning/errors
 class Logger : public nvinfer1::ILogger
 {
 public:
-    Logger(Severity severity = Severity::kWARNING)
-        : reportableSeverity(severity)
-    {
-    }
+  Logger(Severity severity = Severity::kWARNING) : reportableSeverity(severity)
+  {
+  }
+
+  void log(Severity severity, const char* msg) override
+  {
+    // suppress messages with severity enum value greater than the reportable
+    if (severity > reportableSeverity)
+      return;
 
-    void log(Severity severity, const char* msg) override
+    switch (severity)
     {
-        // suppress messages with severity enum value greater than the reportable
-        if (severity > reportableSeverity)
-            return;
-
-        switch (severity)
-        {
-        case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
-        case Severity::kERROR: std::cerr << "ERROR: "; break;
-        case Severity::kWARNING: std::cerr << "WARNING: "; break;
-        case Severity::kINFO: std::cerr << "INFO: "; break;
-        default: std::cerr << "UNKNOWN: "; break;
-        }
-        std::cerr << msg << std::endl;
+      case Severity::kINTERNAL_ERROR:
+        std::cerr << "INTERNAL_ERROR: ";
+        break;
+      case Severity::kERROR:
+        std::cerr << "ERROR: ";
+        break;
+      case Severity::kWARNING:
+        std::cerr << "WARNING: ";
+        break;
+      case Severity::kINFO:
+        std::cerr << "INFO: ";
+        break;
+      default:
+        std::cerr << "UNKNOWN: ";
+        break;
     }
+    std::cerr << msg << std::endl;
+  }
 
-    Severity reportableSeverity;
+  Severity reportableSeverity;
 };
 
 class PointPillars
 {
 private:
-  //initize in initializer list
+  // initize in initializer list
   const bool reproduce_result_mode_;
   const float score_threshold_;
   const float nms_overlap_threshold_;
@@ -117,8 +124,7 @@ class PointPillars
   const float ANCHOR_DZ_SIZE_;
   const int NUM_BOX_CORNERS_;
   const int NUM_OUTPUT_BOX_FEATURE_;
-  //end initializer list
-
+  // end initializer list
 
   int host_pillar_count_[1];
 
@@ -182,7 +188,6 @@ class PointPillars
   float* dev_box_for_nms_;
   int* dev_filter_count_;
 
-
   std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
   std::unique_ptr<PreprocessPointsCuda> preprocess_points_cuda_ptr_;
   std::unique_ptr<AnchorMaskCuda> anchor_mask_cuda_ptr_;
@@ -285,8 +290,8 @@ class PointPillars
   * @param[in] out_num_objects Number of output bounding box
   * @details This is interface for the algorithm
   */
-  void doInference(const float* in_points_array, const int in_num_points,
-    std::vector<float>& out_detection, int& out_num_objects);
+  void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection,
+                   int& out_num_objects);
 };
 
 #endif  // POINTS_PILLAR_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
old mode 100755
new mode 100644
index e0b1e1bd53c..8a40722a896
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -14,38 +14,37 @@
  * limitations under the License.
  */
 
- /**
- * @file point_pillars_ros.h
- * @brief ROS interface for PointPillars
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file point_pillars_ros.h
+* @brief ROS interface for PointPillars
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
 #ifndef POINTS_PILLAR_ROS_H
 #define POINTS_PILLAR_ROS_H
 
-//headers in STL
+// headers in STL
 #include <memory>
 #include <vector>
 
-//headers in ROS
+// headers in ROS
 #include <ros/ros.h>
 #include <sensor_msgs/PointCloud2.h>
 #include <tf/transform_listener.h>
 
-//headers in PCL
+// headers in PCL
 #include <pcl/io/pcd_io.h>
 
-//headers in local files
+// headers in local files
 #include "point_pillars.h"
 
-
 class PointPillarsROS
 {
 private:
   friend class TestClass;
 
-  //initializer list
+  // initializer list
   ros::NodeHandle private_nh_;
   bool has_subscribed_baselink_;
   const int NUM_POINT_FEATURE_;
@@ -53,16 +52,16 @@ class PointPillarsROS
   const float TRAINED_SENSOR_HEIGHT_;
   const float NORMALIZING_INTENSITY_VALUE_;
   const std::string BASELINK_FRAME_;
-  //end initializer list
+  // end initializer list
 
-  //rosparam
+  // rosparam
   bool baselink_support_;
   bool reproduce_result_mode_;
   float score_threshold_;
   float nms_overlap_threshold_;
   std::string pfe_onnx_file_;
   std::string rpn_onnx_file_;
-  //end rosparam
+  // end rosparam
 
   ros::NodeHandle nh_;
   ros::Subscriber sub_points_;
@@ -73,8 +72,6 @@ class PointPillarsROS
   tf::Transform angle_transform_;
   tf::Transform angle_transform_inversed_;
 
-
-
   float offset_z_from_trained_data_;
 
   std::unique_ptr<PointPillars> point_pillars_ptr_;
@@ -100,8 +97,7 @@ class PointPillarsROS
   * @return geometry_msgs::Pose Transformed pose
   * @details Calculate transformed pose
   */
-  geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose,
-                                         const tf::Transform& tf);
+  geometry_msgs::Pose getTransformedPose(const geometry_msgs::Pose& in_pose, const tf::Transform& tf);
 
   /**
   * @brief callback for pointcloud
@@ -117,7 +113,8 @@ class PointPillarsROS
   * @param[in] offset_z (default: 1.0) when using baselink_support, offset height based on current sensor configuration
   * @details convert pcl points to c++ array, plus offset z if it is necessary
   */
-  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z = 0);
+  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array,
+                  const float offset_z = 0);
 
   /**
   * @brief publish DetectedObject
@@ -126,7 +123,8 @@ class PointPillarsROS
   * @param[in] pc_header Header from pointcloud
   * @details Convert std::vector to DetectedObject, and publish them
   */
-  void pubDetectedObject(const std::vector<float>& detections, const int num_objects, const std_msgs::Header& pc_header);
+  void pubDetectedObject(const std::vector<float>& detections, const int num_objects,
+                         const std_msgs::Header& pc_header);
 
 public:
   PointPillarsROS();
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
index 3768c040499..b60a427ca3f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
@@ -14,21 +14,21 @@
  * limitations under the License.
  */
 
- /**
- * @file postprocess_cuda.h
- * @brief Postprocess for network output
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file postprocess_cuda.h
+* @brief Postprocess for network output
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
-#ifndef   POSTPROCESS_CUDA_H
-#define   POSTPROCESS_CUDA_H
+#ifndef POSTPROCESS_CUDA_H
+#define POSTPROCESS_CUDA_H
 
-//headers in STL
+// headers in STL
 #include <memory>
 #include <iostream>
 
-//headers in local files
+// headers in local files
 #include "nms_cuda.h"
 
 class PostprocessCuda
@@ -62,38 +62,39 @@ class PostprocessCuda
   * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature
   * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam
   */
-  PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX,
-    const int NUM_ANCHOR_X_INDS, const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS,
-    const float score_threshold, const int NUM_THREADS, const float nms_overlap_threshold,
-    const int NUM_BOX_CORNERS, const int NUM_OUTPUT_BOX_FEATURE);
+  PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS,
+                  const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, const float score_threshold,
+                  const int NUM_THREADS, const float nms_overlap_threshold, const int NUM_BOX_CORNERS,
+                  const int NUM_OUTPUT_BOX_FEATURE);
 
-    /**
-    * @brief Postprocessing for the network output
-    * @param[in] rpn_box_output Box prediction from the network output
-    * @param[in] rpn_cls_output Class prediction from the network output
-    * @param[in] rpn_dir_output Direction prediction from the network output
-    * @param[in] dev_anchor_mask Anchor mask for filtering
-    * @param[in] dev_anchors_px X-coordinate value for corresponding anchor
-    * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor
-    * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor
-    * @param[in] dev_anchors_dx X-dimension value for corresponding anchor
-    * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor
-    * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor
-    * @param[in] dev_anchors_ro Rotation value for corresponding anchor
-    * @param[in] dev_filtered_box Filtered box prediction
-    * @param[in] dev_filtered_score Filtered score prediction
-    * @param[in] dev_filtered_dir Filtered direction prediction
-    * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms
-    * @param[in] dev_filter_count The number of filtered output
-    * @param[out] out_detection Output bounding boxes
-    * @param[out] out_num_objects The number of output bounding boxes
-    * @details dev_* represents device memory allocated variables
-    */
+  /**
+  * @brief Postprocessing for the network output
+  * @param[in] rpn_box_output Box prediction from the network output
+  * @param[in] rpn_cls_output Class prediction from the network output
+  * @param[in] rpn_dir_output Direction prediction from the network output
+  * @param[in] dev_anchor_mask Anchor mask for filtering
+  * @param[in] dev_anchors_px X-coordinate value for corresponding anchor
+  * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor
+  * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor
+  * @param[in] dev_anchors_dx X-dimension value for corresponding anchor
+  * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor
+  * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor
+  * @param[in] dev_anchors_ro Rotation value for corresponding anchor
+  * @param[in] dev_filtered_box Filtered box prediction
+  * @param[in] dev_filtered_score Filtered score prediction
+  * @param[in] dev_filtered_dir Filtered direction prediction
+  * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms
+  * @param[in] dev_filter_count The number of filtered output
+  * @param[out] out_detection Output bounding boxes
+  * @param[out] out_num_objects The number of output bounding boxes
+  * @details dev_* represents device memory allocated variables
+  */
   void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output,
-          int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
-          const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
-          float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count,
-          std::vector<float>& out_detection, int& out_num_objects);
+                         int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py,
+                         const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy,
+                         const float* dev_anchors_dz, const float* dev_anchors_ro, float* dev_filtered_box,
+                         float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms,
+                         int* dev_filter_count, std::vector<float>& out_detection, int& out_num_objects);
 };
 
 #endif  // POSTPROCESS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
index 426dd50e714..b8b361db22e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
@@ -14,18 +14,16 @@
  * limitations under the License.
  */
 
- /**
- * @file preprocess_points.h
- * @brief CPU version of preprocess points
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file preprocess_points.h
+* @brief CPU version of preprocess points
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
 #ifndef PREPROCESS_POINTS_H
 #define PREPROCESS_POINTS_H
 
-
-
 class PreprocessPoints
 {
 private:
@@ -44,7 +42,6 @@ class PreprocessPoints
   const int NUM_INDS_FOR_SCAN_;
   const int NUM_BOX_CORNERS_;
 
-
 public:
   /**
   * @brief Constructor
@@ -63,19 +60,10 @@ class PreprocessPoints
   * @param[in] NUM_BOX_CORNERS Number of box's corner
   * @details Captital variables never change after the compile
   */
-  PreprocessPoints(const int MAX_NUM_PILLARS,
-                   const int MAX_POINTS_PER_PILLAR,
-                   const int GRID_X_SIZE,
-                   const int GRID_Y_SIZE,
-                   const int GRID_Z_SIZE,
-                   const float PILLAR_X_SIZE,
-                   const float PILLAR_Y_SIZE,
-                   const float PILLAR_Z_SIZE,
-                   const float MIN_X_RANGE,
-                   const float MIN_Y_RANGE,
-                   const float MIN_Z_RANGE,
-                   const int NUM_INDS_FOR_SCAN,
-                   const int NUM_BOX_CORNERS);
+  PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE,
+                   const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE,
+                   const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE,
+                   const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS);
 
   /**
   * @brief CPU preprocessing for input pointcloud
@@ -95,11 +83,10 @@ class PreprocessPoints
   * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud
   * @details Convert pointcloud to pillar representation
   */
-  void preprocess(const float* in_points_array, int in_num_points,
-                  int* x_coors, int* y_coors, float* num_points_per_pillar,
-                  float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-                  float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
-                  float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count);
+  void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
+                  float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                  float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask,
+                  float* sparse_pillar_map, int* host_pillar_count);
 
   /**
   * @brief Initializing variables for preprocessing
@@ -113,9 +100,9 @@ class PreprocessPoints
   * @param[in] y_coors_for_sub_shaped Used for y substraction in the network
   * @details Initializeing input arguments with certain values
   */
-  void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map,
-                           float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-                           float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped);
+  void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x, float* pillar_y,
+                           float* pillar_z, float* pillar_i, float* x_coors_for_sub_shaped,
+                           float* y_coors_for_sub_shaped);
 };
 
 #endif  // PREPROCESS_POINTS_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
index 7a4dd8420d2..710a14a87c3 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
@@ -14,22 +14,20 @@
  * limitations under the License.
  */
 
- /**
- * @file preprocess_points_cuda.h
- * @brief GPU version of preprocess points
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
-
-
-#ifndef   PREPROCESS_POINTS_CUDA_H
-#define   PREPROCESS_POINTS_CUDA_H
+/**
+* @file preprocess_points_cuda.h
+* @brief GPU version of preprocess points
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
+#ifndef PREPROCESS_POINTS_CUDA_H
+#define PREPROCESS_POINTS_CUDA_H
 
 class PreprocessPointsCuda
 {
 private:
-  //initialzer list
+  // initialzer list
   const int NUM_THREADS_;
   const int MAX_NUM_PILLARS_;
   const int MAX_NUM_POINTS_PER_PILLAR_;
@@ -43,7 +41,7 @@ class PreprocessPointsCuda
   const float MIN_X_RANGE_;
   const float MIN_Y_RANGE_;
   const float MIN_Z_RANGE_;
-  //end initalizer list
+  // end initalizer list
 
   float* dev_pillar_x_in_coors_;
   float* dev_pillar_y_in_coors_;
@@ -74,19 +72,10 @@ class PreprocessPointsCuda
   * @param[in] MIN_Z_RANGE Minimum z value for pointcloud
   * @details Captital variables never change after the compile
   */
-  PreprocessPointsCuda(const int NUM_THREADS,
-                       const int MAX_NUM_PILLARS,
-                       const int MAX_POINTS_PER_PILLAR,
-                       const int NUM_INDS_FOR_SCAN,
-                       const int GRID_X_SIZE,
-                       const int GRID_Y_SIZE,
-                       const int GRID_Z_SIZE,
-                       const float PILLAR_X_SIZE,
-                       const float PILLAR_Y_SIZE,
-                       const float PILLAR_Z_SIZE,
-                       const float MIN_X_RANGE,
-                       const float MIN_Y_RANGE,
-                       const float MIN_Z_RANGE);
+  PreprocessPointsCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR,
+                       const int NUM_INDS_FOR_SCAN, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE,
+                       const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE,
+                       const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE);
   ~PreprocessPointsCuda();
 
   /**
@@ -108,9 +97,10 @@ class PreprocessPointsCuda
   * @details Convert pointcloud to pillar representation
   */
   void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors,
-                      float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y, float* dev_pillar_z, float* dev_pillar_i,
-                      float* dev_x_coors_for_sub_shaped, float* dev_y_coors_for_sub_shaped,
-                      float* dev_pillar_feature_mask, int* dev_sparse_pillar_map, int* host_pillar_count);
+                              float* dev_num_points_per_pillar, float* dev_pillar_x, float* dev_pillar_y,
+                              float* dev_pillar_z, float* dev_pillar_i, float* dev_x_coors_for_sub_shaped,
+                              float* dev_y_coors_for_sub_shaped, float* dev_pillar_feature_mask,
+                              int* dev_sparse_pillar_map, int* host_pillar_count);
 };
 
 #endif  // PREPROCESS_POINTS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
index 24e4f0b79dc..1c955a3a65d 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
- /**
- * @file scatter_cuda.h
- * @brief CUDA code for scatter operation
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file scatter_cuda.h
+* @brief CUDA code for scatter operation
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
-#ifndef   SCATTERCUDA_H
-#define   SCATTERCUDA_H
+#ifndef SCATTERCUDA_H
+#define SCATTERCUDA_H
 
 class ScatterCuda
 {
@@ -52,7 +52,7 @@ class ScatterCuda
   * @param[out] scattered_feature Gridmap representation for pillars' feature
   * @details Allocate pillars in gridmap based on index(coordinates) information
   */
-  void doScatterCuda(const int pillar_count, int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature);
+  void doScatterCuda(const int pillar_count, int* x_coors, int* y_coors, float* pfe_output, float* scattered_feature);
 };
 
 #endif  // SCATTERCUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
old mode 100755
new mode 100644
index 36617dd472b..8faa0bede56
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-//headers in STL
+// headers in STL
 #include <iostream>
 
-//headers in local files
+// headers in local files
 #include "point_pillars_ros.h"
 
 int main(int argc, char** argv)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
old mode 100755
new mode 100644
index 767008e2788..117fb5c2a5e
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -14,57 +14,57 @@
  * limitations under the License.
  */
 
-
-//headers in STL
+// headers in STL
 #include <chrono>
 #include <iostream>
 
-//headers in ROS
+// headers in ROS
 #include <ros/console.h>
 
-//headers in local files
+// headers in local files
 #include "point_pillars.h"
 
-
-PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold, const float nms_overlap_threshold,
-                           const std::string pfe_onnx_file, const std::string rpn_onnx_file):
-reproduce_result_mode_(reproduce_result_mode),
-score_threshold_(score_threshold),
-nms_overlap_threshold_(nms_overlap_threshold),
-pfe_onnx_file_(pfe_onnx_file),
-rpn_onnx_file_(rpn_onnx_file),
-MAX_NUM_PILLARS_(12000),
-MAX_NUM_POINTS_PER_PILLAR_(100),
-PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_*64),
-GRID_X_SIZE_(432),
-GRID_Y_SIZE_(496),
-GRID_Z_SIZE_(1),
-RPN_INPUT_SIZE_(64*GRID_X_SIZE_*GRID_Y_SIZE_),
-NUM_ANCHOR_X_INDS_(GRID_X_SIZE_*0.5),
-NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_*0.5),
-NUM_ANCHOR_R_INDS_(2),
-NUM_ANCHOR_(NUM_ANCHOR_X_INDS_*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_),
-RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_*7) ,
-RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_),
-RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_*2),
-PILLAR_X_SIZE_(0.16),
-PILLAR_Y_SIZE_(0.16),
-PILLAR_Z_SIZE_(4.0),
-MIN_X_RANGE_(0.0),
-MIN_Y_RANGE_(-39.68),
-MIN_Z_RANGE_(-3.0),
-MAX_X_RANGE_(69.12),
-MAX_Y_RANGE_(39.68),
-MAX_Z_RANGE_(1),
-BATCH_SIZE_(1),
-NUM_INDS_FOR_SCAN_(512),
-NUM_THREADS_(64),//if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
-SENSOR_HEIGHT_(1.73),
-ANCHOR_DX_SIZE_(1.6),
-ANCHOR_DY_SIZE_(3.9),
-ANCHOR_DZ_SIZE_(1.56),
-NUM_BOX_CORNERS_(4),
-NUM_OUTPUT_BOX_FEATURE_(7)
+PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold,
+                           const float nms_overlap_threshold, const std::string pfe_onnx_file,
+                           const std::string rpn_onnx_file)
+  : reproduce_result_mode_(reproduce_result_mode)
+  , score_threshold_(score_threshold)
+  , nms_overlap_threshold_(nms_overlap_threshold)
+  , pfe_onnx_file_(pfe_onnx_file)
+  , rpn_onnx_file_(rpn_onnx_file)
+  , MAX_NUM_PILLARS_(12000)
+  , MAX_NUM_POINTS_PER_PILLAR_(100)
+  , PFE_OUTPUT_SIZE_(MAX_NUM_PILLARS_ * 64)
+  , GRID_X_SIZE_(432)
+  , GRID_Y_SIZE_(496)
+  , GRID_Z_SIZE_(1)
+  , RPN_INPUT_SIZE_(64 * GRID_X_SIZE_ * GRID_Y_SIZE_)
+  , NUM_ANCHOR_X_INDS_(GRID_X_SIZE_ * 0.5)
+  , NUM_ANCHOR_Y_INDS_(GRID_Y_SIZE_ * 0.5)
+  , NUM_ANCHOR_R_INDS_(2)
+  , NUM_ANCHOR_(NUM_ANCHOR_X_INDS_ * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_)
+  , RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_ * 7)
+  , RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_)
+  , RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_ * 2)
+  , PILLAR_X_SIZE_(0.16)
+  , PILLAR_Y_SIZE_(0.16)
+  , PILLAR_Z_SIZE_(4.0)
+  , MIN_X_RANGE_(0.0)
+  , MIN_Y_RANGE_(-39.68)
+  , MIN_Z_RANGE_(-3.0)
+  , MAX_X_RANGE_(69.12)
+  , MAX_Y_RANGE_(39.68)
+  , MAX_Z_RANGE_(1)
+  , BATCH_SIZE_(1)
+  , NUM_INDS_FOR_SCAN_(512)
+  , NUM_THREADS_(64)
+  ,  // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
+  SENSOR_HEIGHT_(1.73)
+  , ANCHOR_DX_SIZE_(1.6)
+  , ANCHOR_DY_SIZE_(3.9)
+  , ANCHOR_DZ_SIZE_(1.56)
+  , NUM_BOX_CORNERS_(4)
+  , NUM_OUTPUT_BOX_FEATURE_(7)
 {
   anchors_px_ = new float[NUM_ANCHOR_];
   anchors_py_ = new float[NUM_ANCHOR_];
@@ -78,76 +78,31 @@ NUM_OUTPUT_BOX_FEATURE_(7)
   box_anchors_max_x_ = new float[NUM_ANCHOR_];
   box_anchors_max_y_ = new float[NUM_ANCHOR_];
 
-
-  if(reproduce_result_mode_)
+  if (reproduce_result_mode_)
   {
-    preprocess_points_ptr_.reset(new PreprocessPoints(
-      MAX_NUM_PILLARS_,
-      MAX_NUM_POINTS_PER_PILLAR_,
-      GRID_X_SIZE_,
-      GRID_Y_SIZE_,
-      GRID_Z_SIZE_,
-      PILLAR_X_SIZE_,
-      PILLAR_Y_SIZE_,
-      PILLAR_Z_SIZE_,
-      MIN_X_RANGE_,
-      MIN_Y_RANGE_,
-      MIN_Z_RANGE_,
-      NUM_INDS_FOR_SCAN_,
-      NUM_BOX_CORNERS_));
+    preprocess_points_ptr_.reset(new PreprocessPoints(MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_,
+                                                      GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_,
+                                                      PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_,
+                                                      NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_));
   }
   else
   {
     preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda(
-      NUM_THREADS_,
-      MAX_NUM_PILLARS_,
-      MAX_NUM_POINTS_PER_PILLAR_,
-      NUM_INDS_FOR_SCAN_,
-      GRID_X_SIZE_,
-      GRID_Y_SIZE_,
-      GRID_Z_SIZE_,
-      PILLAR_X_SIZE_,
-      PILLAR_Y_SIZE_,
-      PILLAR_Z_SIZE_,
-      MIN_X_RANGE_,
-      MIN_Y_RANGE_,
-      MIN_Z_RANGE_));
+        NUM_THREADS_, MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, NUM_INDS_FOR_SCAN_, GRID_X_SIZE_, GRID_Y_SIZE_,
+        GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_));
   }
 
-  anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(
-    NUM_INDS_FOR_SCAN_,
-    NUM_ANCHOR_X_INDS_,
-    NUM_ANCHOR_Y_INDS_,
-    NUM_ANCHOR_R_INDS_,
-    MIN_X_RANGE_,
-    MIN_Y_RANGE_,
-    PILLAR_X_SIZE_,
-    PILLAR_Y_SIZE_,
-    GRID_X_SIZE_,
-    GRID_Y_SIZE_
-  ));
-
-  scatter_cuda_ptr_.reset(new ScatterCuda(
-    NUM_THREADS_,
-    MAX_NUM_PILLARS_,
-    GRID_X_SIZE_,
-    GRID_Y_SIZE_
-  ));
+  anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(NUM_INDS_FOR_SCAN_, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_,
+                                                 NUM_ANCHOR_R_INDS_, MIN_X_RANGE_, MIN_Y_RANGE_, PILLAR_X_SIZE_,
+                                                 PILLAR_Y_SIZE_, GRID_X_SIZE_, GRID_Y_SIZE_));
+
+  scatter_cuda_ptr_.reset(new ScatterCuda(NUM_THREADS_, MAX_NUM_PILLARS_, GRID_X_SIZE_, GRID_Y_SIZE_));
 
   const float FLOAT_MIN = std::numeric_limits<float>::lowest();
   const float FLOAT_MAX = std::numeric_limits<float>::max();
-  postprocess_cuda_ptr_.reset(new PostprocessCuda(
-    FLOAT_MIN,
-    FLOAT_MAX,
-    NUM_ANCHOR_X_INDS_,
-    NUM_ANCHOR_Y_INDS_,
-    NUM_ANCHOR_R_INDS_,
-    score_threshold_,
-    NUM_THREADS_,
-    nms_overlap_threshold_,
-    NUM_BOX_CORNERS_,
-    NUM_OUTPUT_BOX_FEATURE_
-  ));
+  postprocess_cuda_ptr_.reset(new PostprocessCuda(FLOAT_MIN, FLOAT_MAX, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_,
+                                                  NUM_ANCHOR_R_INDS_, score_threshold_, NUM_THREADS_,
+                                                  nms_overlap_threshold_, NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_));
 
   deviceMemoryMalloc();
   initTRT();
@@ -156,7 +111,6 @@ NUM_OUTPUT_BOX_FEATURE_(7)
 
 PointPillars::~PointPillars()
 {
-
   delete[] anchors_px_;
   delete[] anchors_py_;
   delete[] anchors_pz_;
@@ -185,9 +139,9 @@ PointPillars::~PointPillars()
   GPU_CHECK(cudaFree(dev_pillar_z_));
   GPU_CHECK(cudaFree(dev_pillar_i_));
 
-  GPU_CHECK(cudaFree( dev_x_coors_for_sub_shaped_ ));
-  GPU_CHECK(cudaFree( dev_y_coors_for_sub_shaped_ ));
-  GPU_CHECK(cudaFree( dev_pillar_feature_mask_ ));
+  GPU_CHECK(cudaFree(dev_x_coors_for_sub_shaped_));
+  GPU_CHECK(cudaFree(dev_y_coors_for_sub_shaped_));
+  GPU_CHECK(cudaFree(dev_pillar_feature_mask_));
 
   GPU_CHECK(cudaFree(dev_cumsum_along_x_));
   GPU_CHECK(cudaFree(dev_cumsum_along_y_));
@@ -208,7 +162,6 @@ PointPillars::~PointPillars()
   GPU_CHECK(cudaFree(pfe_buffers_[7]));
   GPU_CHECK(cudaFree(pfe_buffers_[8]));
 
-
   GPU_CHECK(cudaFree(rpn_buffers_[0]));
   GPU_CHECK(cudaFree(rpn_buffers_[1]));
   GPU_CHECK(cudaFree(rpn_buffers_[2]));
@@ -229,8 +182,6 @@ PointPillars::~PointPillars()
   GPU_CHECK(cudaFree(dev_box_for_nms_));
   GPU_CHECK(cudaFree(dev_filter_count_));
 
-
-
   pfe_context_->destroy();
   rpn_context_->destroy();
 
@@ -242,40 +193,43 @@ PointPillars::~PointPillars()
 
 void PointPillars::deviceMemoryMalloc()
 {
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
-
-  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_* sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_* sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_,NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
-
-
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-
-
-  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
-
-  //cumsum kernel
-  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(int)));
-
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_,
+                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_,
+                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_,
+                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_,
+                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));
+
+  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_num_points_per_pillar_, MAX_NUM_PILLARS_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_sparse_pillar_map_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
+
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+
+  GPU_CHECK(
+      cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(
+      cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(
+      cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+
+  // cumsum kernel
+  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_y_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
 
   // for make anchor mask kernel
-  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_*sizeof(int)));
-
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchor_mask_, NUM_ANCHOR_ * sizeof(int)));
 
   // for trt inference
   // create GPU buffers and a stream
@@ -295,27 +249,27 @@ void PointPillars::deviceMemoryMalloc()
   GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float)));
 
   // for scatter kernel
-  GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_,     64*GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(float) ) );
-
-  //for filter
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_*7*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_*sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_,  NUM_ANCHOR_*sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_*4*sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64 * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float)));
+
+  // for filter
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_py_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_pz_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dx_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * 7 * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_ * sizeof(int)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * 4 * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int)));
 }
 
 void PointPillars::initAnchors()
 {
   // zero clear
-  for(size_t i = 0; i < NUM_ANCHOR_; i++)
+  for (size_t i = 0; i < NUM_ANCHOR_; i++)
   {
     anchors_px_[i] = 0;
     anchors_py_[i] = 0;
@@ -330,36 +284,36 @@ void PointPillars::initAnchors()
     box_anchors_max_y_[i] = 0;
   }
 
-  float x_stride = PILLAR_X_SIZE_*2;
-  float y_stride = PILLAR_Y_SIZE_*2;
+  float x_stride = PILLAR_X_SIZE_ * 2;
+  float y_stride = PILLAR_Y_SIZE_ * 2;
   float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_;
   float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_;
 
-  float anchor_x_count[NUM_ANCHOR_X_INDS_] = {0};
-  for(size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++)
+  float anchor_x_count[NUM_ANCHOR_X_INDS_] = { 0 };
+  for (size_t i = 0; i < NUM_ANCHOR_X_INDS_; i++)
   {
-    anchor_x_count[i] = float(i)*x_stride + x_offset;
+    anchor_x_count[i] = float(i) * x_stride + x_offset;
   }
-  float anchor_y_count[NUM_ANCHOR_Y_INDS_] = {0};
-  for(size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++)
+  float anchor_y_count[NUM_ANCHOR_Y_INDS_] = { 0 };
+  for (size_t i = 0; i < NUM_ANCHOR_Y_INDS_; i++)
   {
-    anchor_y_count[i] = float(i)*y_stride + y_offset;
+    anchor_y_count[i] = float(i) * y_stride + y_offset;
   }
 
-  float anchor_r_count[NUM_ANCHOR_R_INDS_] = {0, M_PI/2};
+  float anchor_r_count[NUM_ANCHOR_R_INDS_] = { 0, M_PI / 2 };
 
   // np.meshgrid
-  for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+  for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++)
   {
-    for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+    for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
     {
-      for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++)
+      for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++)
       {
-        int ind = y*NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_ + x*NUM_ANCHOR_R_INDS_ + r;
+        int ind = y * NUM_ANCHOR_X_INDS_ * NUM_ANCHOR_R_INDS_ + x * NUM_ANCHOR_R_INDS_ + r;
         anchors_px_[ind] = anchor_x_count[x];
         anchors_py_[ind] = anchor_y_count[y];
         anchors_ro_[ind] = anchor_r_count[r];
-        anchors_pz_[ind] = -1*SENSOR_HEIGHT_;
+        anchors_pz_[ind] = -1 * SENSOR_HEIGHT_;
         anchors_dx_[ind] = ANCHOR_DX_SIZE_;
         anchors_dy_[ind] = ANCHOR_DY_SIZE_;
         anchors_dz_[ind] = ANCHOR_DZ_SIZE_;
@@ -373,49 +327,51 @@ void PointPillars::initAnchors()
 
 void PointPillars::putAnchorsInDeviceMemory()
 {
-  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
-  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
-  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
-  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice ) );
-
-  GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_*sizeof(float), cudaMemcpyHostToDevice));
-
+  GPU_CHECK(
+      cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(
+      cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(
+      cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(
+      cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+
+  GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_pz_, anchors_pz_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dx_, anchors_dx_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dy_, anchors_dy_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_dz_, anchors_dz_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
 }
 
-void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py,
-                                             float* anchors_dx, float* anchors_dy)
+void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy)
 {
   // flip box's dimension when the at the third axis == 1
-  float flipped_anchors_dx[NUM_ANCHOR_] = {0};
-  float flipped_anchors_dy[NUM_ANCHOR_] = {0};
-  for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+  float flipped_anchors_dx[NUM_ANCHOR_] = { 0 };
+  float flipped_anchors_dy[NUM_ANCHOR_] = { 0 };
+  for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
   {
-    for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+    for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++)
     {
-      int base_ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_;
+      int base_ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_;
       flipped_anchors_dx[base_ind + 0] = ANCHOR_DX_SIZE_;
       flipped_anchors_dy[base_ind + 0] = ANCHOR_DY_SIZE_;
       flipped_anchors_dx[base_ind + 1] = ANCHOR_DY_SIZE_;
       flipped_anchors_dy[base_ind + 1] = ANCHOR_DX_SIZE_;
     }
   }
-  for(size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
+  for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
   {
-    for(size_t y= 0; y < NUM_ANCHOR_Y_INDS_; y++)
+    for (size_t y = 0; y < NUM_ANCHOR_Y_INDS_; y++)
     {
-      for(size_t r=0 ; r < NUM_ANCHOR_R_INDS_; r++)
+      for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++)
       {
-        int ind = x*NUM_ANCHOR_Y_INDS_*NUM_ANCHOR_R_INDS_ + y*NUM_ANCHOR_R_INDS_ + r;
-        box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind]/2;
-        box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind]/2;
-        box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind]/2;
-        box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind]/2;
+        int ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_ + r;
+        box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2;
+        box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2;
+        box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2;
+        box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2;
       }
     }
   }
@@ -424,11 +380,11 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p
 void PointPillars::initTRT()
 {
   // create a TensorRT model from the onnx model and serialize it to a stream
-  nvinfer1::IHostMemory* pfe_trt_model_stream{nullptr};
-  nvinfer1::IHostMemory* rpn_trt_model_stream{nullptr};
+  nvinfer1::IHostMemory* pfe_trt_model_stream{ nullptr };
+  nvinfer1::IHostMemory* rpn_trt_model_stream{ nullptr };
   onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream);
   onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream);
-  if(pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr)
+  if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr)
   {
     ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str());
   }
@@ -436,13 +392,15 @@ void PointPillars::initTRT()
   // deserialize the engine
   pfe_runtime_ = nvinfer1::createInferRuntime(g_logger_);
   rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
-  if(pfe_runtime_ == nullptr || rpn_runtime_ == nullptr)
+  if (pfe_runtime_ == nullptr || rpn_runtime_ == nullptr)
   {
     ROS_ERROR("Failed to create TensorRT Runtime object.");
   }
-  pfe_engine_ = pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr);
-  rpn_engine_ = rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
-  if(pfe_engine_ == nullptr || rpn_engine_ == nullptr)
+  pfe_engine_ =
+      pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr);
+  rpn_engine_ =
+      rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
+  if (pfe_engine_ == nullptr || rpn_engine_ == nullptr)
   {
     ROS_ERROR("Failed to create TensorRT Engine.");
   }
@@ -450,16 +408,16 @@ void PointPillars::initTRT()
   rpn_trt_model_stream->destroy();
   pfe_context_ = pfe_engine_->createExecutionContext();
   rpn_context_ = rpn_engine_->createExecutionContext();
-  if(pfe_context_ == nullptr || rpn_context_ == nullptr)
+  if (pfe_context_ == nullptr || rpn_context_ == nullptr)
   {
     ROS_ERROR("Failed to create TensorRT Execution Context.");
   }
 }
 
-void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the onnx model
-                    nvinfer1::IHostMemory*& trt_model_stream) // output buffer for the TensorRT model
+void PointPillars::onnxToTRTModel(const std::string& model_file,             // name of the onnx model
+                                  nvinfer1::IHostMemory*& trt_model_stream)  // output buffer for the TensorRT model
 {
-  int verbosity = (int) nvinfer1::ILogger::Severity::kWARNING;
+  int verbosity = (int)nvinfer1::ILogger::Severity::kWARNING;
 
   // create the builder
   nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(g_logger_);
@@ -469,9 +427,9 @@ void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the
 
   if (!parser->parseFromFile(model_file.c_str(), verbosity))
   {
-      std::string msg("failed to parse onnx file");
-      g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-      exit(EXIT_FAILURE);
+    std::string msg("failed to parse onnx file");
+    g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+    exit(EXIT_FAILURE);
   }
 
   // Build the engine
@@ -491,48 +449,55 @@ void PointPillars::onnxToTRTModel(const std::string& model_file, // name of the
 
 void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_points)
 {
-  int x_coors[MAX_NUM_PILLARS_]= {0};
-  int y_coors[MAX_NUM_PILLARS_]= {0};
-  float num_points_per_pillar[MAX_NUM_PILLARS_] = {0};
-  float * pillar_x = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_y = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_z = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_i = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-
-  float * x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-  float * y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_feature_mask = new float[MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_];
-
-  float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_];
-
-  preprocess_points_ptr_->preprocess(in_points_array, in_num_points,
-                                    x_coors, y_coors, num_points_per_pillar,
-                                    pillar_x, pillar_y, pillar_z, pillar_i,
-                                    x_coors_for_sub_shaped, y_coors_for_sub_shaped,
-                                    pillar_feature_mask, sparse_pillar_map, host_pillar_count_);
-
-  GPU_CHECK( cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
-  GPU_CHECK( cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
-  GPU_CHECK( cudaMemset(dev_pillar_x_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK( cudaMemset(dev_pillar_y_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK( cudaMemset(dev_pillar_z_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK( cudaMemset(dev_pillar_i_,0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK( cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) );
-  GPU_CHECK( cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ *sizeof( float ) ) );
-  GPU_CHECK( cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) );
-  GPU_CHECK( cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) );
-
-  GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_*sizeof(int), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_,num_points_per_pillar,MAX_NUM_PILLARS_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_*sizeof(float), cudaMemcpyHostToDevice ));
-  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof(float), cudaMemcpyHostToDevice ));
+  int x_coors[MAX_NUM_PILLARS_] = { 0 };
+  int y_coors[MAX_NUM_PILLARS_] = { 0 };
+  float num_points_per_pillar[MAX_NUM_PILLARS_] = { 0 };
+  float* pillar_x = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_y = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_z = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_i = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+
+  float* x_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+  float* y_coors_for_sub_shaped = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_feature_mask = new float[MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_];
+
+  float* sparse_pillar_map = new float[NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_];
+
+  preprocess_points_ptr_->preprocess(in_points_array, in_num_points, x_coors, y_coors, num_points_per_pillar, pillar_x,
+                                     pillar_y, pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped,
+                                     pillar_feature_mask, sparse_pillar_map, host_pillar_count_);
+
+  GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_x_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_y_coors_for_sub_shaped_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
+
+  GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,
+                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,
+                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float),
+                       cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask,
+                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map,
+                       NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice));
 
   delete[] pillar_x;
   delete[] pillar_y;
@@ -546,33 +511,32 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_
 
 void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points)
 {
-  float * dev_points;
-  GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points*4*sizeof(float)));
-  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points*4*sizeof(float), cudaMemcpyHostToDevice ) );
-
-
-  GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_*GRID_X_SIZE_*sizeof(int)));
-  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_*NUM_INDS_FOR_SCAN_*sizeof( int ) ) );
-  GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
-  GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof( int ) ) );
-  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof( float ) ) );
-  GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_*sizeof(int)));
-
-  preprocess_points_cuda_ptr_->doPreprocessPointsCuda(dev_points, in_num_points, dev_x_coors_, dev_y_coors_,
-            dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_, dev_pillar_z_, dev_pillar_i_,
-            dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_,
-            dev_pillar_feature_mask_, dev_sparse_pillar_map_, host_pillar_count_);
+  float* dev_points;
+  GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * 4 * sizeof(float)));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * 4 * sizeof(float), cudaMemcpyHostToDevice));
+
+  GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_pillar_x_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_y_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_z_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_i_, 0, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_x_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_y_coors_, 0, MAX_NUM_PILLARS_ * sizeof(int)));
+  GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, NUM_ANCHOR_ * sizeof(int)));
+
+  preprocess_points_cuda_ptr_->doPreprocessPointsCuda(
+      dev_points, in_num_points, dev_x_coors_, dev_y_coors_, dev_num_points_per_pillar_, dev_pillar_x_, dev_pillar_y_,
+      dev_pillar_z_, dev_pillar_i_, dev_x_coors_for_sub_shaped_, dev_y_coors_for_sub_shaped_, dev_pillar_feature_mask_,
+      dev_sparse_pillar_map_, host_pillar_count_);
 
   GPU_CHECK(cudaFree(dev_points));
 }
 
 void PointPillars::preprocess(const float* in_points_array, const int in_num_points)
 {
-  if(reproduce_result_mode_)
+  if (reproduce_result_mode_)
   {
     preprocessCPU(in_points_array, in_num_points);
   }
@@ -582,44 +546,56 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi
   }
 }
 
-void PointPillars::doInference(const float* in_points_array, const int in_num_points,
-  std::vector<float>& out_detection, int& out_num_objects)
+void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection,
+                               int& out_num_objects)
 {
   preprocess(in_points_array, in_num_points);
 
-  anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_, dev_box_anchors_min_x_,
-                  dev_box_anchors_min_y_, dev_box_anchors_max_x_, dev_box_anchors_max_y_, dev_anchor_mask_);
-
+  anchor_mask_cuda_ptr_->doAnchorMaskCuda(dev_sparse_pillar_map_, dev_cumsum_along_x_, dev_cumsum_along_y_,
+                                          dev_box_anchors_min_x_, dev_box_anchors_min_y_, dev_box_anchors_max_x_,
+                                          dev_box_anchors_max_y_, dev_anchor_mask_);
 
   cudaStream_t stream;
   GPU_CHECK(cudaStreamCreate(&stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_,              BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_           * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_,   BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_,
+                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
   pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr);
 
+  GPU_CHECK(cudaMemset(dev_scattered_feature_, 0, RPN_INPUT_SIZE_ * sizeof(float)));
+  scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_, (float*)pfe_buffers_[8],
+                                   dev_scattered_feature_);
 
-
-  GPU_CHECK( cudaMemset( dev_scattered_feature_, 0, RPN_INPUT_SIZE_*sizeof(float) ) );
-  scatter_cuda_ptr_->doScatterCuda(host_pillar_count_[0], dev_x_coors_, dev_y_coors_,
-                                  (float *)pfe_buffers_[8], dev_scattered_feature_);
-
-
-  GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_,   BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(rpn_buffers_[0], dev_scattered_feature_, BATCH_SIZE_ * RPN_INPUT_SIZE_ * sizeof(float),
+                            cudaMemcpyDeviceToDevice, stream));
   rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr);
 
-
-  GPU_CHECK(cudaMemset( dev_filter_count_, 0,  sizeof( int )));
-  postprocess_cuda_ptr_->doPostprocessCuda((float *)rpn_buffers_[1], (float *)rpn_buffers_[2],(float *)rpn_buffers_[3] , dev_anchor_mask_,
-                  dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_,
-                  dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
-                  dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_,
-                  dev_filter_count_, out_detection, out_num_objects);
+  GPU_CHECK(cudaMemset(dev_filter_count_, 0, sizeof(int)));
+  postprocess_cuda_ptr_->doPostprocessCuda((float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3],
+                                           dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_,
+                                           dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
+                                           dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_,
+                                           dev_filter_count_, out_detection, out_num_objects);
 
   // release the stream and the buffers
   cudaStreamDestroy(stream);
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
old mode 100755
new mode 100644
index a7287b3f60e..0d4b3df7148
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -14,30 +14,30 @@
  * limitations under the License.
  */
 
-//headers in STL
+// headers in STL
 #include <chrono>
 #include <cmath>
 
-//headers in PCL
+// headers in PCL
 #include <pcl_conversions/pcl_conversions.h>
 #include <pcl/PCLPointCloud2.h>
 #include <pcl_ros/transforms.h>
 
-//headers in ROS
+// headers in ROS
 #include <tf/transform_datatypes.h>
 
-//headers in local files
+// headers in local files
 #include "autoware_msgs/DetectedObjectArray.h"
 #include "point_pillars_ros.h"
 
-PointPillarsROS::PointPillarsROS():
-private_nh_("~"),
-has_subscribed_baselink_(false),
-NUM_POINT_FEATURE_(4),
-OUTPUT_NUM_BOX_FEATURE_(7),
-TRAINED_SENSOR_HEIGHT_(1.73),
-NORMALIZING_INTENSITY_VALUE_(255.0),
-BASELINK_FRAME_("base_link")
+PointPillarsROS::PointPillarsROS()
+  : private_nh_("~")
+  , has_subscribed_baselink_(false)
+  , NUM_POINT_FEATURE_(4)
+  , OUTPUT_NUM_BOX_FEATURE_(7)
+  , TRAINED_SENSOR_HEIGHT_(1.73)
+  , NORMALIZING_INTENSITY_VALUE_(255.0)
+  , BASELINK_FRAME_("base_link")
 {
   private_nh_.param<bool>("baselink_support", baselink_support_, true);
   private_nh_.param<bool>("reproduce_result_mode", reproduce_result_mode_, false);
@@ -46,34 +46,30 @@ BASELINK_FRAME_("base_link")
   private_nh_.param<std::string>("pfe_onnx_file", pfe_onnx_file_, "");
   private_nh_.param<std::string>("rpn_onnx_file", rpn_onnx_file_, "");
 
-  point_pillars_ptr_.reset(new PointPillars(
-    reproduce_result_mode_,
-    score_threshold_,
-    nms_overlap_threshold_,
-    pfe_onnx_file_,
-    rpn_onnx_file_));
+  point_pillars_ptr_.reset(new PointPillars(reproduce_result_mode_, score_threshold_, nms_overlap_threshold_,
+                                            pfe_onnx_file_, rpn_onnx_file_));
 }
 
 void PointPillarsROS::createROSPubSub()
 {
-  sub_points_  = nh_.subscribe<sensor_msgs::PointCloud2>("/points_raw", 1, &PointPillarsROS::pointsCallback, this);
-  pub_objects_  = nh_.advertise<autoware_msgs::DetectedObjectArray>("/detection/lidar_detector/objects", 1);
+  sub_points_ = nh_.subscribe<sensor_msgs::PointCloud2>("/points_raw", 1, &PointPillarsROS::pointsCallback, this);
+  pub_objects_ = nh_.advertise<autoware_msgs::DetectedObjectArray>("/detection/lidar_detector/objects", 1);
 }
 
-
-geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose,
-                                                        const tf::Transform& tf)
+geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pose& in_pose, const tf::Transform& tf)
 {
   tf::Transform transform;
   geometry_msgs::PoseStamped out_pose;
   transform.setOrigin(tf::Vector3(in_pose.position.x, in_pose.position.y, in_pose.position.z));
-  transform.setRotation(tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w));
+  transform.setRotation(
+      tf::Quaternion(in_pose.orientation.x, in_pose.orientation.y, in_pose.orientation.z, in_pose.orientation.w));
   geometry_msgs::PoseStamped pose_out;
-  tf::poseTFToMsg(tf*transform, out_pose.pose);
+  tf::poseTFToMsg(tf * transform, out_pose.pose);
   return out_pose.pose;
 }
 
-void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const int num_objects, const std_msgs::Header& pc_header)
+void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const int num_objects,
+                                        const std_msgs::Header& pc_header)
 {
   autoware_msgs::DetectedObjectArray objects;
   objects.header = pc_header;
@@ -84,24 +80,23 @@ void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, co
     object.valid = true;
     object.pose_reliable = true;
 
-    object.pose.position.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+0];
-    object.pose.position.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+1];
-    object.pose.position.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+2];
-
+    object.pose.position.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 0];
+    object.pose.position.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 1];
+    object.pose.position.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 2];
 
-    float yaw = detections[i*OUTPUT_NUM_BOX_FEATURE_+6];
+    float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6];
     yaw = std::atan2(std::sin(yaw), std::cos(yaw));
     geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw);
     object.pose.orientation = q;
 
-    if(baselink_support_)
+    if (baselink_support_)
     {
       object.pose = getTransformedPose(object.pose, angle_transform_inversed_);
     }
 
-    object.dimensions.x = detections[i*OUTPUT_NUM_BOX_FEATURE_+3];
-    object.dimensions.y = detections[i*OUTPUT_NUM_BOX_FEATURE_+4];
-    object.dimensions.z = detections[i*OUTPUT_NUM_BOX_FEATURE_+5];
+    object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3];
+    object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4];
+    object.dimensions.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 5];
 
     objects.objects.push_back(object);
   }
@@ -113,7 +108,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid)
   try
   {
     tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0));
-    tf_listener_.lookupTransform( BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_);
+    tf_listener_.lookupTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_);
     getTFInfo(baselink2lidar_);
     has_subscribed_baselink_ = true;
   }
@@ -131,39 +126,37 @@ void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar)
   tf::Quaternion q = baselink2lidar_.getRotation();
   angle_transform_ = tf::Transform(q);
   angle_transform_inversed_ = angle_transform_.inverse();
-
 }
 
-void PointPillarsROS::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array, const float offset_z)
+void PointPillarsROS::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array,
+                                 const float offset_z)
 {
-  for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
+  for (size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
   {
     pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
-    out_points_array[i*NUM_POINT_FEATURE_ + 0] = point.x;
-    out_points_array[i*NUM_POINT_FEATURE_ + 1] = point.y;
-    out_points_array[i*NUM_POINT_FEATURE_ + 2] = point.z + offset_z;
-    out_points_array[i*NUM_POINT_FEATURE_ + 3] = float(point.intensity/NORMALIZING_INTENSITY_VALUE_);
+    out_points_array[i * NUM_POINT_FEATURE_ + 0] = point.x;
+    out_points_array[i * NUM_POINT_FEATURE_ + 1] = point.y;
+    out_points_array[i * NUM_POINT_FEATURE_ + 2] = point.z + offset_z;
+    out_points_array[i * NUM_POINT_FEATURE_ + 3] = float(point.intensity / NORMALIZING_INTENSITY_VALUE_);
   }
 }
 
-
 void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& msg)
 {
-
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
   pcl::fromROSMsg(*msg, *pcl_pc_ptr);
 
-  if(baselink_support_)
+  if (baselink_support_)
   {
-    if(!has_subscribed_baselink_)
+    if (!has_subscribed_baselink_)
     {
       getBaselinkToLidarTF(msg->header.frame_id);
     }
     pcl_ros::transformPointCloud(*pcl_pc_ptr, *pcl_pc_ptr, angle_transform_);
   }
 
-  float * points_array = new float[pcl_pc_ptr->size()*NUM_POINT_FEATURE_];
-  if(baselink_support_ && has_subscribed_baselink_)
+  float* points_array = new float[pcl_pc_ptr->size() * NUM_POINT_FEATURE_];
+  if (baselink_support_ && has_subscribed_baselink_)
   {
     pclToArray(pcl_pc_ptr, points_array, offset_z_from_trained_data_);
   }
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index 51fe2a07884..e36055debc9 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -14,52 +14,43 @@
  * limitations under the License.
  */
 
-//headers in STL
+// headers in STL
 #include <cmath>
 #include <iostream>
 
-//headers in local files
+// headers in local files
 #include "preprocess_points.h"
 
-PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS,
-                                   const int MAX_POINTS_PER_PILLAR,
-                                   const int GRID_X_SIZE,
-                                   const int GRID_Y_SIZE,
-                                   const int GRID_Z_SIZE,
-                                   const float PILLAR_X_SIZE,
-                                   const float PILLAR_Y_SIZE,
-                                   const float PILLAR_Z_SIZE,
-                                   const float MIN_X_RANGE,
-                                   const float MIN_Y_RANGE,
-                                   const float MIN_Z_RANGE,
-                                   const int NUM_INDS_FOR_SCAN,
+PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE,
+                                   const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE,
+                                   const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE, const float MIN_X_RANGE,
+                                   const float MIN_Y_RANGE, const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN,
                                    const int NUM_BOX_CORNERS)
-:
-MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
-MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR),
-GRID_X_SIZE_(GRID_X_SIZE),
-GRID_Y_SIZE_(GRID_Y_SIZE),
-GRID_Z_SIZE_(GRID_Z_SIZE),
-PILLAR_X_SIZE_(PILLAR_X_SIZE),
-PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
-PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
-MIN_X_RANGE_(MIN_X_RANGE),
-MIN_Y_RANGE_(MIN_Y_RANGE),
-MIN_Z_RANGE_(MIN_Z_RANGE),
-NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN),
-NUM_BOX_CORNERS_(NUM_BOX_CORNERS)
+  : MAX_NUM_PILLARS_(MAX_NUM_PILLARS)
+  , MAX_NUM_POINTS_PER_PILLAR_(MAX_POINTS_PER_PILLAR)
+  , GRID_X_SIZE_(GRID_X_SIZE)
+  , GRID_Y_SIZE_(GRID_Y_SIZE)
+  , GRID_Z_SIZE_(GRID_Z_SIZE)
+  , PILLAR_X_SIZE_(PILLAR_X_SIZE)
+  , PILLAR_Y_SIZE_(PILLAR_Y_SIZE)
+  , PILLAR_Z_SIZE_(PILLAR_Z_SIZE)
+  , MIN_X_RANGE_(MIN_X_RANGE)
+  , MIN_Y_RANGE_(MIN_Y_RANGE)
+  , MIN_Z_RANGE_(MIN_Z_RANGE)
+  , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+  , NUM_BOX_CORNERS_(NUM_BOX_CORNERS)
 {
 }
 
-void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map,
-                                         float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-                                         float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped)
+void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x,
+                                           float* pillar_y, float* pillar_z, float* pillar_i,
+                                           float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped)
 {
   for (int i = 0; i < GRID_Y_SIZE_; i++)
   {
     for (int j = 0; j < GRID_X_SIZE_; j++)
     {
-      coor_to_pillaridx[i*GRID_X_SIZE_ + j] = -1;
+      coor_to_pillaridx[i * GRID_X_SIZE_ + j] = -1;
     }
   }
 
@@ -67,11 +58,11 @@ void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse
   {
     for (int j = 0; j < NUM_INDS_FOR_SCAN_; j++)
     {
-      sparse_pillar_map[i*NUM_INDS_FOR_SCAN_+j] = 0;
+      sparse_pillar_map[i * NUM_INDS_FOR_SCAN_ + j] = 0;
     }
   }
 
-  for(int i = 0; i < MAX_NUM_PILLARS_*MAX_NUM_POINTS_PER_PILLAR_; i++)
+  for (int i = 0; i < MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_; i++)
   {
     pillar_x[i] = 0;
     pillar_y[i] = 0;
@@ -82,63 +73,60 @@ void PreprocessPoints::initializeVariables(int* coor_to_pillaridx, float* sparse
   }
 }
 
-void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points,
-                int* x_coors, int* y_coors, float* num_points_per_pillar,
-                float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-                float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
-                float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
+void PreprocessPoints::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
+                                  float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z,
+                                  float* pillar_i, float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+                                  float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
 {
   int pillar_count = 0;
-  float x_coors_for_sub[MAX_NUM_PILLARS_]= {0};
-  float y_coors_for_sub[MAX_NUM_PILLARS_]= {0};
-  //init variables
-  int coor_to_pillaridx[GRID_Y_SIZE_*GRID_X_SIZE_];
-  initializeVariables(coor_to_pillaridx, sparse_pillar_map,
-                     pillar_x, pillar_y, pillar_z, pillar_i,
-                     x_coors_for_sub_shaped, y_coors_for_sub_shaped);
-  for(int i = 0; i < in_num_points; i++)
+  float x_coors_for_sub[MAX_NUM_PILLARS_] = { 0 };
+  float y_coors_for_sub[MAX_NUM_PILLARS_] = { 0 };
+  // init variables
+  int coor_to_pillaridx[GRID_Y_SIZE_ * GRID_X_SIZE_];
+  initializeVariables(coor_to_pillaridx, sparse_pillar_map, pillar_x, pillar_y, pillar_z, pillar_i,
+                      x_coors_for_sub_shaped, y_coors_for_sub_shaped);
+  for (int i = 0; i < in_num_points; i++)
   {
-    int x_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_)/PILLAR_X_SIZE_);
-    int y_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_)/PILLAR_Y_SIZE_);
-    int z_coor = std::floor((in_points_array[i*NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_)/PILLAR_Z_SIZE_);
-    if(x_coor < 0 || x_coor >= GRID_X_SIZE_ ||
-       y_coor < 0 || y_coor >= GRID_Y_SIZE_ ||
-       z_coor < 0 || z_coor >= GRID_Z_SIZE_)
+    int x_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 0] - MIN_X_RANGE_) / PILLAR_X_SIZE_);
+    int y_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 1] - MIN_Y_RANGE_) / PILLAR_Y_SIZE_);
+    int z_coor = std::floor((in_points_array[i * NUM_BOX_CORNERS_ + 2] - MIN_Z_RANGE_) / PILLAR_Z_SIZE_);
+    if (x_coor < 0 || x_coor >= GRID_X_SIZE_ || y_coor < 0 || y_coor >= GRID_Y_SIZE_ || z_coor < 0 ||
+        z_coor >= GRID_Z_SIZE_)
     {
       continue;
     }
-    //reverse index
-    int pillar_index = coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor];
-    if(pillar_index == -1)
+    // reverse index
+    int pillar_index = coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor];
+    if (pillar_index == -1)
     {
       pillar_index = pillar_count;
-      if(pillar_count >= MAX_NUM_PILLARS_)
+      if (pillar_count >= MAX_NUM_PILLARS_)
       {
         break;
       }
       pillar_count += 1;
-      coor_to_pillaridx[y_coor*GRID_X_SIZE_ + x_coor] = pillar_index;
+      coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor] = pillar_index;
 
-      //reverse index
+      // reverse index
       y_coors[pillar_index] = std::floor(y_coor);
       x_coors[pillar_index] = std::floor(x_coor);
 
       // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_;
       // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_;
       // TODO Need to be modified after proper trining code
-      y_coors_for_sub[pillar_index] = std::floor(y_coor)* PILLAR_Y_SIZE_ + -39.9;
-      x_coors_for_sub[pillar_index] = std::floor(x_coor)* PILLAR_X_SIZE_ + 0.1;
+      y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9;
+      x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1;
 
-      //sparse pillar map
-      sparse_pillar_map[y_coor*512 + x_coor] = 1;
+      // sparse pillar map
+      sparse_pillar_map[y_coor * 512 + x_coor] = 1;
     }
     int num = num_points_per_pillar[pillar_index];
     if (num < MAX_NUM_POINTS_PER_PILLAR_)
     {
-      pillar_x[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 0];
-      pillar_y[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 1];
-      pillar_z[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 2];
-      pillar_i[pillar_index*MAX_NUM_POINTS_PER_PILLAR_ + num]= in_points_array[i*NUM_BOX_CORNERS_ + 3];
+      pillar_x[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 0];
+      pillar_y[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 1];
+      pillar_z[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 2];
+      pillar_i[pillar_index * MAX_NUM_POINTS_PER_PILLAR_ + num] = in_points_array[i * NUM_BOX_CORNERS_ + 3];
       num_points_per_pillar[pillar_index] += 1;
     }
   }
@@ -148,20 +136,19 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
     float x = x_coors_for_sub[i];
     float y = y_coors_for_sub[i];
     int num_points_for_a_pillar = num_points_per_pillar[i];
-    for(int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++)
+    for (int j = 0; j < MAX_NUM_POINTS_PER_PILLAR_; j++)
     {
-       x_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = x;
-       y_coors_for_sub_shaped[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = y;
-       if(j < num_points_for_a_pillar)
-       {
-         pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0;
-       }
-       else
-       {
-         pillar_feature_mask[i*MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0;
-       }
+      x_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = x;
+      y_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = y;
+      if (j < num_points_for_a_pillar)
+      {
+        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0;
+      }
+      else
+      {
+        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0;
+      }
     }
   }
   host_pillar_count[0] = pillar_count;
-
 }
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index 0b209a92471..a45cef2bdef 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
- /**
- * @file test_point_pillars.cpp
- * @brief unit test file
- * @author Kosuke Murakami
- * @date 2019/02/26
- */
+/**
+* @file test_point_pillars.cpp
+* @brief unit test file
+* @author Kosuke Murakami
+* @date 2019/02/26
+*/
 
 #include <ros/ros.h>
 #include <ros/package.h>
@@ -46,17 +46,9 @@ class TestSuite : public ::testing::Test
 class TestClass
 {
 public:
-  TestClass(const int MAX_NUM_PILLARS,
-            const int MAX_NUM_POINTS_PER_PILLAR,
-            const int GRID_X_SIZE,
-            const int GRID_Y_SIZE,
-            const int GRID_Z_SIZE,
-            const float PILLAR_X_SIZE,
-            const float PILLAR_Y_SIZE,
-            const float PILLAR_Z_SIZE,
-            const float MIN_X_RANGE,
-            const float MIN_Y_RANGE,
-            const float MIN_Z_RANGE,
+  TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE,
+            const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE,
+            const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE,
             const int NUM_INDS_FOR_SCAN);
   const int MAX_NUM_PILLARS_;
   const int MAX_NUM_POINTS_PER_PILLAR_;
@@ -70,90 +62,64 @@ class TestClass
   const float MIN_Y_RANGE_;
   const float MIN_Z_RANGE_;
   const int NUM_INDS_FOR_SCAN_;
-  void loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr,
-    const std::string& in_file );
-  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array);
-  void preprocess(const float* in_points_array, int in_num_points,
-    int* x_coors, int* y_coors, float* num_points_per_pillar,
-    float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-    float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
-    float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count);
-private:
+  void loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, const std::string& in_file);
+  void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array);
+  void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
+                  float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
+                  float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask,
+                  float* sparse_pillar_map, int* host_pillar_count);
 
+private:
   std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
 };
 
-TestClass::TestClass(
-          const int MAX_NUM_PILLARS,
-          const int MAX_NUM_POINTS_PER_PILLAR,
-          const int GRID_X_SIZE,
-          const int GRID_Y_SIZE,
-          const int GRID_Z_SIZE,
-          const float PILLAR_X_SIZE,
-          const float PILLAR_Y_SIZE,
-          const float PILLAR_Z_SIZE,
-          const float MIN_X_RANGE,
-          const float MIN_Y_RANGE,
-          const float MIN_Z_RANGE,
-          const int NUM_INDS_FOR_SCAN):
-MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
-MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR),
-GRID_X_SIZE_(GRID_X_SIZE),
-GRID_Y_SIZE_(GRID_Y_SIZE),
-GRID_Z_SIZE_(GRID_Z_SIZE),
-PILLAR_X_SIZE_(PILLAR_X_SIZE),
-PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
-PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
-MIN_X_RANGE_(MIN_X_RANGE),
-MIN_Y_RANGE_(MIN_Y_RANGE),
-MIN_Z_RANGE_(MIN_Z_RANGE),
-NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE,
+                     const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE,
+                     const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE,
+                     const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN)
+  : MAX_NUM_PILLARS_(MAX_NUM_PILLARS)
+  , MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR)
+  , GRID_X_SIZE_(GRID_X_SIZE)
+  , GRID_Y_SIZE_(GRID_Y_SIZE)
+  , GRID_Z_SIZE_(GRID_Z_SIZE)
+  , PILLAR_X_SIZE_(PILLAR_X_SIZE)
+  , PILLAR_Y_SIZE_(PILLAR_Y_SIZE)
+  , PILLAR_Z_SIZE_(PILLAR_Z_SIZE)
+  , MIN_X_RANGE_(MIN_X_RANGE)
+  , MIN_Y_RANGE_(MIN_Y_RANGE)
+  , MIN_Z_RANGE_(MIN_Z_RANGE)
+  , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
 {
   preprocess_points_ptr_.reset(new PreprocessPoints(
-    MAX_NUM_PILLARS_,
-    MAX_NUM_POINTS_PER_PILLAR_,
-    GRID_X_SIZE_,
-    GRID_Y_SIZE_,
-    GRID_Z_SIZE_,
-    PILLAR_X_SIZE_,
-    PILLAR_Y_SIZE_,
-    PILLAR_Z_SIZE_,
-    MIN_X_RANGE_,
-    MIN_Y_RANGE_,
-    MIN_Z_RANGE_,
-    NUM_INDS_FOR_SCAN_
-    ));
+      MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_,
+      PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_));
 };
 
-void TestClass::preprocess(const float* in_points_array, int in_num_points,
-                int* x_coors, int* y_coors, float* num_points_per_pillar,
-                float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
-                float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
-                float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
+void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
+                           float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z,
+                           float* pillar_i, float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped,
+                           float* pillar_feature_mask, float* sparse_pillar_map, int* host_pillar_count)
 {
-  preprocess_points_ptr_->preprocess(in_points_array, in_num_points,
-                                    x_coors, y_coors, num_points_per_pillar,
-                                    pillar_x, pillar_y, pillar_z, pillar_i,
-                                    x_coors_for_sub_shaped, y_coors_for_sub_shaped,
-                                    pillar_feature_mask, sparse_pillar_map, host_pillar_count);
+  preprocess_points_ptr_->preprocess(in_points_array, in_num_points, x_coors, y_coors, num_points_per_pillar, pillar_x,
+                                     pillar_y, pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped,
+                                     pillar_feature_mask, sparse_pillar_map, host_pillar_count);
 }
 
-void TestClass::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr & in_pcl_pc_ptr, float* out_points_array)
+void TestClass::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array)
 {
-  for(size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
+  for (size_t i = 0; i < in_pcl_pc_ptr->size(); i++)
   {
     pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
-    out_points_array[i*4 + 0] = point.x;
-    out_points_array[i*4 + 1] = point.y;
-    out_points_array[i*4 + 2] = point.z;
-    out_points_array[i*4 + 3] = point.intensity;
+    out_points_array[i * 4 + 0] = point.x;
+    out_points_array[i * 4 + 1] = point.y;
+    out_points_array[i * 4 + 2] = point.z;
+    out_points_array[i * 4 + 3] = point.intensity;
   }
 }
 
-void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr,
-                           const std::string& in_file )
+void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, const std::string& in_file)
 {
-  if (pcl::io::loadPCDFile<pcl::PointXYZI> (in_file, *in_pcl_pc_ptr) == -1)
+  if (pcl::io::loadPCDFile<pcl::PointXYZI>(in_file, *in_pcl_pc_ptr) == -1)
   {
     ROS_ERROR("Couldn't read test pcd file \n");
   }
@@ -161,54 +127,39 @@ void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr,
 
 TEST(TestSuite, CheckPreprocessPointsCPU)
 {
-  TestClass test_obj(
-    12000,
-    100,
-    432,
-    496,
-    1,
-    0.16,
-    0.16,
-    4.0,
-    0,
-    -39.68,
-    -3.0,
-    512);
+  TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512);
 
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
   std::string package_path = ros::package::getPath("lidar_point_pillars");
   std::string in_file = package_path + "/test/data/1527836009720148.pcd";
   test_obj.loadPoints(pcl_pc_ptr, in_file);
 
-  float * points_array = new float[pcl_pc_ptr->size()*4];
+  float* points_array = new float[pcl_pc_ptr->size() * 4];
   test_obj.pclToArray(pcl_pc_ptr, points_array);
 
+  int x_coors[test_obj.MAX_NUM_PILLARS_] = { 0 };
+  int y_coors[test_obj.MAX_NUM_PILLARS_] = { 0 };
+  float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = { 0 };
+  float* pillar_x = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_y = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_z = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_i = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
 
-  int x_coors[test_obj.MAX_NUM_PILLARS_]= {0};
-  int y_coors[test_obj.MAX_NUM_PILLARS_]= {0};
-  float num_points_per_pillar[test_obj.MAX_NUM_PILLARS_] = {0};
-  float * pillar_x = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_y = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_z = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_i = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-
-  float * x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-  float * y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
-  float * pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_*test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* x_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* y_coors_for_sub_shaped = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
+  float* pillar_feature_mask = new float[test_obj.MAX_NUM_PILLARS_ * test_obj.MAX_NUM_POINTS_PER_PILLAR_];
 
-  float* sparse_pillar_map = new float[512*512];
+  float* sparse_pillar_map = new float[512 * 512];
 
-  int host_pillar_count[1] = {0};
-  test_obj.preprocess(points_array, pcl_pc_ptr->size(),
-                      x_coors, y_coors, num_points_per_pillar,
-                      pillar_x, pillar_y, pillar_z, pillar_i,
-                      x_coors_for_sub_shaped, y_coors_for_sub_shaped,
-                      pillar_feature_mask, sparse_pillar_map, host_pillar_count);
+  int host_pillar_count[1] = { 0 };
+  test_obj.preprocess(points_array, pcl_pc_ptr->size(), x_coors, y_coors, num_points_per_pillar, pillar_x, pillar_y,
+                      pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, pillar_feature_mask,
+                      sparse_pillar_map, host_pillar_count);
   EXPECT_EQ(15, num_points_per_pillar[0]);
   EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]);
   EXPECT_EQ(71, x_coors[100]);
   EXPECT_EQ(177, y_coors[100]);
-  EXPECT_EQ(1, sparse_pillar_map[177*512 + 71]);
+  EXPECT_EQ(1, sparse_pillar_map[177 * 512 + 71]);
   EXPECT_EQ(6270, host_pillar_count[0]);
   delete[] points_array;
   delete[] pillar_x;

From 7c4c1d6ca20e6d7683ba1ead8483ad7d8bf784a8 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 08:35:54 +0900
Subject: [PATCH 05/37] Refactor codes

---
 .../lidar_point_pillars/CMakeLists.txt        |  1 +
 .../include/point_pillars.h                   | 30 +++++++++++++++----
 .../nodes/point_pillars.cpp                   | 20 ++++++++++---
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index fad7f1a04c6..b4c451a393b 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -80,6 +80,7 @@ if(TRT_AVAIL AND CUDA_AVAIL)
          nodes/point_pillars.cpp
          nodes/preprocess_points.cpp
          )
+  # Need ${catkin_LIBRARIES} because of #include <ros/console.h> for ROS_ERROR
   target_link_libraries(point_pillars_lib
          ${catkin_LIBRARIES}
          nvinfer
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index e65a5c02433..541e1d9b347 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -220,6 +220,20 @@ class PointPillars
   */
   void initTRT();
 
+  /**
+  * @brief Generate anchors
+  * @param[in] anchors_px_ Represents x-coordinate value for corresponding anchor
+  * @param[in] anchors_py_ Represents y-coordinate value for corresponding anchor
+  * @param[in] anchors_pz_ Represents z-coordinate value for corresponding anchor
+  * @param[in] anchors_dx_ Represents x-dimension value for corresponding anchor
+  * @param[in] anchors_dy_ Represents y-dimension value for corresponding anchor
+  * @param[in] anchors_dz_ Represents z-dimension value for corresponding anchor
+  * @param[in] anchors_ro_ Represents rotation value for corresponding anchor
+  * @details Generate anchors for each grid
+  */
+  void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_,
+                       float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_);
+
   /**
   * @brief Convert ONNX to TensorRT model
   * @param[in] model_file ONNX model file path
@@ -254,13 +268,19 @@ class PointPillars
 
   /**
   * @brief Convert anchors to box form like min_x, min_y, max_x, max_y anchors
-  * @param[in] anchors_px Represents x-coordinate value for corresponding anchor
-  * @param[in] anchors_py Represents y-coordinate value for corresponding anchor
-  * @param[in] anchors_dx Represents x-dimension value for corresponding anchor
-  * @param[in] anchors_dy Represents y-dimension value for corresponding anchor
+  * @param[in] anchors_px_ Represents x-coordinate value for a corresponding anchor
+  * @param[in] anchors_py_ Represents y-coordinate value for a corresponding anchor
+  * @param[in] anchors_dx_ Represents x-dimension value for a corresponding anchor
+  * @param[in] anchors_dy_ Represents y-dimension value for a corresponding anchor
+  * @param[in] box_anchors_min_x_ Represents minimum x value for a correspomding anchor
+  * @param[in] box_anchors_min_y_ Represents minimum y value for a correspomding anchor
+  * @param[in] box_anchors_max_x_ Represents maximum x value for a correspomding anchor
+  * @param[in] box_anchors_max_y_ Represents maximum y value for a correspomding anchor
   * @details Make box anchors for nms
   */
-  void convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy);
+  void convertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_, float* anchors_dx_, float* anchors_dy_,
+                                 float* box_anchors_min_x_, float* box_anchors_min_y_,
+                                 float* box_anchors_max_x_, float* box_anchors_max_y_);
 
   /**
   * @brief Memory allocation for anchors
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 117fb5c2a5e..bb5f33050f6 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -267,6 +267,19 @@ void PointPillars::deviceMemoryMalloc()
 }
 
 void PointPillars::initAnchors()
+{
+  generateAnchors(anchors_px_, anchors_py_, anchors_pz_,
+                  anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_);
+
+  convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_,
+                            box_anchors_min_x_, box_anchors_min_y_,
+                            box_anchors_max_x_, box_anchors_max_y_);
+
+  putAnchorsInDeviceMemory();
+}
+
+void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_,
+                                   float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_)
 {
   // zero clear
   for (size_t i = 0; i < NUM_ANCHOR_; i++)
@@ -320,9 +333,6 @@ void PointPillars::initAnchors()
       }
     }
   }
-  convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_);
-
-  putAnchorsInDeviceMemory();
 }
 
 void PointPillars::putAnchorsInDeviceMemory()
@@ -345,7 +355,9 @@ void PointPillars::putAnchorsInDeviceMemory()
   GPU_CHECK(cudaMemcpy(dev_anchors_ro_, anchors_ro_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
 }
 
-void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy)
+void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy,
+                                             float* box_anchors_min_x_, float* box_anchors_min_y_,
+                                             float* box_anchors_max_x_, float* box_anchors_max_y_)
 {
   // flip box's dimension when the at the third axis == 1
   float flipped_anchors_dx[NUM_ANCHOR_] = { 0 };

From 0a784b63647afc58b16d632cd92ea504f909c498 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 09:27:03 +0900
Subject: [PATCH 06/37] fix some test

---
 .../test/src/test_point_pillars.cpp                   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index a45cef2bdef..e3ca4a7dd28 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -49,7 +49,7 @@ class TestClass
   TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE,
             const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE,
             const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE,
-            const int NUM_INDS_FOR_SCAN);
+            const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS);
   const int MAX_NUM_PILLARS_;
   const int MAX_NUM_POINTS_PER_PILLAR_;
   const int GRID_X_SIZE_;
@@ -62,6 +62,8 @@ class TestClass
   const float MIN_Y_RANGE_;
   const float MIN_Z_RANGE_;
   const int NUM_INDS_FOR_SCAN_;
+  const int NUM_BOX_CORNERS_;
+
   void loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, const std::string& in_file);
   void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array);
   void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
@@ -76,7 +78,7 @@ class TestClass
 TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE,
                      const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE,
                      const float PILLAR_Z_SIZE, const float MIN_X_RANGE, const float MIN_Y_RANGE,
-                     const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN)
+                     const float MIN_Z_RANGE, const int NUM_INDS_FOR_SCAN, const int NUM_BOX_CORNERS)
   : MAX_NUM_PILLARS_(MAX_NUM_PILLARS)
   , MAX_NUM_POINTS_PER_PILLAR_(MAX_NUM_POINTS_PER_PILLAR)
   , GRID_X_SIZE_(GRID_X_SIZE)
@@ -89,10 +91,11 @@ TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PIL
   , MIN_Y_RANGE_(MIN_Y_RANGE)
   , MIN_Z_RANGE_(MIN_Z_RANGE)
   , NUM_INDS_FOR_SCAN_(NUM_INDS_FOR_SCAN)
+  , NUM_BOX_CORNERS_(NUM_BOX_CORNERS)
 {
   preprocess_points_ptr_.reset(new PreprocessPoints(
       MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_,
-      PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_));
+      PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_));
 };
 
 void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
@@ -127,7 +130,7 @@ void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, c
 
 TEST(TestSuite, CheckPreprocessPointsCPU)
 {
-  TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512);
+  TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512, 4);
 
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
   std::string package_path = ros::package::getPath("lidar_point_pillars");

From 978a5d637af6f95d4ac6964095a090d07dcd6fad Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 10:13:59 +0900
Subject: [PATCH 07/37] fix test

---
 .../test/src/test_point_pillars.cpp           | 65 +++++++++++++++----
 1 file changed, 52 insertions(+), 13 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index e3ca4a7dd28..51a54579963 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -64,13 +64,14 @@ class TestClass
   const int NUM_INDS_FOR_SCAN_;
   const int NUM_BOX_CORNERS_;
 
-  void loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, const std::string& in_file);
+  void initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr);
   void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array);
   void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
                   float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
                   float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask,
                   float* sparse_pillar_map, int* host_pillar_count);
 
+
 private:
   std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
 };
@@ -120,12 +121,49 @@ void TestClass::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc
   }
 }
 
-void TestClass::loadPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr, const std::string& in_file)
+void TestClass::initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
 {
-  if (pcl::io::loadPCDFile<pcl::PointXYZI>(in_file, *in_pcl_pc_ptr) == -1)
-  {
-    ROS_ERROR("Couldn't read test pcd file \n");
-  }
+  pcl::PointXYZI point;
+  point.x = 12.9892;
+  point.y = -9.98058;
+  point.z = 0;
+  point.intensity = 4;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 11.8697;
+  point.y = -11.123;
+  point.z = -0.189377;
+  point.intensity = 35;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 12.489;
+  point.y = -9.59703;
+  point.z = -2.15565;
+  point.intensity =11;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 12.9084;
+  point.y = -10.9626;
+  point.z = -2.15565;
+  point.intensity = 11;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 13.8676;
+  point.y = -9.61668;
+  point.z = 0.0980819;
+  point.intensity = 14;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 13.5673;
+  point.y = -12.9834;
+  point.z = 0.21862;
+  point.intensity = 1;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 13.8213;
+  point.y = -10.8529;
+  point.z =-1.22883;
+  point.intensity = 19;
+  in_pcl_pc_ptr->push_back(point);
+  point.x = 11.8957;
+  point.y = -10.3189;
+  point.z =-1.28556;
+  point.intensity = 13;
+  in_pcl_pc_ptr->push_back(point);
 }
 
 TEST(TestSuite, CheckPreprocessPointsCPU)
@@ -135,7 +173,8 @@ TEST(TestSuite, CheckPreprocessPointsCPU)
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
   std::string package_path = ros::package::getPath("lidar_point_pillars");
   std::string in_file = package_path + "/test/data/1527836009720148.pcd";
-  test_obj.loadPoints(pcl_pc_ptr, in_file);
+  test_obj.initPoints(pcl_pc_ptr);
+
 
   float* points_array = new float[pcl_pc_ptr->size() * 4];
   test_obj.pclToArray(pcl_pc_ptr, points_array);
@@ -158,12 +197,12 @@ TEST(TestSuite, CheckPreprocessPointsCPU)
   test_obj.preprocess(points_array, pcl_pc_ptr->size(), x_coors, y_coors, num_points_per_pillar, pillar_x, pillar_y,
                       pillar_z, pillar_i, x_coors_for_sub_shaped, y_coors_for_sub_shaped, pillar_feature_mask,
                       sparse_pillar_map, host_pillar_count);
-  EXPECT_EQ(15, num_points_per_pillar[0]);
-  EXPECT_FLOAT_EQ(3.7517259, pillar_x[14]);
-  EXPECT_EQ(71, x_coors[100]);
-  EXPECT_EQ(177, y_coors[100]);
-  EXPECT_EQ(1, sparse_pillar_map[177 * 512 + 71]);
-  EXPECT_EQ(6270, host_pillar_count[0]);
+  EXPECT_EQ(1, num_points_per_pillar[0]);
+  EXPECT_FLOAT_EQ(12.9892, pillar_x[0]);
+  EXPECT_EQ(74, x_coors[1]);
+  EXPECT_EQ(178, y_coors[1]);
+  EXPECT_EQ(1, sparse_pillar_map[178 * 512 + 74]);
+  EXPECT_EQ(8, host_pillar_count[0]);
   delete[] points_array;
   delete[] pillar_x;
   delete[] pillar_y;

From 9cd5b71f6f3363d1957931d280b64cd5b3dcac8f Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 10:32:22 +0900
Subject: [PATCH 08/37] refactor codes

---
 .../packages/lidar_point_pillars/include/point_pillars_ros.h | 2 +-
 .../packages/lidar_point_pillars/nodes/point_pillars_ros.cpp | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
index 8a40722a896..97e6ff51f1f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -88,7 +88,7 @@ class PointPillarsROS
   * @param[in] lidar2baselink transofomation info
   * @details Calculate z offset compared with trained sensor height and get rotation matrix
   */
-  void getTFInfo(tf::StampedTransform lidar2baselink);
+  void analyzeTFInfo(tf::StampedTransform lidar2baselink);
 
   /**
   * @brief Transform pose based on tf stamp info
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 0d4b3df7148..17dcc8ecabc 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -86,6 +86,7 @@ void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, co
 
     float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6];
     yaw = std::atan2(std::sin(yaw), std::cos(yaw));
+    // Trained this way
     geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw);
     object.pose.orientation = q;
 
@@ -109,7 +110,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid)
   {
     tf_listener_.waitForTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), ros::Duration(1.0));
     tf_listener_.lookupTransform(BASELINK_FRAME_, target_frameid, ros::Time(0), baselink2lidar_);
-    getTFInfo(baselink2lidar_);
+    analyzeTFInfo(baselink2lidar_);
     has_subscribed_baselink_ = true;
   }
   catch (tf::TransformException ex)
@@ -118,7 +119,7 @@ void PointPillarsROS::getBaselinkToLidarTF(const std::string& target_frameid)
   }
 }
 
-void PointPillarsROS::getTFInfo(tf::StampedTransform baselink2lidar)
+void PointPillarsROS::analyzeTFInfo(tf::StampedTransform baselink2lidar)
 {
   tf::Vector3 v = baselink2lidar.getOrigin();
   offset_z_from_trained_data_ = v.getZ() - TRAINED_SENSOR_HEIGHT_;

From b4fee927a1bd1b8ba5f92ef211c447c24350a587 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 11:09:57 +0900
Subject: [PATCH 09/37] refactor codes

---
 .../lidar_point_pillars/include/point_pillars.h     |  4 +---
 .../lidar_point_pillars/include/point_pillars_ros.h |  3 +--
 .../lidar_point_pillars/include/postprocess_cuda.h  |  3 +--
 .../lidar_point_pillars/nodes/point_pillars.cpp     | 13 ++++++-------
 .../lidar_point_pillars/nodes/point_pillars_ros.cpp |  8 ++++----
 .../lidar_point_pillars/nodes/postprocess_cuda.cu   | 11 ++++++-----
 6 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index 541e1d9b347..b2f6c04728f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -307,11 +307,9 @@ class PointPillars
   * @param[in] in_points_array pointcloud array
   * @param[in] in_num_points Number of points
   * @param[in] out_detections Network output bounding box
-  * @param[in] out_num_objects Number of output bounding box
   * @details This is interface for the algorithm
   */
-  void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection,
-                   int& out_num_objects);
+  void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection);
 };
 
 #endif  // POINTS_PILLAR_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
index 97e6ff51f1f..43c0994f547 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -119,11 +119,10 @@ class PointPillarsROS
   /**
   * @brief publish DetectedObject
   * @param[in] detections Network output bounding box
-  * @param[in] num_objects Number of objects
   * @param[in] pc_header Header from pointcloud
   * @details Convert std::vector to DetectedObject, and publish them
   */
-  void pubDetectedObject(const std::vector<float>& detections, const int num_objects,
+  void pubDetectedObject(const std::vector<float>& detections,
                          const std_msgs::Header& pc_header);
 
 public:
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
index b60a427ca3f..2add152f904 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
@@ -86,7 +86,6 @@ class PostprocessCuda
   * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms
   * @param[in] dev_filter_count The number of filtered output
   * @param[out] out_detection Output bounding boxes
-  * @param[out] out_num_objects The number of output bounding boxes
   * @details dev_* represents device memory allocated variables
   */
   void doPostprocessCuda(const float* rpn_box_output, const float* rpn_cls_output, const float* rpn_dir_output,
@@ -94,7 +93,7 @@ class PostprocessCuda
                          const float* dev_anchors_pz, const float* dev_anchors_dx, const float* dev_anchors_dy,
                          const float* dev_anchors_dz, const float* dev_anchors_ro, float* dev_filtered_box,
                          float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms,
-                         int* dev_filter_count, std::vector<float>& out_detection, int& out_num_objects);
+                         int* dev_filter_count, std::vector<float>& out_detection);
 };
 
 #endif  // POSTPROCESS_CUDA_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index bb5f33050f6..1e0cf78a85a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -259,10 +259,10 @@ void PointPillars::deviceMemoryMalloc()
   GPU_CHECK(cudaMalloc((void**)&dev_anchors_dy_, NUM_ANCHOR_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_anchors_dz_, NUM_ANCHOR_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_anchors_ro_, NUM_ANCHOR_ * sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * 7 * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_filtered_box_, NUM_ANCHOR_ * NUM_OUTPUT_BOX_FEATURE_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_filtered_score_, NUM_ANCHOR_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_filtered_dir_, NUM_ANCHOR_ * sizeof(int)));
-  GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * 4 * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_box_for_nms_, NUM_ANCHOR_ * NUM_BOX_CORNERS_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_filter_count_, sizeof(int)));
 }
 
@@ -524,8 +524,8 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_
 void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points)
 {
   float* dev_points;
-  GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * 4 * sizeof(float)));
-  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * 4 * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float)));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice));
 
   GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));
   GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
@@ -558,8 +558,7 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi
   }
 }
 
-void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection,
-                               int& out_num_objects)
+void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection)
 {
   preprocess(in_points_array, in_num_points);
 
@@ -607,7 +606,7 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po
                                            dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_,
                                            dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
                                            dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_,
-                                           dev_filter_count_, out_detection, out_num_objects);
+                                           dev_filter_count_, out_detection);
 
   // release the stream and the buffers
   cudaStreamDestroy(stream);
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 17dcc8ecabc..2d6c88907d2 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -68,11 +68,12 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos
   return out_pose.pose;
 }
 
-void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const int num_objects,
+void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections,
                                         const std_msgs::Header& pc_header)
 {
   autoware_msgs::DetectedObjectArray objects;
   objects.header = pc_header;
+  int num_objects = detections.size()/OUTPUT_NUM_BOX_FEATURE_;
   for (size_t i = 0; i < num_objects; i++)
   {
     autoware_msgs::DetectedObject object;
@@ -166,10 +167,9 @@ void PointPillarsROS::pointsCallback(const sensor_msgs::PointCloud2::ConstPtr& m
     pclToArray(pcl_pc_ptr, points_array);
   }
 
-  int out_num_objects = 0;
   std::vector<float> out_detection;
-  point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection, out_num_objects);
+  point_pillars_ptr_->doInference(points_array, pcl_pc_ptr->size(), out_detection);
 
   delete[] points_array;
-  pubDetectedObject(out_detection, out_num_objects, msg->header);
+  pubDetectedObject(out_detection, msg->header);
 }
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
index 2b5501e3541..cf82496926a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
@@ -73,9 +73,9 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
     //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation
     //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1)
     float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy),
-                          float(-0.5*box_dx), float( 0.5*box_dy),
-                          float( 0.5*box_dx), float( 0.5*box_dy),
-                          float( 0.5*box_dx), float(-0.5*box_dy)};
+                                        float(-0.5*box_dx), float( 0.5*box_dy),
+                                        float( 0.5*box_dx), float( 0.5*box_dy),
+                                        float( 0.5*box_dx), float(-0.5*box_dy)};
 
     //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax)
     //cannot use variable initialization since "error: expression must have a constant value"
@@ -159,7 +159,7 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float
         int* dev_anchor_mask, const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
         const float* dev_anchors_dx, const float* dev_anchors_dy, const float* dev_anchors_dz, const float* dev_anchors_ro,
         float* dev_filtered_box, float* dev_filtered_score, int* dev_filtered_dir, float* dev_box_for_nms, int* dev_filter_count,
-        std::vector<float>& out_detection, int& out_num_objects)
+        std::vector<float>& out_detection)
 {
 
   filter_kernel<<<NUM_ANCHOR_X_INDS_*NUM_ANCHOR_R_INDS_, NUM_ANCHOR_Y_INDS_>>>
@@ -194,10 +194,11 @@ void PostprocessCuda::doPostprocessCuda(const float* rpn_box_output, const float
                                                               NUM_BOX_CORNERS_, NUM_OUTPUT_BOX_FEATURE_);
 
   int keep_inds[host_filter_count[0]] = {0};
+  int out_num_objects = 0;
   nms_cuda_ptr_->doNMSCuda(host_filter_count[0], dev_sorted_box_for_nms, keep_inds, out_num_objects);
 
 
-  float host_filtered_box[host_filter_count[0]*7];
+  float host_filtered_box[host_filter_count[0]*NUM_OUTPUT_BOX_FEATURE_];
   int host_filtered_dir[host_filter_count[0]];
   GPU_CHECK( cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, NUM_OUTPUT_BOX_FEATURE_*host_filter_count[0] *sizeof(float), cudaMemcpyDeviceToHost ) );
   GPU_CHECK( cudaMemcpy(host_filtered_dir, dev_sorted_filtered_dir,                        host_filter_count[0] *sizeof(int), cudaMemcpyDeviceToHost ) );

From 663f4f1d06e86582bbb1925d89aca7fd1a348245 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 11:18:06 +0900
Subject: [PATCH 10/37] refactor code

---
 .../nodes/point_pillars.cpp                   | 101 +++++++-----------
 1 file changed, 38 insertions(+), 63 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 1e0cf78a85a..4d23bfcb6c5 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -193,15 +193,13 @@ PointPillars::~PointPillars()
 
 void PointPillars::deviceMemoryMalloc()
 {
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_,
-                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_,
-                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_,
-                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_,
-                       GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  // clang-format off
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_in_coors_,GRID_Y_SIZE_ * GRID_X_SIZE_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_pillar_count_histo_, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));
+  // clang-format on
 
   GPU_CHECK(cudaMalloc((void**)&dev_x_coors_, MAX_NUM_PILLARS_ * sizeof(int)));
   GPU_CHECK(cudaMalloc((void**)&dev_y_coors_, MAX_NUM_PILLARS_ * sizeof(int)));
@@ -213,12 +211,11 @@ void PointPillars::deviceMemoryMalloc()
   GPU_CHECK(cudaMalloc((void**)&dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
 
-  GPU_CHECK(
-      cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(
-      cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
-  GPU_CHECK(
-      cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  // clang-format off
+  GPU_CHECK(cudaMalloc((void**)&dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float)));
+  // clang-format on
 
   // cumsum kernel
   GPU_CHECK(cudaMalloc((void**)&dev_cumsum_along_x_, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
@@ -337,14 +334,12 @@ void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float
 
 void PointPillars::putAnchorsInDeviceMemory()
 {
-  GPU_CHECK(
-      cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(
-      cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(
-      cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(
-      cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  // clang-format off
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_x_, box_anchors_min_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_min_y_, box_anchors_min_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_x_, box_anchors_max_x_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_box_anchors_max_y_, box_anchors_max_y_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
+  // clang-format on
 
   GPU_CHECK(cudaMemcpy(dev_anchors_px_, anchors_px_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
   GPU_CHECK(cudaMemcpy(dev_anchors_py_, anchors_py_, NUM_ANCHOR_ * sizeof(float), cudaMemcpyHostToDevice));
@@ -490,26 +485,19 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_
   GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0, MAX_NUM_PILLARS_ * sizeof(float)));
   GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
 
+  // clang-format off
   GPU_CHECK(cudaMemcpy(dev_x_coors_, x_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice));
   GPU_CHECK(cudaMemcpy(dev_y_coors_, y_coors, MAX_NUM_PILLARS_ * sizeof(int), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                       cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                       cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                       cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                       cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped,
-                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,
-                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float),
-                       cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask,
-                       MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
-  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map,
-                       NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_x_, pillar_x, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_y_, pillar_y, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_z_, pillar_z, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_i_, pillar_i, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_x_coors_for_sub_shaped_, x_coors_for_sub_shaped, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_y_coors_for_sub_shaped_, y_coors_for_sub_shaped,MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_num_points_per_pillar_, num_points_per_pillar, MAX_NUM_PILLARS_ * sizeof(float),cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_pillar_feature_mask_, pillar_feature_mask,MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map_, sparse_pillar_map,NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(float), cudaMemcpyHostToDevice));
+  // clang-format on
 
   delete[] pillar_x;
   delete[] pillar_y;
@@ -568,29 +556,16 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po
 
   cudaStream_t stream;
   GPU_CHECK(cudaStreamCreate(&stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, BATCH_SIZE_ * MAX_NUM_PILLARS_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
-  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_,
-                            BATCH_SIZE_ * MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float),
-                            cudaMemcpyDeviceToDevice, stream));
+  // clang-format off
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[0], dev_pillar_x_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[1], dev_pillar_y_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[2], dev_pillar_z_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[3], dev_pillar_i_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[4], dev_num_points_per_pillar_, MAX_NUM_PILLARS_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[5], dev_x_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[6], dev_y_coors_for_sub_shaped_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  GPU_CHECK(cudaMemcpyAsync(pfe_buffers_[7], dev_pillar_feature_mask_, MAX_NUM_PILLARS_ * MAX_NUM_POINTS_PER_PILLAR_ * sizeof(float), cudaMemcpyDeviceToDevice, stream));
+  // clang-format on
   pfe_context_->enqueue(BATCH_SIZE_, pfe_buffers_, stream, nullptr);
 
   GPU_CHECK(cudaMemset(dev_scattered_feature_, 0, RPN_INPUT_SIZE_ * sizeof(float)));

From 36d9d10bf76ab8216819e526fcc9eab1262c3769 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 11:24:33 +0900
Subject: [PATCH 11/37] apply ros clang

---
 .../include/point_pillars.h                   |  8 +++----
 .../include/point_pillars_ros.h               |  3 +--
 .../nodes/point_pillars.cpp                   | 23 +++++++++----------
 .../nodes/point_pillars_ros.cpp               |  5 ++--
 .../test/src/test_point_pillars.cpp           |  8 +++----
 5 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index b2f6c04728f..735b80e36b4 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -231,8 +231,8 @@ class PointPillars
   * @param[in] anchors_ro_ Represents rotation value for corresponding anchor
   * @details Generate anchors for each grid
   */
-  void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_,
-                       float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_);
+  void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_,
+                       float* anchors_dy_, float* anchors_dz_, float* anchors_ro_);
 
   /**
   * @brief Convert ONNX to TensorRT model
@@ -279,8 +279,8 @@ class PointPillars
   * @details Make box anchors for nms
   */
   void convertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_, float* anchors_dx_, float* anchors_dy_,
-                                 float* box_anchors_min_x_, float* box_anchors_min_y_,
-                                 float* box_anchors_max_x_, float* box_anchors_max_y_);
+                                 float* box_anchors_min_x_, float* box_anchors_min_y_, float* box_anchors_max_x_,
+                                 float* box_anchors_max_y_);
 
   /**
   * @brief Memory allocation for anchors
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
index 43c0994f547..4aa50f3160d 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -122,8 +122,7 @@ class PointPillarsROS
   * @param[in] pc_header Header from pointcloud
   * @details Convert std::vector to DetectedObject, and publish them
   */
-  void pubDetectedObject(const std::vector<float>& detections,
-                         const std_msgs::Header& pc_header);
+  void pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& pc_header);
 
 public:
   PointPillarsROS();
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 4d23bfcb6c5..be307a5d68a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -265,18 +265,16 @@ void PointPillars::deviceMemoryMalloc()
 
 void PointPillars::initAnchors()
 {
-  generateAnchors(anchors_px_, anchors_py_, anchors_pz_,
-                  anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_);
+  generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_);
 
-  convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_,
-                            box_anchors_min_x_, box_anchors_min_y_,
+  convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, box_anchors_min_x_, box_anchors_min_y_,
                             box_anchors_max_x_, box_anchors_max_y_);
 
   putAnchorsInDeviceMemory();
 }
 
-void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_,
-                                   float* anchors_dx_, float* anchors_dy_, float* anchors_dz_, float* anchors_ro_)
+void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_,
+                                   float* anchors_dy_, float* anchors_dz_, float* anchors_ro_)
 {
   // zero clear
   for (size_t i = 0; i < NUM_ANCHOR_; i++)
@@ -512,8 +510,10 @@ void PointPillars::preprocessCPU(const float* in_points_array, const int in_num_
 void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_points)
 {
   float* dev_points;
+  // clang-format off
   GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float)));
-  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float),　cudaMemcpyHostToDevice));
+  // clang-format on
 
   GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));
   GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0, NUM_INDS_FOR_SCAN_ * NUM_INDS_FOR_SCAN_ * sizeof(int)));
@@ -577,11 +577,10 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po
   rpn_context_->enqueue(BATCH_SIZE_, rpn_buffers_, stream, nullptr);
 
   GPU_CHECK(cudaMemset(dev_filter_count_, 0, sizeof(int)));
-  postprocess_cuda_ptr_->doPostprocessCuda((float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3],
-                                           dev_anchor_mask_, dev_anchors_px_, dev_anchors_py_, dev_anchors_pz_,
-                                           dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
-                                           dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_,
-                                           dev_filter_count_, out_detection);
+  postprocess_cuda_ptr_->doPostprocessCuda(
+      (float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], dev_anchor_mask_, dev_anchors_px_,
+      dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
+      dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detection);
 
   // release the stream and the buffers
   cudaStreamDestroy(stream);
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 2d6c88907d2..93969a403a7 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -68,12 +68,11 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos
   return out_pose.pose;
 }
 
-void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections,
-                                        const std_msgs::Header& pc_header)
+void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& pc_header)
 {
   autoware_msgs::DetectedObjectArray objects;
   objects.header = pc_header;
-  int num_objects = detections.size()/OUTPUT_NUM_BOX_FEATURE_;
+  int num_objects = detections.size() / OUTPUT_NUM_BOX_FEATURE_;
   for (size_t i = 0; i < num_objects; i++)
   {
     autoware_msgs::DetectedObject object;
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index 51a54579963..d0736414ef0 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -71,7 +71,6 @@ class TestClass
                   float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask,
                   float* sparse_pillar_map, int* host_pillar_count);
 
-
 private:
   std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
 };
@@ -137,7 +136,7 @@ void TestClass::initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
   point.x = 12.489;
   point.y = -9.59703;
   point.z = -2.15565;
-  point.intensity =11;
+  point.intensity = 11;
   in_pcl_pc_ptr->push_back(point);
   point.x = 12.9084;
   point.y = -10.9626;
@@ -156,12 +155,12 @@ void TestClass::initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
   in_pcl_pc_ptr->push_back(point);
   point.x = 13.8213;
   point.y = -10.8529;
-  point.z =-1.22883;
+  point.z = -1.22883;
   point.intensity = 19;
   in_pcl_pc_ptr->push_back(point);
   point.x = 11.8957;
   point.y = -10.3189;
-  point.z =-1.28556;
+  point.z = -1.28556;
   point.intensity = 13;
   in_pcl_pc_ptr->push_back(point);
 }
@@ -175,7 +174,6 @@ TEST(TestSuite, CheckPreprocessPointsCPU)
   std::string in_file = package_path + "/test/data/1527836009720148.pcd";
   test_obj.initPoints(pcl_pc_ptr);
 
-
   float* points_array = new float[pcl_pc_ptr->size() * 4];
   test_obj.pclToArray(pcl_pc_ptr, points_array);
 

From 267467d0ab5440c5cf5845cf4e7654ceba276896 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 11:52:44 +0900
Subject: [PATCH 12/37] refactor codes

---
 .../packages/lidar_point_pillars/CMakeLists.txt     |  3 +--
 .../packages/lidar_point_pillars/README.md          | 13 ++++++++++++-
 .../lidar_point_pillars/nodes/point_pillars.cpp     | 13 +++++--------
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index b4c451a393b..1978f5f47eb 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -80,9 +80,8 @@ if(TRT_AVAIL AND CUDA_AVAIL)
          nodes/point_pillars.cpp
          nodes/preprocess_points.cpp
          )
-  # Need ${catkin_LIBRARIES} because of #include <ros/console.h> for ROS_ERROR
+         
   target_link_libraries(point_pillars_lib
-         ${catkin_LIBRARIES}
          nvinfer
          nvonnxparser
          gpu_point_pillars_lib
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index ffae0b6c011..d20ad5b2bfc 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -1,4 +1,4 @@
-# Point Pillars for 3D Object Detection
+# Point Pillars for 3D Object Detection: ver. 1.0
 
 Autoware package for Point Pillars.  ([reference paper](https://arxiv.org/abs/1812.05784))
 
@@ -14,6 +14,17 @@ TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-in
 Using launch file:
 `roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw`
 
+## API
+```
+/**
+* @brief Call PointPillars for the inference
+* @param[in] in_points_array pointcloud array
+* @param[in] in_num_points Number of points
+* @param[in] out_detections Network output bounding box
+* @details This is interface for the algorithm
+*/
+void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection);
+```
 
 ## Parameters
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index be307a5d68a..0c4293a95d3 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -18,9 +18,6 @@
 #include <chrono>
 #include <iostream>
 
-// headers in ROS
-#include <ros/console.h>
-
 // headers in local files
 #include "point_pillars.h"
 
@@ -391,7 +388,7 @@ void PointPillars::initTRT()
   onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream);
   if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr)
   {
-    ROS_ERROR("Failed to load %s or %s ", pfe_onnx_file_.c_str(), rpn_onnx_file_.c_str());
+    std::cerr<< "Failed to load ONNX file " << std::endl;
   }
 
   // deserialize the engine
@@ -399,7 +396,7 @@ void PointPillars::initTRT()
   rpn_runtime_ = nvinfer1::createInferRuntime(g_logger_);
   if (pfe_runtime_ == nullptr || rpn_runtime_ == nullptr)
   {
-    ROS_ERROR("Failed to create TensorRT Runtime object.");
+    std::cerr<<"Failed to create TensorRT Runtime object."<<std::endl;
   }
   pfe_engine_ =
       pfe_runtime_->deserializeCudaEngine(pfe_trt_model_stream->data(), pfe_trt_model_stream->size(), nullptr);
@@ -407,7 +404,7 @@ void PointPillars::initTRT()
       rpn_runtime_->deserializeCudaEngine(rpn_trt_model_stream->data(), rpn_trt_model_stream->size(), nullptr);
   if (pfe_engine_ == nullptr || rpn_engine_ == nullptr)
   {
-    ROS_ERROR("Failed to create TensorRT Engine.");
+    std::cerr << "Failed to create TensorRT Engine." << std::endl;
   }
   pfe_trt_model_stream->destroy();
   rpn_trt_model_stream->destroy();
@@ -415,7 +412,7 @@ void PointPillars::initTRT()
   rpn_context_ = rpn_engine_->createExecutionContext();
   if (pfe_context_ == nullptr || rpn_context_ == nullptr)
   {
-    ROS_ERROR("Failed to create TensorRT Execution Context.");
+    std::cerr << "Failed to create TensorRT Execution Context." << std::endl;;
   }
 }
 
@@ -512,7 +509,7 @@ void PointPillars::preprocessGPU(const float* in_points_array, const int in_num_
   float* dev_points;
   // clang-format off
   GPU_CHECK(cudaMalloc((void**)&dev_points, in_num_points * NUM_BOX_CORNERS_ * sizeof(float)));
-  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float),　cudaMemcpyHostToDevice));
+  GPU_CHECK(cudaMemcpy(dev_points, in_points_array, in_num_points * NUM_BOX_CORNERS_ * sizeof(float), cudaMemcpyHostToDevice));
   // clang-format on
 
   GPU_CHECK(cudaMemset(dev_pillar_count_histo_, 0, GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(int)));

From 37ee941c9b59b8fd049dc87507b0698db44eb2b0 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 12:09:43 +0900
Subject: [PATCH 13/37] remove cuda description in package.xml

---
 .../packages/lidar_point_pillars/CMakeLists.txt            | 7 ++++---
 .../packages/lidar_point_pillars/package.xml               | 4 +---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 1978f5f47eb..1a832d99e8a 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.8.3)
 project(lidar_point_pillars)
 
-# set flags for CUDA and TensorRT availability
+# set flags for CUDA availability
 option(CUDA_AVAIL "CUDA available" OFF)
 find_package(CUDA)
 if (CUDA_FOUND)
@@ -14,8 +14,9 @@ else()
   set(CUDA_AVAIL OFF)
 endif (CUDA_FOUND)
 
-# try to find the tensorRT modules
+# set flags for TensorRT availability
 option(TRT_AVAIL "TensorRT available" OFF)
+# try to find the tensorRT modules
 find_library(NVINFER NAMES nvinfer)
 find_library(NVPARSERS NAMES nvparsers)
 if(NVINFER AND NVPARSERS)
@@ -80,7 +81,7 @@ if(TRT_AVAIL AND CUDA_AVAIL)
          nodes/point_pillars.cpp
          nodes/preprocess_points.cpp
          )
-         
+
   target_link_libraries(point_pillars_lib
          nvinfer
          nvonnxparser
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
index 2c79d3dbb70..082e644fa5d 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
@@ -5,19 +5,17 @@
     <description>lidar_point_pillars</description>
 
     <maintainer email="cirpue49@yahoo.co.jp">Kosuke Murakami</maintainer>
-    <license>BSD</license>
+    <license>Apache 2.0</license>
 
     <buildtool_depend>catkin</buildtool_depend>
 
     <build_depend>roscpp</build_depend>
     <build_depend>pcl_ros</build_depend>
-    <build_depend>cuda-toolkit-10-0</build_depend>
     <build_depend>autoware_msgs</build_depend>
 
 
     <run_depend>roscpp</run_depend>
     <run_depend>pcl_ros</run_depend>
-    <run_depend>cuda-toolkit-10-0</run_depend>
     <run_depend>autoware_msgs</run_depend>
 
     <export></export>

From 036aba69d915286b20cd040ef0cea40877d0d027 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 12:10:09 +0900
Subject: [PATCH 14/37] Add specific version for TensorRT in README.md

---
 .../packages/lidar_point_pillars/README.md            | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index d20ad5b2bfc..e105af0d2a6 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -2,11 +2,12 @@
 
 Autoware package for Point Pillars.  ([reference paper](https://arxiv.org/abs/1812.05784))
 
-## Pre requisites
+## Requirements
 
 CUDA Toolkit v9.0 or 10.0
 
-TensorRT : [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
+TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
+
 
 
 ## How to launch
@@ -17,11 +18,11 @@ Using launch file:
 ## API
 ```
 /**
-* @brief Call PointPillars for the inference
+* @brief Call PointPillars for the inference.
 * @param[in] in_points_array pointcloud array
 * @param[in] in_num_points Number of points
-* @param[in] out_detections Network output bounding box
-* @details This is interface for the algorithm
+* @param[in] out_detections Output bounding box from the network
+* @details This is an interface for the algorithm.
 */
 void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection);
 ```

From 602d21caeca6d5ca333d588cafb52414ac0ced8c Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 12:18:39 +0900
Subject: [PATCH 15/37] update readme

---
 .../packages/lidar_point_pillars/README.md       | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index e105af0d2a6..ff08e3108e8 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -1,10 +1,10 @@
 # Point Pillars for 3D Object Detection: ver. 1.0
 
-Autoware package for Point Pillars.  ([reference paper](https://arxiv.org/abs/1812.05784))
+Autoware package for Point Pillars.  [Referenced paper](https://arxiv.org/abs/1812.05784).
 
 ## Requirements
 
-CUDA Toolkit v9.0 or 10.0
+CUDA Toolkit v9.0 or v10.0
 
 TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
 
@@ -12,19 +12,21 @@ TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplear
 
 ## How to launch
 
-Using launch file:
+* Launch file:
 `roslaunch lidar_point_pillars lidar_point_pillars.launch pfe_onnx_file:=/PATH/TO/FILE.onnx rpn_onnx_file:=/PATH/TO/FILE.onnx input_topic:=/points_raw`
 
+* You can launch it through the runtime manager in Computing tab, as well.
+
 ## API
 ```
 /**
 * @brief Call PointPillars for the inference.
 * @param[in] in_points_array pointcloud array
 * @param[in] in_num_points Number of points
-* @param[in] out_detections Output bounding box from the network
+* @param[out] out_detections Output bounding box from the network
 * @details This is an interface for the algorithm.
 */
-void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection);
+void doInference(float* in_points_array, int in_num_points, std::vector<float> out_detection);
 ```
 
 ## Parameters
@@ -50,6 +52,6 @@ void doInference(const float* in_points_array, const int in_num_points, std::vec
 * To display the results in Rviz `objects_visualizer` is required.
 (Launch file launches automatically this node).
 
-* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp).Notice that this model is under `BY-NC-SA 3.0` license.
+* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp). Notice that this model is under `BY-NC-SA 3.0` license.
 
-* If trained model comes from KITTI data, users might not be allowed to use the model for Commercial purpose.
+* If trained model comes from KITTI data, users might not be allowed to use the model for commercial purposes.

From c6414f793d22b5be61d6c934974af9a8b82abe9d Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 12:46:12 +0900
Subject: [PATCH 16/37] refactor codes

---
 .../include/point_pillars_ros.h               |  4 +--
 .../nodes/point_pillars.cpp                   | 33 ++++++++++---------
 .../nodes/point_pillars_ros.cpp               |  9 +++--
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
index 4aa50f3160d..c190fb56a97 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
@@ -119,10 +119,10 @@ class PointPillarsROS
   /**
   * @brief publish DetectedObject
   * @param[in] detections Network output bounding box
-  * @param[in] pc_header Header from pointcloud
+  * @param[in] in_header Header from pointcloud
   * @details Convert std::vector to DetectedObject, and publish them
   */
-  void pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& pc_header);
+  void pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& in_header);
 
 public:
   PointPillarsROS();
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 0c4293a95d3..24ce3b021a7 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -21,6 +21,7 @@
 // headers in local files
 #include "point_pillars.h"
 
+// clang-format off
 PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold,
                            const float nms_overlap_threshold, const std::string pfe_onnx_file,
                            const std::string rpn_onnx_file)
@@ -54,27 +55,14 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   , MAX_Z_RANGE_(1)
   , BATCH_SIZE_(1)
   , NUM_INDS_FOR_SCAN_(512)
-  , NUM_THREADS_(64)
-  ,  // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
-  SENSOR_HEIGHT_(1.73)
+  , NUM_THREADS_(64) // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
+  , SENSOR_HEIGHT_(1.73)
   , ANCHOR_DX_SIZE_(1.6)
   , ANCHOR_DY_SIZE_(3.9)
   , ANCHOR_DZ_SIZE_(1.56)
   , NUM_BOX_CORNERS_(4)
   , NUM_OUTPUT_BOX_FEATURE_(7)
 {
-  anchors_px_ = new float[NUM_ANCHOR_];
-  anchors_py_ = new float[NUM_ANCHOR_];
-  anchors_pz_ = new float[NUM_ANCHOR_];
-  anchors_dx_ = new float[NUM_ANCHOR_];
-  anchors_dy_ = new float[NUM_ANCHOR_];
-  anchors_dz_ = new float[NUM_ANCHOR_];
-  anchors_ro_ = new float[NUM_ANCHOR_];
-  box_anchors_min_x_ = new float[NUM_ANCHOR_];
-  box_anchors_min_y_ = new float[NUM_ANCHOR_];
-  box_anchors_max_x_ = new float[NUM_ANCHOR_];
-  box_anchors_max_y_ = new float[NUM_ANCHOR_];
-
   if (reproduce_result_mode_)
   {
     preprocess_points_ptr_.reset(new PreprocessPoints(MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_,
@@ -105,6 +93,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   initTRT();
   initAnchors();
 }
+// clang-format on
 
 PointPillars::~PointPillars()
 {
@@ -262,6 +251,20 @@ void PointPillars::deviceMemoryMalloc()
 
 void PointPillars::initAnchors()
 {
+  // allocate memory for anchors
+  anchors_px_ = new float[NUM_ANCHOR_];
+  anchors_py_ = new float[NUM_ANCHOR_];
+  anchors_pz_ = new float[NUM_ANCHOR_];
+  anchors_dx_ = new float[NUM_ANCHOR_];
+  anchors_dy_ = new float[NUM_ANCHOR_];
+  anchors_dz_ = new float[NUM_ANCHOR_];
+  anchors_ro_ = new float[NUM_ANCHOR_];
+  box_anchors_min_x_ = new float[NUM_ANCHOR_];
+  box_anchors_min_y_ = new float[NUM_ANCHOR_];
+  box_anchors_max_x_ = new float[NUM_ANCHOR_];
+  box_anchors_max_y_ = new float[NUM_ANCHOR_];
+  // deallocate these memory in deconstructor
+
   generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_);
 
   convertAnchors2BoxAnchors(anchors_px_, anchors_py_, anchors_dx_, anchors_dy_, box_anchors_min_x_, box_anchors_min_y_,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 93969a403a7..def9759cf7c 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -39,7 +39,10 @@ PointPillarsROS::PointPillarsROS()
   , NORMALIZING_INTENSITY_VALUE_(255.0)
   , BASELINK_FRAME_("base_link")
 {
+  //ros related param
   private_nh_.param<bool>("baselink_support", baselink_support_, true);
+
+  //algorithm related params
   private_nh_.param<bool>("reproduce_result_mode", reproduce_result_mode_, false);
   private_nh_.param<float>("score_threshold", score_threshold_, 0.5);
   private_nh_.param<float>("nms_overlap_threshold", nms_overlap_threshold_, 0.5);
@@ -68,15 +71,15 @@ geometry_msgs::Pose PointPillarsROS::getTransformedPose(const geometry_msgs::Pos
   return out_pose.pose;
 }
 
-void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& pc_header)
+void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, const std_msgs::Header& in_header)
 {
   autoware_msgs::DetectedObjectArray objects;
-  objects.header = pc_header;
+  objects.header = in_header;
   int num_objects = detections.size() / OUTPUT_NUM_BOX_FEATURE_;
   for (size_t i = 0; i < num_objects; i++)
   {
     autoware_msgs::DetectedObject object;
-    object.header = pc_header;
+    object.header = in_header;
     object.valid = true;
     object.pose_reliable = true;
 

From 65fd09492dce1617b130638dda104c53da80d1a4 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 13:22:10 +0900
Subject: [PATCH 17/37] add empty package declaration when not finding
 cuda/tensorrt

---
 .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 1a832d99e8a..ef411001240 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -24,7 +24,6 @@ if(NVINFER AND NVPARSERS)
   message("NVINFER: ${NVINFER}")
   message("NVPARSERS: ${NVPARSERS}")
   set(TRT_AVAIL ON)
-  add_definitions(-DTRT_AVAIL)
 else()
   message("TensorRT is NOT Available")
   set(TRT_AVAIL OFF)
@@ -116,5 +115,7 @@ if(TRT_AVAIL AND CUDA_AVAIL)
     target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib)
   endif()
 ELSE()
+  find_package(catkin REQUIRED)
+  catkin_package()
   message("PointPillars won't be built, CUDA and/or TensorRT were not found.")
 ENDIF ()

From b60a3e5e547ce7177930772194f0e8da8ff18c5d Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 14:39:22 +0900
Subject: [PATCH 18/37] add more tests

---
 .../include/point_pillars.h                   |   1 +
 .../lidar_point_pillars/test/data/dummy.onnx  | Bin 0 -> 1432 bytes
 .../test/src/test_point_pillars.cpp           | 185 +++++++++++++++++-
 3 files changed, 183 insertions(+), 3 deletions(-)
 create mode 100644 ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index 735b80e36b4..983c785c618 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -86,6 +86,7 @@ class Logger : public nvinfer1::ILogger
 class PointPillars
 {
 private:
+  friend class TestClass;
   // initize in initializer list
   const bool reproduce_result_mode_;
   const float score_threshold_;
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/data/dummy.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1a24ae75cbe3920f6b833ea359bc0c01caeb044a
GIT binary patch
literal 1432
zcmb`HJ#P~+7{}wL<PLXeuv8RPLZV0r2uj?0$y<S*lmVm+NUU8f=W<D0eD>+ir#7&Z
zZ^8#)WdOmz!1rKe<`ZCFlA#q9N|Td(!{f)l|KIlG$1Wo)nJJlu)BW;c(?9yK1Haku
z786OXAaEdXA@CsZ5okX^eX7FgYnjHBvv*W6De8Bi6(t$tQtW;D`Ka>|fI5P9EaOP1
zJl-pnE&?5Me_J@uQp{yYIp%DLwdhm|ui3^UmZ?TU!?$!2Ww@|{g}~!nghqk#W-@#1
zv|aDX(fy$@K!kPxSLMZWlyfamAPm7^J8cae%k+$<qZ<?FWP|fN9nB*dmuF;y)NVl~
z`sg?ztJyqd3KPNvQ-m0YZp+#1j=Mdx<BfQ8dgeAJks>OkVMHb=O{S}DR|;{CbH#X8
z6iv@H=H@giM#QiSDjBCTCi>zczK($F`bMDO5GB`l>~_udkmx*-sanUGb7-91m~*LQ
zO!X~Di&lXXPG`g%cJYXe)bsf(IEJWNJmH3}yvBT0{lKnNK_57S+Ly2IgJ&;ku>bT;
z@Z<9=`19iX@vDb0Sf;pxTIrf^nd1FF#WyJz!M{d*w`%o4YhW)`P7W5$4%OgY8z2O?
zY^X;CmhE4l_?_(PMu;G^_4KxxG6DB$z`GjoF9miWXrT(+wk=c|-h-fhy<UH-diyV^
C2&td|

literal 0
HcmV?d00001

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index d0736414ef0..d42fa7d7c44 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -70,9 +70,15 @@ class TestClass
                   float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
                   float* x_coors_for_sub_shaped, float* y_coors_for_sub_shaped, float* pillar_feature_mask,
                   float* sparse_pillar_map, int* host_pillar_count);
+  void generateAnchors(float* anchors_px, float* anchors_py, float* anchors_pz, float* anchors_dx,
+                       float* anchors_dy, float* anchors_dz, float* anchors_ro);
+  void convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy,
+                                 float* box_anchors_min_x, float* box_anchors_min_y,
+                                 float* box_anchors_max_x, float* box_anchors_max_y);
 
 private:
   std::unique_ptr<PreprocessPoints> preprocess_points_ptr_;
+  std::unique_ptr<PointPillars> point_pillars_ptr_;
 };
 
 TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PILLAR, const int GRID_X_SIZE,
@@ -96,6 +102,18 @@ TestClass::TestClass(const int MAX_NUM_PILLARS, const int MAX_NUM_POINTS_PER_PIL
   preprocess_points_ptr_.reset(new PreprocessPoints(
       MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, GRID_X_SIZE_, GRID_Y_SIZE_, GRID_Z_SIZE_, PILLAR_X_SIZE_,
       PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_INDS_FOR_SCAN_, NUM_BOX_CORNERS_));
+
+  bool baselink_support=true;
+  bool reproduce_result_mode=false;
+  float score_threshold = 0.5;
+  float nms_overlap_threshold = 0.5;
+  std::string package_path = ros::package::getPath("lidar_point_pillars");
+  std::string onnx_path = package_path + "/test/data/dummy.onnx";
+  std::string pfe_onnx_file = onnx_path;
+  std::string rpn_onnx_file = onnx_path;
+
+  point_pillars_ptr_.reset(new PointPillars(reproduce_result_mode, score_threshold, nms_overlap_threshold,
+                                            pfe_onnx_file, rpn_onnx_file));
 };
 
 void TestClass::preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
@@ -165,13 +183,52 @@ void TestClass::initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
   in_pcl_pc_ptr->push_back(point);
 }
 
+void TestClass::generateAnchors(float* anchors_px, float* anchors_py, float* anchors_pz, float* anchors_dx,
+                                float* anchors_dy, float* anchors_dz, float* anchors_ro)
+{
+  return point_pillars_ptr_->generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx,
+                                             anchors_dy, anchors_dz, anchors_ro);
+}
+
+void TestClass::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_py, float* anchors_dx, float* anchors_dy,
+                                          float* box_anchors_min_x, float* box_anchors_min_y,
+                                          float* box_anchors_max_x, float* box_anchors_max_y)
+{
+  return point_pillars_ptr_->convertAnchors2BoxAnchors(anchors_px, anchors_py, anchors_dx, anchors_dy,
+                                                       box_anchors_min_x, box_anchors_min_y,
+                                                       box_anchors_max_x, box_anchors_max_y);
+}
+
 TEST(TestSuite, CheckPreprocessPointsCPU)
 {
-  TestClass test_obj(12000, 100, 432, 496, 1, 0.16, 0.16, 4.0, 0, -39.68, -3.0, 512, 4);
+  const int MAX_NUM_PILLARS = 12000;
+  const int MAX_NUM_POINTS_PER_PILLAR = 100;
+  const int GRID_X_SIZE = 432;
+  const int GRID_Y_SIZE = 496;
+  const int GRID_Z_SIZE = 1;
+  const float PILLAR_X_SIZE = 0.16;
+  const float PILLAR_Y_SIZE = 0.16;
+  const float PILLAR_Z_SIZE = 4.0;
+  const float MIN_X_RANGE = 0;
+  const float MIN_Y_RANGE = -39.68;
+  const float MIN_Z_RANGE = -3.0;
+  const int NUM_INDS_FOR_SCAN = 512;
+  const int NUM_BOX_CORNERS = 4;
+  TestClass test_obj(MAX_NUM_PILLARS,
+                     MAX_NUM_POINTS_PER_PILLAR,
+                     GRID_X_SIZE,
+                     GRID_Y_SIZE,
+                     GRID_Z_SIZE,
+                     PILLAR_X_SIZE,
+                     PILLAR_Y_SIZE,
+                     PILLAR_Z_SIZE,
+                     MIN_X_RANGE,
+                     MIN_Y_RANGE,
+                     MIN_Z_RANGE,
+                     NUM_INDS_FOR_SCAN,
+                     NUM_BOX_CORNERS);
 
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
-  std::string package_path = ros::package::getPath("lidar_point_pillars");
-  std::string in_file = package_path + "/test/data/1527836009720148.pcd";
   test_obj.initPoints(pcl_pc_ptr);
 
   float* points_array = new float[pcl_pc_ptr->size() * 4];
@@ -212,6 +269,128 @@ TEST(TestSuite, CheckPreprocessPointsCPU)
   delete[] sparse_pillar_map;
 }
 
+TEST(TestSuite, CheckGenerateAnchors)
+{
+  const int MAX_NUM_PILLARS = 12000;
+  const int MAX_NUM_POINTS_PER_PILLAR = 100;
+  const int GRID_X_SIZE = 432;
+  const int GRID_Y_SIZE = 496;
+  const int GRID_Z_SIZE = 1;
+  const float PILLAR_X_SIZE = 0.16;
+  const float PILLAR_Y_SIZE = 0.16;
+  const float PILLAR_Z_SIZE = 4.0;
+  const float MIN_X_RANGE = 0;
+  const float MIN_Y_RANGE = -39.68;
+  const float MIN_Z_RANGE = -3.0;
+  const int NUM_INDS_FOR_SCAN = 512;
+  const int NUM_BOX_CORNERS = 4;
+  TestClass test_obj(MAX_NUM_PILLARS,
+                     MAX_NUM_POINTS_PER_PILLAR,
+                     GRID_X_SIZE,
+                     GRID_Y_SIZE,
+                     GRID_Z_SIZE,
+                     PILLAR_X_SIZE,
+                     PILLAR_Y_SIZE,
+                     PILLAR_Z_SIZE,
+                     MIN_X_RANGE,
+                     MIN_Y_RANGE,
+                     MIN_Z_RANGE,
+                     NUM_INDS_FOR_SCAN,
+                     NUM_BOX_CORNERS);
+
+  const int NUM_ANCHOR = 432*0.5*496*0.5*2;
+  float* anchors_px = new float[NUM_ANCHOR];
+  float* anchors_py = new float[NUM_ANCHOR];
+  float* anchors_pz = new float[NUM_ANCHOR];
+  float* anchors_dx = new float[NUM_ANCHOR];
+  float* anchors_dy = new float[NUM_ANCHOR];
+  float* anchors_dz = new float[NUM_ANCHOR];
+  float* anchors_ro = new float[NUM_ANCHOR];
+  test_obj.generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx, anchors_dy, anchors_dz, anchors_ro);
+
+  EXPECT_NEAR(0.48, anchors_px[3], 0.001);
+  EXPECT_NEAR(-39.52, anchors_py[109], 0.001);
+  EXPECT_NEAR(-1.73, anchors_pz[76], 0.001);
+  EXPECT_NEAR(1.6, anchors_dx[338], 0.001);
+  EXPECT_NEAR(3.9, anchors_dy[22], 0.001);
+  EXPECT_NEAR(1.56, anchors_dz[993], 0.001);
+  EXPECT_NEAR(1.5708, anchors_ro[1765], 0.001);
+
+  delete[] anchors_px;
+  delete[] anchors_py;
+  delete[] anchors_pz;
+  delete[] anchors_dx;
+  delete[] anchors_dy;
+  delete[] anchors_dz;
+  delete[] anchors_ro;
+}
+
+TEST(TestSuite, CheckGenerateBoxAnchors)
+{
+  const int MAX_NUM_PILLARS = 12000;
+  const int MAX_NUM_POINTS_PER_PILLAR = 100;
+  const int GRID_X_SIZE = 432;
+  const int GRID_Y_SIZE = 496;
+  const int GRID_Z_SIZE = 1;
+  const float PILLAR_X_SIZE = 0.16;
+  const float PILLAR_Y_SIZE = 0.16;
+  const float PILLAR_Z_SIZE = 4.0;
+  const float MIN_X_RANGE = 0;
+  const float MIN_Y_RANGE = -39.68;
+  const float MIN_Z_RANGE = -3.0;
+  const int NUM_INDS_FOR_SCAN = 512;
+  const int NUM_BOX_CORNERS = 4;
+  TestClass test_obj(MAX_NUM_PILLARS,
+                     MAX_NUM_POINTS_PER_PILLAR,
+                     GRID_X_SIZE,
+                     GRID_Y_SIZE,
+                     GRID_Z_SIZE,
+                     PILLAR_X_SIZE,
+                     PILLAR_Y_SIZE,
+                     PILLAR_Z_SIZE,
+                     MIN_X_RANGE,
+                     MIN_Y_RANGE,
+                     MIN_Z_RANGE,
+                     NUM_INDS_FOR_SCAN,
+                     NUM_BOX_CORNERS);
+
+  const int NUM_ANCHOR = 432*0.5*496*0.5*2;
+
+  float* anchors_px = new float[NUM_ANCHOR];
+  float* anchors_py = new float[NUM_ANCHOR];
+  float* anchors_pz = new float[NUM_ANCHOR];
+  float* anchors_dx = new float[NUM_ANCHOR];
+  float* anchors_dy = new float[NUM_ANCHOR];
+  float* anchors_dz = new float[NUM_ANCHOR];
+  float* anchors_ro = new float[NUM_ANCHOR];
+  float* box_anchors_min_x = new float[NUM_ANCHOR];
+  float* box_anchors_min_y = new float[NUM_ANCHOR];
+  float* box_anchors_max_x = new float[NUM_ANCHOR];
+  float* box_anchors_max_y = new float[NUM_ANCHOR];
+  test_obj.generateAnchors(anchors_px, anchors_py, anchors_pz, anchors_dx, anchors_dy, anchors_dz, anchors_ro);
+  test_obj.convertAnchors2BoxAnchors(anchors_px, anchors_py, anchors_dx, anchors_dy,
+                                    box_anchors_min_x, box_anchors_min_y,
+                                    box_anchors_max_x, box_anchors_max_y);
+
+  EXPECT_NEAR(53.25,  box_anchors_min_x[345], 0.001);
+  EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001);
+  EXPECT_NEAR(38.4,   box_anchors_max_x[1098], 0.001);
+  EXPECT_NEAR(-38.4,  box_anchors_max_y[675], 0.001);
+
+
+  delete[] anchors_px;
+  delete[] anchors_py;
+  delete[] anchors_pz;
+  delete[] anchors_dx;
+  delete[] anchors_dy;
+  delete[] anchors_dz;
+  delete[] anchors_ro;
+  delete[] box_anchors_min_x;
+  delete[] box_anchors_min_y;
+  delete[] box_anchors_max_x;
+  delete[] box_anchors_max_y;
+}
+
 int main(int argc, char** argv)
 {
   testing::InitGoogleTest(&argc, argv);

From 18a39e06046f915ada14f7b45ba932e1bbf61f00 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 18:07:38 +0900
Subject: [PATCH 19/37] Update README.md

---
 .../lidar_detector/packages/lidar_point_pillars/README.md     | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index ff08e3108e8..447cf5ca13c 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -52,6 +52,4 @@ void doInference(float* in_points_array, int in_num_points, std::vector<float> o
 * To display the results in Rviz `objects_visualizer` is required.
 (Launch file launches automatically this node).
 
-* Pre trained models can be downloaded from the [github repository](https://github.com/cirpue49/kitti_pretrained_pp). Notice that this model is under `BY-NC-SA 3.0` license.
-
-* If trained model comes from KITTI data, users might not be allowed to use the model for commercial purposes.
+* Pretrained models are available [here](https://github.com/cirpue49/kitti_pretrained_pp), trained with the help of the KITTI dataset. For this reason, these are not suitable for commercial purposes. Derivative works are bound to the BY-NC-SA 3.0 License. (https://creativecommons.org/licenses/by-nc-sa/3.0/)

From 0f9d824326e3e586086f68a0d395553d8bef508d Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 18:53:15 +0900
Subject: [PATCH 20/37] refactor codes

---
 .../packages/lidar_point_pillars/README.md           | 12 ++++++------
 .../lidar_point_pillars/include/point_pillars.h      |  2 +-
 .../lidar_point_pillars/nodes/point_pillars.cpp      |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index 447cf5ca13c..ddca50651f5 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -26,18 +26,18 @@ TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplear
 * @param[out] out_detections Output bounding box from the network
 * @details This is an interface for the algorithm.
 */
-void doInference(float* in_points_array, int in_num_points, std::vector<float> out_detection);
+void doInference(float* in_points_array, int in_num_points, std::vector<float> out_detections);
 ```
 
 ## Parameters
 
 |Parameter| Type| Description|Default|
 ----------|-----|--------|----|
-|`input_topic`|*String*|Input topic Pointcloud. Default.|`/points_raw`|
-|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. Default.|`True`|
-|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. Default.|`False`|
-|`score_threshold`|*Float*|Minimum score required to include the result (0.1)|0.5|
-|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS (0.1)|0.5|
+|`input_topic`|*String*|Input topic Pointcloud. |`/points_raw`|
+|`baselink_support`|*Bool*|Whether to use baselink to adjust parameters. |`True`|
+|`reproduce_result_mode`|*Bool*|Whether to enable reproducible result mode at the cost of the runtime. |`False`|
+|`score_threshold`|*Float*|Minimum score required to include the result [0,1]|0.5|
+|`nms_overlap_threshold`|*Float*|Minimum IOU required to have when applying NMS [0,1]|0.5|
 |`pfe_onnx_file`|*String* |Path to the PFE onnx file||
 |`rpn_onnx_file`|*String* |Path to the RPN onnx file||
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
index 983c785c618..7945df61354 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
@@ -310,7 +310,7 @@ class PointPillars
   * @param[in] out_detections Network output bounding box
   * @details This is interface for the algorithm
   */
-  void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection);
+  void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detections);
 };
 
 #endif  // POINTS_PILLAR_H
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 24ce3b021a7..35beffa4bb0 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -546,7 +546,7 @@ void PointPillars::preprocess(const float* in_points_array, const int in_num_poi
   }
 }
 
-void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detection)
+void PointPillars::doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detections)
 {
   preprocess(in_points_array, in_num_points);
 
@@ -580,7 +580,7 @@ void PointPillars::doInference(const float* in_points_array, const int in_num_po
   postprocess_cuda_ptr_->doPostprocessCuda(
       (float*)rpn_buffers_[1], (float*)rpn_buffers_[2], (float*)rpn_buffers_[3], dev_anchor_mask_, dev_anchors_px_,
       dev_anchors_py_, dev_anchors_pz_, dev_anchors_dx_, dev_anchors_dy_, dev_anchors_dz_, dev_anchors_ro_,
-      dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detection);
+      dev_filtered_box_, dev_filtered_score_, dev_filtered_dir_, dev_box_for_nms_, dev_filter_count_, out_detections);
 
   // release the stream and the buffers
   cudaStreamDestroy(stream);

From e7fde83c8629d26707dac2b5781ef07494900441 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 28 Feb 2019 19:02:03 +0900
Subject: [PATCH 21/37] refactor codes

---
 .../packages/lidar_point_pillars/nodes/point_pillars.cpp    | 6 +++---
 .../lidar_point_pillars/nodes/preprocess_points_cuda.cu     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 35beffa4bb0..a202a0b2e14 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -55,7 +55,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   , MAX_Z_RANGE_(1)
   , BATCH_SIZE_(1)
   , NUM_INDS_FOR_SCAN_(512)
-  , NUM_THREADS_(64) // if you chancge NUM_THREADS_, need to modify nms_kernel's shared mempry size
+  , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size
   , SENSOR_HEIGHT_(1.73)
   , ANCHOR_DX_SIZE_(1.6)
   , ANCHOR_DY_SIZE_(3.9)
@@ -352,7 +352,7 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p
                                              float* box_anchors_min_x_, float* box_anchors_min_y_,
                                              float* box_anchors_max_x_, float* box_anchors_max_y_)
 {
-  // flip box's dimension when the at the third axis == 1
+  // flip box's dimension when the third axis == 1
   float flipped_anchors_dx[NUM_ANCHOR_] = { 0 };
   float flipped_anchors_dy[NUM_ANCHOR_] = { 0 };
   for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
@@ -390,7 +390,7 @@ void PointPillars::initTRT()
   onnxToTRTModel(pfe_onnx_file_, pfe_trt_model_stream);
   onnxToTRTModel(rpn_onnx_file_, rpn_trt_model_stream);
   if (pfe_trt_model_stream == nullptr || rpn_trt_model_stream == nullptr)
-  {
+  {//use std:cerr instead of ROS_ERROR because want to keep this fille ros-agnostics
     std::cerr<< "Failed to load ONNX file " << std::endl;
   }
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
index 0ee9bba397e..c175ac36b3e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
@@ -107,7 +107,7 @@ __global__ void make_pillar_index_kernel(
     dev_x_coors[count] = x;
     dev_y_coors[count] = y;
 
-    //TODO Need to be modified after making properly training model
+    //TODO Need to be modified after making properly trained model
     // x_offset = self.vx / 2 + pc_range[0]
     // y_offset = self.vy / 2 + pc_range[1]
     // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset

From 6512482c5d810ed4cf18bc4dbff9a273f32b1cfb Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Mon, 4 Mar 2019 09:12:34 +0900
Subject: [PATCH 22/37] Modify cmake

---
 .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index ef411001240..4174b349c51 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -97,7 +97,7 @@ if(TRT_AVAIL AND CUDA_AVAIL)
          lidar_point_pillars
          ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
          LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
-         RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
+         RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
   )
 
   install(DIRECTORY launch/

From 5904fd114a4aed6628a5f079558324f9930328e8 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Mon, 4 Mar 2019 09:27:07 +0900
Subject: [PATCH 23/37] update readme

---
 .../packages/lidar_point_pillars/README.md    | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index ddca50651f5..2a4b5d3697b 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -8,6 +8,29 @@ CUDA Toolkit v9.0 or v10.0
 
 TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
 
+## How to setup
+
+1. Install CUDA from this [website](https://developer.nvidia.com/cuda-downloads)
+
+2. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using.
+
+3. Install TensorRT from the Debian local repo package.
+
+```
+$ sudo dpkg -i  
+nv-tensorrt-repo-ubuntu1x04-cudax.x-trt5.x.x.x-ga-yyyymmdd_1-1_amd64.deb
+$ sudo apt-key add /var/nv-tensorrt-repo-cudax.x-trt5.x.x.x-ga-yyyymmdd/7fa2af80.pub
+
+$ sudo apt-get update
+$ sudo apt-get install tensorrt
+```
+
+4. Download the pretrained file from [here](https://github.com/cirpue49/kitti_pretrained_pp).
+
+```
+$ git clone git@github.com:cirpue49/kitti_pretrained_point_pillars.git
+```
+
 
 
 ## How to launch

From b350ec9661aa6583f61aeec2685b8e81020e12ac Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 18:14:06 +0900
Subject: [PATCH 24/37] add script in cmake to install lib

---
 .../lidar_detector/packages/lidar_point_pillars/CMakeLists.txt  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 4174b349c51..6916195aaf7 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -94,6 +94,8 @@ if(TRT_AVAIL AND CUDA_AVAIL)
 
 
   install(TARGETS
+         gpu_point_pillars_lib
+         point_pillars_lib
          lidar_point_pillars
          ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
          LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}

From 10535100188519490b8e6a22bac65f227334a7e1 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 18:33:58 +0900
Subject: [PATCH 25/37] add script in package.xml for autoware_build_flags

---
 .../lidar_detector/packages/lidar_point_pillars/package.xml    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
index 082e644fa5d..88ed828f0a4 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
@@ -1,13 +1,14 @@
 <?xml version="1.0"?>
 <package>
     <name>lidar_point_pillars</name>
-    <version>1.9.1</version>
+    <version>1.10.0</version>
     <description>lidar_point_pillars</description>
 
     <maintainer email="cirpue49@yahoo.co.jp">Kosuke Murakami</maintainer>
     <license>Apache 2.0</license>
 
     <buildtool_depend>catkin</buildtool_depend>
+    <buildtool_depend>autoware_build_flags</buildtool_depend>
 
     <build_depend>roscpp</build_depend>
     <build_depend>pcl_ros</build_depend>

From 60f3a1298c84ce6955cff1901f687490ab8efffe Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 19:41:08 +0900
Subject: [PATCH 26/37] Modify cmake, include CUDNN

---
 .../lidar_point_pillars/CMakeLists.txt        | 45 ++++++++++++++-----
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 6916195aaf7..8be73787630 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -19,17 +19,36 @@ option(TRT_AVAIL "TensorRT available" OFF)
 # try to find the tensorRT modules
 find_library(NVINFER NAMES nvinfer)
 find_library(NVPARSERS NAMES nvparsers)
-if(NVINFER AND NVPARSERS)
-  message("TensorRT is available!")
-  message("NVINFER: ${NVINFER}")
-  message("NVPARSERS: ${NVPARSERS}")
-  set(TRT_AVAIL ON)
+find_library(NVONNXPARSERS NAMES nvonnxparser)
+if(NVINFER AND NVPARSERS AND NVONNXPARSERS)
+   message("TensorRT is available!")
+   message("NVINFER: ${NVINFER}")
+   message("NVPARSERS: ${NVPARSERS}")
+   message("NVONNXPARSERS: ${NVONNXPARSERS}")
+   set(TRT_AVAIL ON)
 else()
   message("TensorRT is NOT Available")
   set(TRT_AVAIL OFF)
 endif()
 
-if(TRT_AVAIL AND CUDA_AVAIL)
+# set flags for CUDNN availability
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# try to find the CUDNN module
+find_library(CUDNN_LIBRARY
+NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+PATH_SUFFIXES lib lib64 bin
+DOC "CUDNN library." )
+if(CUDNN_LIBRARY)
+   message("CUDNN is available!")
+   message("CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
+   set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
+endif()
+
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
 
   find_package(autoware_build_flags REQUIRED)
   find_package(catkin REQUIRED COMPONENTS
@@ -81,9 +100,13 @@ if(TRT_AVAIL AND CUDA_AVAIL)
          nodes/preprocess_points.cpp
          )
 
-  target_link_libraries(point_pillars_lib
-         nvinfer
-         nvonnxparser
+   target_link_libraries(point_pillars_lib
+         ${NVINFER}
+         ${NVONNXPARSERS}
+         ${CUDA_LIBRARIES}
+         ${CUDA_CUBLAS_LIBRARIES}
+         ${CUDA_curand_LIBRARY}
+         ${CUDNN_LIBRARY}
          gpu_point_pillars_lib
          )
 
@@ -116,8 +139,8 @@ if(TRT_AVAIL AND CUDA_AVAIL)
     catkin_add_gtest(test-point_pillars test/src/test_point_pillars.cpp)
     target_link_libraries(test-point_pillars ${catkin_LIBRARIES} point_pillars_lib)
   endif()
-ELSE()
+else()
   find_package(catkin REQUIRED)
   catkin_package()
   message("PointPillars won't be built, CUDA and/or TensorRT were not found.")
-ENDIF ()
+endif()

From a384528a9ad95122a3a65f557831c176ca96c1cf Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 19:49:06 +0900
Subject: [PATCH 27/37] Add CUDNN dependency script in README

---
 .../lidar_detector/packages/lidar_point_pillars/README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
index 2a4b5d3697b..3cef61d109a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/README.md
@@ -6,15 +6,19 @@ Autoware package for Point Pillars.  [Referenced paper](https://arxiv.org/abs/18
 
 CUDA Toolkit v9.0 or v10.0
 
+CUDNN: Tested with v7.3.1
+
 TensorRT: Tested with 5.0.2 -> [How to install](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing)
 
 ## How to setup
 
 1. Install CUDA from this [website](https://developer.nvidia.com/cuda-downloads)
 
-2. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using.
+2. Install CUDNN
+
+3. [Download](https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#downloading) the TensorRT local repo file that matches the Ubuntu version you are using.
 
-3. Install TensorRT from the Debian local repo package.
+4. Install TensorRT from the Debian local repo package.
 
 ```
 $ sudo dpkg -i  

From 367bcb19927688f2817e86210b38f4d63f3c8241 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 19:57:15 +0900
Subject: [PATCH 28/37] Delete unnecessary default value for onnx files in
 launch file

---
 .../lidar_point_pillars/launch/lidar_point_pillars.launch     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
index 4a7618eacc8..9bf500fc6e8 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/launch/lidar_point_pillars.launch
@@ -4,8 +4,8 @@
   <arg name="reproduce_result_mode" default="true" />
   <arg name="score_threshold" default="0.5" />
   <arg name="nms_overlap_threshold" default="0.5" />
-  <arg name="pfe_onnx_file" default="" />
-  <arg name="rpn_onnx_file" default="" />
+  <arg name="pfe_onnx_file"/>
+  <arg name="rpn_onnx_file"/>
   <node pkg="lidar_point_pillars" type="lidar_point_pillars"
     name="lidar_point_pillars" output="screen" >
     <remap from="/points_raw" to="$(arg input_topic)"/>

From b5cd3ff7c5430c6eb6ab6119c3014818441d5345 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Wed, 6 Mar 2019 20:11:38 +0900
Subject: [PATCH 29/37] add label when creating DetectedObject

---
 .../lidar_point_pillars/nodes/point_pillars_ros.cpp   | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index def9759cf7c..9da67a411d5 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -87,9 +87,10 @@ void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, co
     object.pose.position.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 1];
     object.pose.position.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 2];
 
+    // Trained this way
     float yaw = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 6];
+    yaw += M_PI/2;
     yaw = std::atan2(std::sin(yaw), std::cos(yaw));
-    // Trained this way
     geometry_msgs::Quaternion q = tf::createQuaternionMsgFromYaw(-yaw);
     object.pose.orientation = q;
 
@@ -98,10 +99,14 @@ void PointPillarsROS::pubDetectedObject(const std::vector<float>& detections, co
       object.pose = getTransformedPose(object.pose, angle_transform_inversed_);
     }
 
-    object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3];
-    object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4];
+    // Again: Trained this way
+    object.dimensions.x = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 4];
+    object.dimensions.y = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 3];
     object.dimensions.z = detections[i * OUTPUT_NUM_BOX_FEATURE_ + 5];
 
+    //Only detects car in Version 1.0
+    object.label = "car";
+
     objects.objects.push_back(object);
   }
   pub_objects_.publish(objects);

From 3e226acf6ce1ef4df86dd54727d65ec37108a25a Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 12:53:55 +0900
Subject: [PATCH 30/37] Add include/{}/

---
 .../packages/lidar_point_pillars/CMakeLists.txt      |  2 +-
 .../{ => lidar_point_pillars}/anchor_mask_cuda.h     |  0
 .../include/{ => lidar_point_pillars}/common.h       |  0
 .../include/{ => lidar_point_pillars}/nms_cuda.h     |  2 +-
 .../{ => lidar_point_pillars}/point_pillars.h        | 12 ++++++------
 .../{ => lidar_point_pillars}/point_pillars_ros.h    |  2 +-
 .../{ => lidar_point_pillars}/postprocess_cuda.h     |  2 +-
 .../{ => lidar_point_pillars}/preprocess_points.h    |  0
 .../preprocess_points_cuda.h                         |  0
 .../include/{ => lidar_point_pillars}/scatter_cuda.h |  0
 .../lidar_point_pillars/nodes/anchor_mask_cuda.cu    |  4 ++--
 .../nodes/lidar_point_pillars_node.cpp               |  2 +-
 .../packages/lidar_point_pillars/nodes/nms_cuda.cu   |  2 +-
 .../lidar_point_pillars/nodes/point_pillars.cpp      |  2 +-
 .../lidar_point_pillars/nodes/point_pillars_ros.cpp  |  2 +-
 .../lidar_point_pillars/nodes/postprocess_cuda.cu    |  2 +-
 .../lidar_point_pillars/nodes/preprocess_points.cpp  |  2 +-
 .../nodes/preprocess_points_cuda.cu                  |  4 ++--
 .../lidar_point_pillars/nodes/scatter_cuda.cu        |  2 +-
 .../test/src/test_point_pillars.cpp                  |  6 ++++--
 20 files changed, 25 insertions(+), 23 deletions(-)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/anchor_mask_cuda.h (100%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/common.h (100%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/nms_cuda.h (97%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/point_pillars.h (97%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/point_pillars_ros.h (98%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/postprocess_cuda.h (99%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/preprocess_points.h (100%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/preprocess_points_cuda.h (100%)
 rename ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/{ => lidar_point_pillars}/scatter_cuda.h (100%)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 8be73787630..8b0bee0e491 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -130,7 +130,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
          PATTERN ".svn" EXCLUDE)
 
   install(DIRECTORY include/
-         DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
+         DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}/${PROJECT_NAME}/
          PATTERN ".svn" EXCLUDE
          )
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h
similarity index 100%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/anchor_mask_cuda.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h
similarity index 100%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/common.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
similarity index 97%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
index 30f2a997062..496019d6c25 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/nms_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
@@ -29,7 +29,7 @@
 #include <iostream>
 
 // headers in local files
-#include "common.h"
+#include "lidar_point_pillars/common.h"
 
 class NMSCuda
 {
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
similarity index 97%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
index 7945df61354..ea9a0549201 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
@@ -38,12 +38,12 @@
 #include "NvOnnxParser.h"
 
 // headers in local files
-#include "common.h"
-#include "preprocess_points.h"
-#include "preprocess_points_cuda.h"
-#include "anchor_mask_cuda.h"
-#include "scatter_cuda.h"
-#include "postprocess_cuda.h"
+#include "lidar_point_pillars/common.h"
+#include "lidar_point_pillars/preprocess_points.h"
+#include "lidar_point_pillars/preprocess_points_cuda.h"
+#include "lidar_point_pillars/anchor_mask_cuda.h"
+#include "lidar_point_pillars/scatter_cuda.h"
+#include "lidar_point_pillars/postprocess_cuda.h"
 
 // Logger for TensorRT info/warning/errors
 class Logger : public nvinfer1::ILogger
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
similarity index 98%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
index c190fb56a97..b2c18c4a36b 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
@@ -37,7 +37,7 @@
 #include <pcl/io/pcd_io.h>
 
 // headers in local files
-#include "point_pillars.h"
+#include "lidar_point_pillars/point_pillars.h"
 
 class PointPillarsROS
 {
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
similarity index 99%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
index 2add152f904..be32518be09 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
@@ -29,7 +29,7 @@
 #include <iostream>
 
 // headers in local files
-#include "nms_cuda.h"
+#include "lidar_point_pillars/nms_cuda.h"
 
 class PostprocessCuda
 {
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h
similarity index 100%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
similarity index 100%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/preprocess_points_cuda.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/scatter_cuda.h
similarity index 100%
rename from ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/scatter_cuda.h
rename to ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/scatter_cuda.h
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
index 72c7556f0c8..a46030857a9 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
@@ -15,8 +15,8 @@
  */
 
 //headers in local files
-#include "common.h"
-#include "anchor_mask_cuda.h"
+#include "lidar_point_pillars/common.h"
+#include "lidar_point_pillars/anchor_mask_cuda.h"
 
 //modified prefix sum code from https://www.mimuw.edu.pl/~ps209291/kgkp/slides/scan.pdf
 __global__ void scan_x(int *g_odata, int *g_idata, int n)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
index 8faa0bede56..001933b8197 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/lidar_point_pillars_node.cpp
@@ -18,7 +18,7 @@
 #include <iostream>
 
 // headers in local files
-#include "point_pillars_ros.h"
+#include "lidar_point_pillars/point_pillars_ros.h"
 
 int main(int argc, char** argv)
 {
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
index d675f9e32f8..d73ef0b92d8 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
@@ -6,7 +6,7 @@
 
 
 //headers in local files
-#include "nms_cuda.h"
+#include "lidar_point_pillars/nms_cuda.h"
 
 
 __device__ inline float devIoU(float const *const a, float const *const b)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index a202a0b2e14..afb89b2c22f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -19,7 +19,7 @@
 #include <iostream>
 
 // headers in local files
-#include "point_pillars.h"
+#include "lidar_point_pillars/point_pillars.h"
 
 // clang-format off
 PointPillars::PointPillars(const bool reproduce_result_mode, const float score_threshold,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 9da67a411d5..126ffc63a97 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -28,7 +28,7 @@
 
 // headers in local files
 #include "autoware_msgs/DetectedObjectArray.h"
-#include "point_pillars_ros.h"
+#include "lidar_point_pillars/point_pillars_ros.h"
 
 PointPillarsROS::PointPillarsROS()
   : private_nh_("~")
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
index cf82496926a..95e8ef6036e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
@@ -20,7 +20,7 @@
 #include <thrust/sort.h>
 
 //headers in local files
-#include "postprocess_cuda.h"
+#include "lidar_point_pillars/postprocess_cuda.h"
 
 __global__ void filter_kernel(const float* box_preds, const float* cls_preds, const float* dir_preds, const int* anchor_mask,
                               const float* dev_anchors_px, const float* dev_anchors_py, const float* dev_anchors_pz,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index e36055debc9..479094cf0d2 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -19,7 +19,7 @@
 #include <iostream>
 
 // headers in local files
-#include "preprocess_points.h"
+#include "lidar_point_pillars/preprocess_points.h"
 
 PreprocessPoints::PreprocessPoints(const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR, const int GRID_X_SIZE,
                                    const int GRID_Y_SIZE, const int GRID_Z_SIZE, const float PILLAR_X_SIZE,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
index c175ac36b3e..0f6504980a6 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
@@ -18,8 +18,8 @@
 #include <iostream>
 
 //headers in local files
-#include "common.h"
-#include "preprocess_points_cuda.h"
+#include "lidar_point_pillars/common.h"
+#include "lidar_point_pillars/preprocess_points_cuda.h"
 
 __global__ void make_pillar_histo_kernel(
                                      const float* dev_points,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
index 6933f1da93e..107e3de6e4a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/scatter_cuda.cu
@@ -15,7 +15,7 @@
  */
 
 //headers in local files
-#include "scatter_cuda.h"
+#include "lidar_point_pillars/scatter_cuda.h"
 
 __global__ void scatter_kernel( int *x_coors, int *y_coors, float *pfe_output, float *scattered_feature,
                                 const int MAX_NUM_PILLARS_, const int GRID_X_SIZE, const int GRID_Y_SIZE)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index d42fa7d7c44..f06956b56dc 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -29,8 +29,8 @@
 #include <pcl/io/pcd_io.h>
 #include <pcl/point_types.h>
 
-#include "point_pillars_ros.h"
-#include "preprocess_points.h"
+#include "lidar_point_pillars/point_pillars_ros.h"
+#include "lidar_point_pillars/preprocess_points.h"
 
 class TestSuite : public ::testing::Test
 {
@@ -376,6 +376,8 @@ TEST(TestSuite, CheckGenerateBoxAnchors)
   EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001);
   EXPECT_NEAR(38.4,   box_anchors_max_x[1098], 0.001);
   EXPECT_NEAR(-38.4,  box_anchors_max_y[675], 0.001);
+  EXPECT_NEAR(-31,  box_anchors_max_y[675], 0.001);
+  EXPECT_EQ(true, false);
 
 
   delete[] anchors_px;

From 7daa788295c2187308c3f8a7a5eea39874565462 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 13:04:41 +0900
Subject: [PATCH 31/37] Delete unnecessary code from previous commit

---
 .../lidar_point_pillars/test/src/test_point_pillars.cpp         | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index f06956b56dc..f11ec3d257f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -376,8 +376,6 @@ TEST(TestSuite, CheckGenerateBoxAnchors)
   EXPECT_NEAR(-41.47, box_anchors_min_y[22], 0.001);
   EXPECT_NEAR(38.4,   box_anchors_max_x[1098], 0.001);
   EXPECT_NEAR(-38.4,  box_anchors_max_y[675], 0.001);
-  EXPECT_NEAR(-31,  box_anchors_max_y[675], 0.001);
-  EXPECT_EQ(true, false);
 
 
   delete[] anchors_px;

From a04b5ac358a16c2a6c8597a77a2252b612d1a727 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 16:07:26 +0900
Subject: [PATCH 32/37] Remove magic number by using MACRO

---
 .../include/lidar_point_pillars/common.h      |  5 ++++
 .../include/lidar_point_pillars/nms_cuda.h    |  4 ++-
 .../preprocess_points_cuda.h                  |  4 ++-
 .../nodes/anchor_mask_cuda.cu                 |  2 +-
 .../lidar_point_pillars/nodes/nms_cuda.cu     | 30 ++++++++++---------
 .../nodes/point_pillars.cpp                   |  2 +-
 .../nodes/postprocess_cuda.cu                 |  8 ++---
 .../nodes/preprocess_points.cpp               |  3 +-
 .../nodes/preprocess_points_cuda.cu           | 30 +++++++++++--------
 9 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h
index 46e816db735..283e16be0e3 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/common.h
@@ -30,6 +30,11 @@
 // headers in CUDA
 #include <cuda_runtime_api.h>
 
+//using MACRO to allocate memory inside CUDA kernel
+#define NUM_3D_BOX_CORNERS_MACRO 8
+#define NUM_2D_BOX_CORNERS_MACRO 4
+#define NUM_THREADS_MACRO 64 // need to be changed when NUM_THREADS is changed
+
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
 
 #define GPU_CHECK(ans)                                                                                                 \
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
index 496019d6c25..e3395227f2b 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
@@ -35,16 +35,18 @@ class NMSCuda
 {
 private:
   const int NUM_THREADS_;
+  const int NUM_BOX_CORNERS_;
   const float nms_overlap_threshold_;
 
 public:
   /**
   * @brief Constructor
   * @param[in] NUM_THRESD Number of threads for launching cuda kernel
+  * @param[in] NUM_BOX_CORNERS Number of corners for 2D box
   * @param[in] nms_overlap_threshold IOU threshold for NMS
   * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam
   */
-  NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold);
+  NMSCuda(const int NUM_THREADS, const int NUM_BOX_CORNERS, const float nms_overlap_threshold);
 
   /**
   * @brief GPU Non-Maximum Suppresion for network output
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
index 710a14a87c3..cb79f0bc29c 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
@@ -41,6 +41,7 @@ class PreprocessPointsCuda
   const float MIN_X_RANGE_;
   const float MIN_Y_RANGE_;
   const float MIN_Z_RANGE_;
+  const int NUM_BOX_CORNERS_;
   // end initalizer list
 
   float* dev_pillar_x_in_coors_;
@@ -70,12 +71,13 @@ class PreprocessPointsCuda
   * @param[in] MIN_X_RANGE Minimum x value for pointcloud
   * @param[in] MIN_Y_RANGE Minimum y value for pointcloud
   * @param[in] MIN_Z_RANGE Minimum z value for pointcloud
+  * @param[in] NUM_BOX_CORNERS Number of corners for 2D box
   * @details Captital variables never change after the compile
   */
   PreprocessPointsCuda(const int NUM_THREADS, const int MAX_NUM_PILLARS, const int MAX_POINTS_PER_PILLAR,
                        const int NUM_INDS_FOR_SCAN, const int GRID_X_SIZE, const int GRID_Y_SIZE, const int GRID_Z_SIZE,
                        const float PILLAR_X_SIZE, const float PILLAR_Y_SIZE, const float PILLAR_Z_SIZE,
-                       const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE);
+                       const float MIN_X_RANGE, const float MIN_Y_RANGE, const float MIN_Z_RANGE, const int NUM_BOX_CORNERS);
   ~PreprocessPointsCuda();
 
   /**
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
index a46030857a9..6b58021cbc4 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/anchor_mask_cuda.cu
@@ -127,7 +127,7 @@ __global__ void make_anchor_mask_kernel(const float* dev_box_anchors_min_x, cons
                                         const int NUM_INDS_FOR_SCAN)
 {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  int anchor_coor[4] = {0};
+  int anchor_coor[NUM_2D_BOX_CORNERS_MACRO] = {0};
   const int GRID_X_SIZE_1 = GRID_X_SIZE - 1;//grid_x_size - 1
   const int GRID_Y_SIZE_1 = GRID_Y_SIZE - 1;//grid_y_size - 1
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
index d73ef0b92d8..3f52973e810 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
@@ -21,7 +21,8 @@ __device__ inline float devIoU(float const *const a, float const *const b)
 }
 
 __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
-                           const float *dev_boxes, unsigned long long *dev_mask)
+                           const float *dev_boxes, unsigned long long *dev_mask,
+                           const int NUM_BOX_CORNERS)
 {
   const int row_start = blockIdx.y;
   const int col_start = blockIdx.x;
@@ -33,24 +34,24 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
       min(n_boxes - row_start * block_threads, block_threads);
   const int col_size =
       min(n_boxes - col_start * block_threads, block_threads);
-  //Only can initialize with figures, not by variables
-  __shared__ float block_boxes[64 * 4];
+
+  __shared__ float block_boxes[NUM_THREADS_MACRO * NUM_2D_BOX_CORNERS_MACRO];
   if (threadIdx.x < col_size)
   {
-    block_boxes[threadIdx.x * 4 + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 0];
-    block_boxes[threadIdx.x * 4 + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 1];
-    block_boxes[threadIdx.x * 4 + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 2];
-    block_boxes[threadIdx.x * 4 + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * 4 + 3];
+    block_boxes[threadIdx.x * NUM_BOX_CORNERS + 0] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 0];
+    block_boxes[threadIdx.x * NUM_BOX_CORNERS + 1] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 1];
+    block_boxes[threadIdx.x * NUM_BOX_CORNERS + 2] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 2];
+    block_boxes[threadIdx.x * NUM_BOX_CORNERS + 3] = dev_boxes[(block_threads * col_start + threadIdx.x) * NUM_BOX_CORNERS + 3];
   }
   __syncthreads();
 
   if (threadIdx.x < row_size)
   {
     const int cur_box_idx = block_threads * row_start + threadIdx.x;
-    const float cur_box[4] = {dev_boxes[cur_box_idx*4 + 0],
-                              dev_boxes[cur_box_idx*4 + 1],
-                              dev_boxes[cur_box_idx*4 + 2],
-                              dev_boxes[cur_box_idx*4 + 3]};
+    const float cur_box[NUM_2D_BOX_CORNERS_MACRO] = {dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 0],
+                                                     dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 1],
+                                                     dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 2],
+                                                     dev_boxes[cur_box_idx*NUM_BOX_CORNERS + 3]};
     unsigned long long t = 0;
     int start = 0;
     if (row_start == col_start)
@@ -59,7 +60,7 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
     }
     for (int i = start; i < col_size; i++)
     {
-      if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh)
+      if (devIoU(cur_box, block_boxes + i * NUM_BOX_CORNERS) > nms_overlap_thresh)
       {
         t |= 1ULL << i;
       }
@@ -69,8 +70,9 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
   }
 }
 
-NMSCuda::NMSCuda(const int NUM_THREADS, const float nms_overlap_threshold):
+NMSCuda::NMSCuda(const int NUM_THREADS, const int NUM_BOX_CORNERS ,const float nms_overlap_threshold):
 NUM_THREADS_(NUM_THREADS),
+NUM_BOX_CORNERS_(NUM_BOX_CORNERS),
 nms_overlap_threshold_(nms_overlap_threshold)
 {
 }
@@ -84,7 +86,7 @@ void NMSCuda::doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_n
   unsigned long long *dev_mask = NULL;
   GPU_CHECK(cudaMalloc(&dev_mask, host_filter_count * col_blocks * sizeof(unsigned long long)));
 
-  nms_kernel<<<blocks, threads>>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask);
+  nms_kernel<<<blocks, threads>>>(host_filter_count, nms_overlap_threshold_, dev_sorted_box_for_nms, dev_mask, NUM_BOX_CORNERS_);
 
   // postprocess for nms output
   std::vector<unsigned long long> host_mask(host_filter_count * col_blocks);
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index afb89b2c22f..b3f8d7afa68 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -74,7 +74,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   {
     preprocess_points_cuda_ptr_.reset(new PreprocessPointsCuda(
         NUM_THREADS_, MAX_NUM_PILLARS_, MAX_NUM_POINTS_PER_PILLAR_, NUM_INDS_FOR_SCAN_, GRID_X_SIZE_, GRID_Y_SIZE_,
-        GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_));
+        GRID_Z_SIZE_, PILLAR_X_SIZE_, PILLAR_Y_SIZE_, PILLAR_Z_SIZE_, MIN_X_RANGE_, MIN_Y_RANGE_, MIN_Z_RANGE_, NUM_BOX_CORNERS_));
   }
 
   anchor_mask_cuda_ptr_.reset(new AnchorMaskCuda(NUM_INDS_FOR_SCAN_, NUM_ANCHOR_X_INDS_, NUM_ANCHOR_Y_INDS_,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
index 95e8ef6036e..87abbee141e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/postprocess_cuda.cu
@@ -72,15 +72,14 @@ __global__ void filter_kernel(const float* box_preds, const float* cls_preds, co
 
     //convrt normal box(normal boxes: x, y, z, w, l, h, r) to box(xmin, ymin, xmax, ymax) for nms calculation
     //First: dx, dy -> box(x0y0, x0y1, x1y0, x1y1)
-    float corners[2*4] = {float(-0.5*box_dx), float(-0.5*box_dy),
+    float corners[NUM_3D_BOX_CORNERS_MACRO] = {float(-0.5*box_dx), float(-0.5*box_dy),
                                         float(-0.5*box_dx), float( 0.5*box_dy),
                                         float( 0.5*box_dx), float( 0.5*box_dy),
                                         float( 0.5*box_dx), float(-0.5*box_dy)};
 
     //Second: Rotate, Offset and convert to point(xmin. ymin, xmax, ymax)
-    //cannot use variable initialization since "error: expression must have a constant value"
-    float rotated_corners[2*4];
-    float offset_corners[2*4];
+    float rotated_corners[NUM_3D_BOX_CORNERS_MACRO];
+    float offset_corners[NUM_3D_BOX_CORNERS_MACRO];
     float sin_yaw = sinf(box_ro);
     float cos_yaw = cosf(box_ro);
     float xmin = FLOAT_MAX;
@@ -152,6 +151,7 @@ NUM_OUTPUT_BOX_FEATURE_(NUM_OUTPUT_BOX_FEATURE)
 {
   nms_cuda_ptr_.reset(new NMSCuda(
     NUM_THREADS,
+    NUM_BOX_CORNERS,
     nms_overlap_threshold));
 }
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index 479094cf0d2..97c84918210 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -114,11 +114,12 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
       // float y_offset = PILLAR_Y_SIZE_/ 2 + MIN_Y_RANGE_;
       // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_;
       // TODO Need to be modified after proper trining code
+      // Will be modified in ver 1.1
       y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9;
       x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1;
 
       // sparse pillar map
-      sparse_pillar_map[y_coor * 512 + x_coor] = 1;
+      sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1;
     }
     int num = num_points_per_pillar[pillar_index];
     if (num < MAX_NUM_POINTS_PER_PILLAR_)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
index 0f6504980a6..89ef5131d6e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
@@ -38,7 +38,8 @@ __global__ void make_pillar_histo_kernel(
                                      const float MIN_Z_RANGE,
                                      const float PILLAR_X_SIZE,
                                      const float PILLAR_Y_SIZE,
-                                     const float PILLAR_Z_SIZE
+                                     const float PILLAR_Z_SIZE,
+                                     const int NUM_BOX_CORNERS
                                    )
 {
   int th_i = threadIdx.x + blockIdx.x * blockDim.x;
@@ -46,9 +47,9 @@ __global__ void make_pillar_histo_kernel(
   {
     return;
   }
-  int y_coor = floor((dev_points[th_i*4 + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE);
-  int x_coor = floor((dev_points[th_i*4 + 0] - MIN_X_RANGE)/PILLAR_X_SIZE);
-  int z_coor = floor((dev_points[th_i*4 + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE);
+  int y_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 1] - MIN_Y_RANGE)/PILLAR_Y_SIZE);
+  int x_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 0] - MIN_X_RANGE)/PILLAR_X_SIZE);
+  int z_coor = floor((dev_points[th_i*NUM_BOX_CORNERS + 2] - MIN_Z_RANGE)/PILLAR_Z_SIZE);
 
   if(x_coor >= 0 && x_coor < GRID_X_SIZE &&
      y_coor >= 0 && y_coor < GRID_Y_SIZE &&
@@ -58,10 +59,10 @@ __global__ void make_pillar_histo_kernel(
     if(count < max_points_per_pillar)
     {
       int ind = y_coor*GRID_X_SIZE*max_points_per_pillar + x_coor*max_points_per_pillar + count;
-      dev_pillar_x_in_coors[ind] = dev_points[th_i*4 + 0];
-      dev_pillar_y_in_coors[ind] = dev_points[th_i*4 + 1];
-      dev_pillar_z_in_coors[ind] = dev_points[th_i*4 + 2];
-      dev_pillar_i_in_coors[ind] = dev_points[th_i*4 + 3];
+      dev_pillar_x_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 0];
+      dev_pillar_y_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 1];
+      dev_pillar_z_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 2];
+      dev_pillar_i_in_coors[ind] = dev_points[th_i*NUM_BOX_CORNERS + 3];
     }
   }
 }
@@ -107,12 +108,12 @@ __global__ void make_pillar_index_kernel(
     dev_x_coors[count] = x;
     dev_y_coors[count] = y;
 
-    //TODO Need to be modified after making properly trained model
+    //TODO Need to be modified after making properly trained weight
+    // Will be modified in ver 1.1
     // x_offset = self.vx / 2 + pc_range[0]
     // y_offset = self.vy / 2 + pc_range[1]
     // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset
     // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset
-
     dev_x_coors_for_sub[count] =  x*  PILLAR_X_SIZE + 0.1;
     dev_y_coors_for_sub[count] =  y*  PILLAR_Y_SIZE + -39.9;
     dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1;
@@ -194,7 +195,8 @@ PreprocessPointsCuda::PreprocessPointsCuda(const int NUM_THREADS,
                                            const float PILLAR_Z_SIZE,
                                            const float MIN_X_RANGE,
                                            const float MIN_Y_RANGE,
-                                           const float MIN_Z_RANGE)
+                                           const float MIN_Z_RANGE,
+                                           const int NUM_BOX_CORNERS)
 :
 NUM_THREADS_(NUM_THREADS),
 MAX_NUM_PILLARS_(MAX_NUM_PILLARS),
@@ -208,7 +210,8 @@ PILLAR_Y_SIZE_(PILLAR_Y_SIZE),
 PILLAR_Z_SIZE_(PILLAR_Z_SIZE),
 MIN_X_RANGE_(MIN_X_RANGE),
 MIN_Y_RANGE_(MIN_Y_RANGE),
-MIN_Z_RANGE_(MIN_Z_RANGE)
+MIN_Z_RANGE_(MIN_Z_RANGE),
+NUM_BOX_CORNERS_(NUM_BOX_CORNERS)
 {
   GPU_CHECK(cudaMalloc((void**)&dev_pillar_x_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
   GPU_CHECK(cudaMalloc((void**)&dev_pillar_y_in_coors_, GRID_Y_SIZE_*GRID_X_SIZE_*MAX_NUM_POINTS_PER_PILLAR_* sizeof(float)));
@@ -265,7 +268,8 @@ void PreprocessPointsCuda::doPreprocessPointsCuda(const float* dev_points, const
                                         MIN_Z_RANGE_,
                                         PILLAR_X_SIZE_,
                                         PILLAR_Y_SIZE_,
-                                        PILLAR_Z_SIZE_);
+                                        PILLAR_Z_SIZE_,
+                                        NUM_BOX_CORNERS_);
 
 
   make_pillar_index_kernel<<<GRID_X_SIZE_, GRID_Y_SIZE_>>>(

From c59fa712de081abd1a2d69b96546f4b9bcdfec89 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 16:14:13 +0900
Subject: [PATCH 33/37] Clarify float number

---
 .../nodes/point_pillars.cpp                   | 38 +++++++++----------
 .../nodes/point_pillars_ros.cpp               |  8 ++--
 .../nodes/preprocess_points.cpp               |  8 ++--
 .../nodes/preprocess_points_cuda.cu           |  4 +-
 4 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index b3f8d7afa68..72d30f772e8 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -44,22 +44,22 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   , RPN_BOX_OUTPUT_SIZE_(NUM_ANCHOR_ * 7)
   , RPN_CLS_OUTPUT_SIZE_(NUM_ANCHOR_)
   , RPN_DIR_OUTPUT_SIZE_(NUM_ANCHOR_ * 2)
-  , PILLAR_X_SIZE_(0.16)
-  , PILLAR_Y_SIZE_(0.16)
-  , PILLAR_Z_SIZE_(4.0)
-  , MIN_X_RANGE_(0.0)
-  , MIN_Y_RANGE_(-39.68)
-  , MIN_Z_RANGE_(-3.0)
-  , MAX_X_RANGE_(69.12)
-  , MAX_Y_RANGE_(39.68)
+  , PILLAR_X_SIZE_(0.16f)
+  , PILLAR_Y_SIZE_(0.16f)
+  , PILLAR_Z_SIZE_(4.0f)
+  , MIN_X_RANGE_(0.0f)
+  , MIN_Y_RANGE_(-39.68f)
+  , MIN_Z_RANGE_(-3.0f)
+  , MAX_X_RANGE_(69.12f)
+  , MAX_Y_RANGE_(39.68f)
   , MAX_Z_RANGE_(1)
   , BATCH_SIZE_(1)
   , NUM_INDS_FOR_SCAN_(512)
   , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size
-  , SENSOR_HEIGHT_(1.73)
-  , ANCHOR_DX_SIZE_(1.6)
-  , ANCHOR_DY_SIZE_(3.9)
-  , ANCHOR_DZ_SIZE_(1.56)
+  , SENSOR_HEIGHT_(1.73f)
+  , ANCHOR_DX_SIZE_(1.6f)
+  , ANCHOR_DY_SIZE_(3.9f)
+  , ANCHOR_DZ_SIZE_(1.56f)
   , NUM_BOX_CORNERS_(4)
   , NUM_OUTPUT_BOX_FEATURE_(7)
 {
@@ -232,7 +232,7 @@ void PointPillars::deviceMemoryMalloc()
   GPU_CHECK(cudaMalloc(&rpn_buffers_[3], RPN_DIR_OUTPUT_SIZE_ * sizeof(float)));
 
   // for scatter kernel
-  GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, 64 * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float)));
+  GPU_CHECK(cudaMalloc((void**)&dev_scattered_feature_, NUM_THREADS_ * GRID_Y_SIZE_ * GRID_X_SIZE_ * sizeof(float)));
 
   // for filter
   GPU_CHECK(cudaMalloc((void**)&dev_anchors_px_, NUM_ANCHOR_ * sizeof(float)));
@@ -292,8 +292,8 @@ void PointPillars::generateAnchors(float* anchors_px_, float* anchors_py_, float
     box_anchors_max_y_[i] = 0;
   }
 
-  float x_stride = PILLAR_X_SIZE_ * 2;
-  float y_stride = PILLAR_Y_SIZE_ * 2;
+  float x_stride = PILLAR_X_SIZE_ * 2.0f;
+  float y_stride = PILLAR_Y_SIZE_ * 2.0f;
   float x_offset = MIN_X_RANGE_ + PILLAR_X_SIZE_;
   float y_offset = MIN_Y_RANGE_ + PILLAR_Y_SIZE_;
 
@@ -373,10 +373,10 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p
       for (size_t r = 0; r < NUM_ANCHOR_R_INDS_; r++)
       {
         int ind = x * NUM_ANCHOR_Y_INDS_ * NUM_ANCHOR_R_INDS_ + y * NUM_ANCHOR_R_INDS_ + r;
-        box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2;
-        box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2;
-        box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2;
-        box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2;
+        box_anchors_min_x_[ind] = anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
+        box_anchors_min_y_[ind] = anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
+        box_anchors_max_x_[ind] = anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
+        box_anchors_max_y_[ind] = anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
       }
     }
   }
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
index 126ffc63a97..776ce59394a 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars_ros.cpp
@@ -35,8 +35,8 @@ PointPillarsROS::PointPillarsROS()
   , has_subscribed_baselink_(false)
   , NUM_POINT_FEATURE_(4)
   , OUTPUT_NUM_BOX_FEATURE_(7)
-  , TRAINED_SENSOR_HEIGHT_(1.73)
-  , NORMALIZING_INTENSITY_VALUE_(255.0)
+  , TRAINED_SENSOR_HEIGHT_(1.73f)
+  , NORMALIZING_INTENSITY_VALUE_(255.0f)
   , BASELINK_FRAME_("base_link")
 {
   //ros related param
@@ -44,8 +44,8 @@ PointPillarsROS::PointPillarsROS()
 
   //algorithm related params
   private_nh_.param<bool>("reproduce_result_mode", reproduce_result_mode_, false);
-  private_nh_.param<float>("score_threshold", score_threshold_, 0.5);
-  private_nh_.param<float>("nms_overlap_threshold", nms_overlap_threshold_, 0.5);
+  private_nh_.param<float>("score_threshold", score_threshold_, 0.5f);
+  private_nh_.param<float>("nms_overlap_threshold", nms_overlap_threshold_, 0.5f);
   private_nh_.param<std::string>("pfe_onnx_file", pfe_onnx_file_, "");
   private_nh_.param<std::string>("rpn_onnx_file", rpn_onnx_file_, "");
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index 97c84918210..3180f94d72b 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -115,8 +115,8 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
       // float x_offset = PILLAR_X_SIZE_/ 2 + MIN_X_RANGE_;
       // TODO Need to be modified after proper trining code
       // Will be modified in ver 1.1
-      y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9;
-      x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1;
+      y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9f;
+      x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1f;
 
       // sparse pillar map
       sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1;
@@ -143,11 +143,11 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
       y_coors_for_sub_shaped[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = y;
       if (j < num_points_for_a_pillar)
       {
-        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0;
+        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 1.0f;
       }
       else
       {
-        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0;
+        pillar_feature_mask[i * MAX_NUM_POINTS_PER_PILLAR_ + j] = 0.0f;
       }
     }
   }
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
index 89ef5131d6e..343f30ccea1 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points_cuda.cu
@@ -114,8 +114,8 @@ __global__ void make_pillar_index_kernel(
     // y_offset = self.vy / 2 + pc_range[1]
     // x_sub = coors_x.unsqueeze(1) * 0.16 + x_offset
     // y_sub = coors_y.unsqueeze(1) * 0.16 + y_offset
-    dev_x_coors_for_sub[count] =  x*  PILLAR_X_SIZE + 0.1;
-    dev_y_coors_for_sub[count] =  y*  PILLAR_Y_SIZE + -39.9;
+    dev_x_coors_for_sub[count] =  x*  PILLAR_X_SIZE + 0.1f;
+    dev_y_coors_for_sub[count] =  y*  PILLAR_Y_SIZE + -39.9f;
     dev_sparse_pillar_map[y*NUM_INDS_FOR_SCAN + x] = 1;
   }
 }

From 7b9cd43487aa0699e73c4fff5b0f6d1ed656bfbb Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 16:22:57 +0900
Subject: [PATCH 34/37] Add roslib dependency

---
 .../packages/lidar_point_pillars/CMakeLists.txt               | 2 ++
 .../lidar_detector/packages/lidar_point_pillars/package.xml   | 2 ++
 .../lidar_point_pillars/test/src/test_point_pillars.cpp       | 4 ++++
 3 files changed, 8 insertions(+)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
index 8b0bee0e491..87d2a5728b8 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/CMakeLists.txt
@@ -53,6 +53,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
   find_package(autoware_build_flags REQUIRED)
   find_package(catkin REQUIRED COMPONENTS
    roscpp
+   roslib
    pcl_ros
    autoware_msgs
    )
@@ -62,6 +63,7 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
   catkin_package(
    CATKIN_DEPENDS
    roscpp
+   roslib
    pcl_ros
    autoware_msgs
    )
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
index 88ed828f0a4..8c937c49787 100755
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/package.xml
@@ -11,11 +11,13 @@
     <buildtool_depend>autoware_build_flags</buildtool_depend>
 
     <build_depend>roscpp</build_depend>
+    <build_depend>roslib</build_depend>
     <build_depend>pcl_ros</build_depend>
     <build_depend>autoware_msgs</build_depend>
 
 
     <run_depend>roscpp</run_depend>
+    <run_depend>roslib</run_depend>
     <run_depend>pcl_ros</run_depend>
     <run_depend>autoware_msgs</run_depend>
 
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index f11ec3d257f..babc6ecb35f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -21,14 +21,18 @@
 * @date 2019/02/26
 */
 
+// headers in ROS
 #include <ros/ros.h>
 #include <ros/package.h>
 
+// headers in gtest
 #include <gtest/gtest.h>
 
+//headers in PCL
 #include <pcl/io/pcd_io.h>
 #include <pcl/point_types.h>
 
+//headers in local files
 #include "lidar_point_pillars/point_pillars_ros.h"
 #include "lidar_point_pillars/preprocess_points.h"
 

From d67f2ecfcf32e99635649a614e9b8c9dfcd81659 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Thu, 7 Mar 2019 16:31:53 +0900
Subject: [PATCH 35/37] Fix typo

---
 .../include/lidar_point_pillars/nms_cuda.h                 | 2 +-
 .../include/lidar_point_pillars/postprocess_cuda.h         | 6 +++---
 .../include/lidar_point_pillars/preprocess_points_cuda.h   | 2 +-
 .../lidar_point_pillars/test/src/test_point_pillars.cpp    | 7 ++++---
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
index e3395227f2b..ea88b3602aa 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
@@ -41,7 +41,7 @@ class NMSCuda
 public:
   /**
   * @brief Constructor
-  * @param[in] NUM_THRESD Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
   * @param[in] NUM_BOX_CORNERS Number of corners for 2D box
   * @param[in] nms_overlap_threshold IOU threshold for NMS
   * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
index be32518be09..29de5f97141 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
@@ -56,11 +56,11 @@ class PostprocessCuda
   * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors
   * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors
   * @param[in] score_threshold Score threshold for filtering output
-  * @param[in] NUM_THREDS Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
   * @param[in] nms_overlap_threshold IOU threshold for NMS
   * @param[in] NUM_BOX_CORNERS Number of box's corner
-  * @param[in] NUM_OUTPUT_BOX_FEATURE_ Number of output box's feature
-  * @details Captital variables never change after the compile, non-capital variables could be chaned through rosparam
+  * @param[in] NUM_OUTPUT_BOX_FEATURE Number of output box's feature
+  * @details Captital variables never change after the compile, non-capital variables could be changed through rosparam
   */
   PostprocessCuda(const float FLOAT_MIN, const float FLOAT_MAX, const int NUM_ANCHOR_X_INDS,
                   const int NUM_ANCHOR_Y_INDS, const int NUM_ANCHOR_R_INDS, const float score_threshold,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
index cb79f0bc29c..92cc3b6eb1e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
@@ -58,7 +58,7 @@ class PreprocessPointsCuda
 public:
   /**
   * @brief Constructor
-  * @param[in] NUM_THREDS Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
   * @param[in] MAX_NUM_PILLARS Maximum number of pillars
   * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar
   * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
index babc6ecb35f..70b3a4fa0ae 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/test/src/test_point_pillars.cpp
@@ -68,7 +68,8 @@ class TestClass
   const int NUM_INDS_FOR_SCAN_;
   const int NUM_BOX_CORNERS_;
 
-  void initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr);
+  // Make pointcloud for test
+  void makePointsForTest(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr);
   void pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr, float* out_points_array);
   void preprocess(const float* in_points_array, int in_num_points, int* x_coors, int* y_coors,
                   float* num_points_per_pillar, float* pillar_x, float* pillar_y, float* pillar_z, float* pillar_i,
@@ -142,7 +143,7 @@ void TestClass::pclToArray(const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc
   }
 }
 
-void TestClass::initPoints(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
+void TestClass::makePointsForTest(pcl::PointCloud<pcl::PointXYZI>::Ptr in_pcl_pc_ptr)
 {
   pcl::PointXYZI point;
   point.x = 12.9892;
@@ -233,7 +234,7 @@ TEST(TestSuite, CheckPreprocessPointsCPU)
                      NUM_BOX_CORNERS);
 
   pcl::PointCloud<pcl::PointXYZI>::Ptr pcl_pc_ptr(new pcl::PointCloud<pcl::PointXYZI>);
-  test_obj.initPoints(pcl_pc_ptr);
+  test_obj.makePointsForTest(pcl_pc_ptr);
 
   float* points_array = new float[pcl_pc_ptr->size() * 4];
   test_obj.pclToArray(pcl_pc_ptr, points_array);

From 816ba2d17a101db17e928e39234bc4812cda03e0 Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Fri, 8 Mar 2019 10:47:38 +0900
Subject: [PATCH 36/37] Modify comment

---
 .../lidar_point_pillars/anchor_mask_cuda.h    | 18 +++++-----
 .../include/lidar_point_pillars/nms_cuda.h    |  8 ++---
 .../lidar_point_pillars/point_pillars.h       | 32 ++++++++---------
 .../lidar_point_pillars/point_pillars_ros.h   |  2 +-
 .../lidar_point_pillars/postprocess_cuda.h    | 36 +++++++++----------
 .../lidar_point_pillars/preprocess_points.h   | 32 ++++++++---------
 .../preprocess_points_cuda.h                  | 20 +++++------
 .../lidar_point_pillars/nodes/nms_cuda.cu     |  1 -
 .../nodes/point_pillars.cpp                   |  6 ++--
 .../nodes/preprocess_points.cpp               |  2 --
 .../sensing/drivers/lidar/packages/robosense  |  2 +-
 11 files changed, 78 insertions(+), 81 deletions(-)

diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h
index 6a8b555422c..255ac480bb5 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/anchor_mask_cuda.h
@@ -60,15 +60,15 @@ class AnchorMaskCuda
   /**
   * @brief call cuda code for making anchor mask
   * @param[in] dev_sparse_pillar_map Grid map representation for pillar occupancy
-  * @param[in] dev_cumsum_along_x Cumsum dev_sparse_pillar_map along x axis
-  * @param[in] dev_cumsum_along_y Cumsum dev_cumsum_along_x along y axis
-  * @param[in] dev_box_anchors_min_x Array for storng min x value for each anchor
-  * @param[in] dev_box_anchors_min_y Array for storng min y value for each anchor
-  * @param[in] dev_box_anchors_max_x Array for storng max x value for each anchor
-  * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor
-  * @param[in] dev_box_anchors_max_y Array for storng max y value for each anchor
-  * @param[out] dev_anchor_mask Anchor mask for filtering output
-  * @details dev_* means device memory. Make a mask for activating pillar occupancy area
+  * @param[in] dev_cumsum_along_x Array for storing cumsum-ed dev_sparse_pillar_map values
+  * @param[in] dev_cumsum_along_y Array for storing cumsum-ed dev_cumsum_along_y values
+  * @param[in] dev_box_anchors_min_x Array for storing min x value for each anchor
+  * @param[in] dev_box_anchors_min_y Array for storing min y value for each anchor
+  * @param[in] dev_box_anchors_max_x Array for storing max x value for each anchor
+  * @param[in] dev_box_anchors_max_y Array for storing max y value for each anchor
+  * @param[in] dev_box_anchors_max_y Array for storing max y value for each anchor
+  * @param[out] dev_anchor_mask Anchor mask for filtering the network output
+  * @details dev_* means device memory. Make a mask for filtering pillar occupancy area
   */
   void doAnchorMaskCuda(int* dev_sparse_pillar_map, int* dev_cumsum_along_x, int* dev_cumsum_along_y,
                         const float* dev_box_anchors_min_x, const float* dev_box_anchors_min_y,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
index ea88b3602aa..bf4c3501ba1 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/nms_cuda.h
@@ -41,7 +41,7 @@ class NMSCuda
 public:
   /**
   * @brief Constructor
-  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads when launching cuda kernel
   * @param[in] NUM_BOX_CORNERS Number of corners for 2D box
   * @param[in] nms_overlap_threshold IOU threshold for NMS
   * @details Captital variables never change after the compile, Non-captital variables could be chaned through rosparam
@@ -52,9 +52,9 @@ class NMSCuda
   * @brief GPU Non-Maximum Suppresion for network output
   * @param[in] host_filter_count Number of filtered output
   * @param[in] dev_sorted_box_for_nms Bounding box output sorted by score
-  * @param[out] out_keep_inds Indexes for selected bounding box
-  * @param[out] out_num_to_keep Number of keep bounding box
-  * @details Includes CUDA NMS and postprocessing for selecting box in CPU
+  * @param[out] out_keep_inds Indexes of selected bounding box
+  * @param[out] out_num_to_keep Number of kept bounding boxes
+  * @details NMS in GPU and postprocessing for selecting box in CPU
   */
   void doNMSCuda(const int host_filter_count, float* dev_sorted_box_for_nms, int* out_keep_inds, int& out_num_to_keep);
 };
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
index ea9a0549201..249a8da4245 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars.h
@@ -205,31 +205,31 @@ class PointPillars
 
   /**
   * @brief Memory allocation for device memory
-  * @details Call in the constructor
+  * @details Called in the constructor
   */
   void deviceMemoryMalloc();
 
   /**
-  * @brief Initialize anchor
-  * @details Call in the constructor
+  * @brief Initializing anchor
+  * @details Called in the constructor
   */
   void initAnchors();
 
   /**
-  * @brief Initialize TensorRT isntances
-  * @details Call in the constructor
+  * @brief Initializing TensorRT instances
+  * @details Called in the constructor
   */
   void initTRT();
 
   /**
   * @brief Generate anchors
-  * @param[in] anchors_px_ Represents x-coordinate value for corresponding anchor
-  * @param[in] anchors_py_ Represents y-coordinate value for corresponding anchor
-  * @param[in] anchors_pz_ Represents z-coordinate value for corresponding anchor
-  * @param[in] anchors_dx_ Represents x-dimension value for corresponding anchor
-  * @param[in] anchors_dy_ Represents y-dimension value for corresponding anchor
-  * @param[in] anchors_dz_ Represents z-dimension value for corresponding anchor
-  * @param[in] anchors_ro_ Represents rotation value for corresponding anchor
+  * @param[in] anchors_px_ Represents x-coordinate values for a corresponding anchor
+  * @param[in] anchors_py_ Represents y-coordinate values for a corresponding anchor
+  * @param[in] anchors_pz_ Represents z-coordinate values for a corresponding anchor
+  * @param[in] anchors_dx_ Represents x-dimension values for a corresponding anchor
+  * @param[in] anchors_dy_ Represents y-dimension values for a corresponding anchor
+  * @param[in] anchors_dz_ Represents z-dimension values for a corresponding anchor
+  * @param[in] anchors_ro_ Represents rotation values for a corresponding anchor
   * @details Generate anchors for each grid
   */
   void generateAnchors(float* anchors_px_, float* anchors_py_, float* anchors_pz_, float* anchors_dx_,
@@ -255,7 +255,7 @@ class PointPillars
   * @brief Preproces by CPU
   * @param[in] in_points_array pointcloud array
   * @param[in] in_num_points Number of points
-  * @details The output from CPU preprocess is reproducible
+  * @details The output from preprocessCPU is reproducible, while preprocessGPU is not
   */
   void preprocessCPU(const float* in_points_array, const int in_num_points);
 
@@ -263,7 +263,7 @@ class PointPillars
   * @brief Preproces by GPU
   * @param[in] in_points_array pointcloud array
   * @param[in] in_num_points Number of points
-  * @details Fast preprocess comapared with CPU preprocess
+  * @details Faster preprocess comapared with CPU preprocess
   */
   void preprocessGPU(const float* in_points_array, const int in_num_points);
 
@@ -305,10 +305,10 @@ class PointPillars
 
   /**
   * @brief Call PointPillars for the inference
-  * @param[in] in_points_array pointcloud array
+  * @param[in] in_points_array Pointcloud array
   * @param[in] in_num_points Number of points
   * @param[in] out_detections Network output bounding box
-  * @details This is interface for the algorithm
+  * @details This is an interface for the algorithm
   */
   void doInference(const float* in_points_array, const int in_num_points, std::vector<float>& out_detections);
 };
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
index b2c18c4a36b..7484b10c36c 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/point_pillars_ros.h
@@ -78,7 +78,7 @@ class PointPillarsROS
 
   /**
   * @brief Get base_link to lidar transformation
-  * @param[in] target_frameid target lidar frame_id
+  * @param[in] target_frameid Name of lidar frame_id to be targeted
   * @details Get transformation info
   */
   void getBaselinkToLidarTF(const std::string& target_frameid);
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
index 29de5f97141..61f2985562f 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/postprocess_cuda.h
@@ -50,13 +50,13 @@ class PostprocessCuda
 public:
   /**
   * @brief Constructor
-  * @param[in] FLOAT_MIN the lowest float value
-  * @param[in] FLOAT_MAX the maximum float value
+  * @param[in] FLOAT_MIN The lowest float value
+  * @param[in] FLOAT_MAX The maximum float value
   * @param[in] NUM_ANCHOR_X_INDS Number of x-indexes for anchors
   * @param[in] NUM_ANCHOR_Y_INDS Number of y-indexes for anchors
   * @param[in] NUM_ANCHOR_R_INDS Number of rotation-indexes for anchors
   * @param[in] score_threshold Score threshold for filtering output
-  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads when launching cuda kernel
   * @param[in] nms_overlap_threshold IOU threshold for NMS
   * @param[in] NUM_BOX_CORNERS Number of box's corner
   * @param[in] NUM_OUTPUT_BOX_FEATURE Number of output box's feature
@@ -69,21 +69,21 @@ class PostprocessCuda
 
   /**
   * @brief Postprocessing for the network output
-  * @param[in] rpn_box_output Box prediction from the network output
-  * @param[in] rpn_cls_output Class prediction from the network output
-  * @param[in] rpn_dir_output Direction prediction from the network output
-  * @param[in] dev_anchor_mask Anchor mask for filtering
-  * @param[in] dev_anchors_px X-coordinate value for corresponding anchor
-  * @param[in] dev_anchors_py Y-coordinate value for corresponding anchor
-  * @param[in] dev_anchors_pz Z-coordinate value for corresponding anchor
-  * @param[in] dev_anchors_dx X-dimension value for corresponding anchor
-  * @param[in] dev_anchors_dy Y-dimension value for corresponding anchor
-  * @param[in] dev_anchors_dz Z-dimension value for corresponding anchor
-  * @param[in] dev_anchors_ro Rotation value for corresponding anchor
-  * @param[in] dev_filtered_box Filtered box prediction
-  * @param[in] dev_filtered_score Filtered score prediction
-  * @param[in] dev_filtered_dir Filtered direction prediction
-  * @param[in] dev_box_for_nms Decoded box from pose and dimension to min_x min_y max_x max_y represenation for nms
+  * @param[in] rpn_box_output Box predictions from the network output
+  * @param[in] rpn_cls_output Class predictions from the network output
+  * @param[in] rpn_dir_output Direction predictions from the network output
+  * @param[in] dev_anchor_mask Anchor mask for filtering the network output
+  * @param[in] dev_anchors_px X-coordinate values for corresponding anchors
+  * @param[in] dev_anchors_py Y-coordinate values for corresponding anchors
+  * @param[in] dev_anchors_pz Z-coordinate values for corresponding anchors
+  * @param[in] dev_anchors_dx X-dimension values for corresponding anchors
+  * @param[in] dev_anchors_dy Y-dimension values for corresponding anchors
+  * @param[in] dev_anchors_dz Z-dimension values for corresponding anchors
+  * @param[in] dev_anchors_ro Rotation values for corresponding anchors
+  * @param[in] dev_filtered_box Filtered box predictions
+  * @param[in] dev_filtered_score Filtered score predictions
+  * @param[in] dev_filtered_dir Filtered direction predictions
+  * @param[in] dev_box_for_nms Decoded boxes in min_x min_y max_x max_y represenation from pose and dimension
   * @param[in] dev_filter_count The number of filtered output
   * @param[out] out_detection Output bounding boxes
   * @details dev_* represents device memory allocated variables
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h
index b8b361db22e..b09aea1e42e 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points.h
@@ -72,14 +72,14 @@ class PreprocessPoints
   * @param[in] x_coors X-coordinate indexes for corresponding pillars
   * @param[in] y_coors Y-coordinate indexes for corresponding pillars
   * @param[in] num_points_per_pillar Number of points in corresponding pillars
-  * @param[in] pillar_x X-coordinate value for points in each pillar
-  * @param[in] pillar_y Y-coordinate value for points in each pillar
-  * @param[in] pillar_z Z-coordinate value for points in each pillar
-  * @param[in] pillar_i Intensity value for points in each pillar
-  * @param[in] x_coors_for_sub_shaped Used for x substraction in the network
-  * @param[in] y_coors_for_sub_shaped Used for y substraction in the network
-  * @param[in] pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars
-  * @param[in] sparse_pillar_map Grid map represents occupancy pillars
+  * @param[in] pillar_x X-coordinate values for points in each pillar
+  * @param[in] pillar_y Y-coordinate values for points in each pillar
+  * @param[in] pillar_z Z-coordinate values for points in each pillar
+  * @param[in] pillar_i Intensity values for points in each pillar
+  * @param[in] x_coors_for_sub_shaped Array for x substraction in the network
+  * @param[in] y_coors_for_sub_shaped Array for y substraction in the network
+  * @param[in] pillar_feature_mask Mask to make pillars' feature zero where no points in the pillars
+  * @param[in] sparse_pillar_map Grid map representation for pillar-occupancy
   * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud
   * @details Convert pointcloud to pillar representation
   */
@@ -90,14 +90,14 @@ class PreprocessPoints
 
   /**
   * @brief Initializing variables for preprocessing
-  * @param[in] coor_to_pillaridx Map for converting from one set of coordinate to a pillar
-  * @param[in] sparse_pillar_map Grid map represents occupancy pillars
-  * @param[in] pillar_x X-coordinate value for points in each pillar
-  * @param[in] pillar_y Y-coordinate value for points in each pillar
-  * @param[in] pillar_z Z-coordinate value for points in each pillar
-  * @param[in] pillar_i Intensity value for points in each pillar
-  * @param[in] x_coors_for_sub_shaped Used for x substraction in the network
-  * @param[in] y_coors_for_sub_shaped Used for y substraction in the network
+  * @param[in] coor_to_pillaridx Map for converting one set of coordinate to a pillar
+  * @param[in] sparse_pillar_map Grid map representation for pillar-occupancy
+  * @param[in] pillar_x X-coordinate values for points in each pillar
+  * @param[in] pillar_y Y-coordinate values for points in each pillar
+  * @param[in] pillar_z Z-coordinate values for points in each pillar
+  * @param[in] pillar_i Intensity values for points in each pillar
+  * @param[in] x_coors_for_sub_shaped Array for x substraction in the network
+  * @param[in] y_coors_for_sub_shaped Array for y substraction in the network
   * @details Initializeing input arguments with certain values
   */
   void initializeVariables(int* coor_to_pillaridx, float* sparse_pillar_map, float* pillar_x, float* pillar_y,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
index 92cc3b6eb1e..67df868e3f9 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/include/lidar_point_pillars/preprocess_points_cuda.h
@@ -58,7 +58,7 @@ class PreprocessPointsCuda
 public:
   /**
   * @brief Constructor
-  * @param[in] NUM_THREADS Number of threads for launching cuda kernel
+  * @param[in] NUM_THREADS Number of threads when launching cuda kernel
   * @param[in] MAX_NUM_PILLARS Maximum number of pillars
   * @param[in] MAX_POINTS_PER_PILLAR Maximum number of points per pillar
   * @param[in] NUM_INDS_FOR_SCAN Number of indexes for scan(cumsum)
@@ -87,15 +87,15 @@ class PreprocessPointsCuda
   * @param[in] dev_x_coors X-coordinate indexes for corresponding pillars
   * @param[in] dev_y_coors Y-coordinate indexes for corresponding pillars
   * @param[in] dev_num_points_per_pillar Number of points in corresponding pillars
-  * @param[in] dev_pillar_x X-coordinate value for points in each pillar
-  * @param[in] dev_pillar_y Y-coordinate value for points in each pillar
-  * @param[in] dev_pillar_z Z-coordinate value for points in each pillar
-  * @param[in] dev_pillar_i Intensity value for points in each pillar
-  * @param[in] dev_x_coors_for_sub_shaped Used for x substraction in the network
-  * @param[in] dev_y_coors_for_sub_shaped Used for y substraction in the network
-  * @param[in] dev_pillar_feature_mask Mask to make sure pillars' feature equal to zero where no points in the pillars
-  * @param[in] dev_sparse_pillar_map Grid map represents occupancy pillars
-  * @param[in] host_pillar_count The numnber of valid pillars for the input pointcloud
+  * @param[in] dev_pillar_x X-coordinate values for points in each pillar
+  * @param[in] dev_pillar_y Y-coordinate values for points in each pillar
+  * @param[in] dev_pillar_z Z-coordinate values for points in each pillar
+  * @param[in] dev_pillar_i Intensity values for points in each pillar
+  * @param[in] dev_x_coors_for_sub_shaped Array for x substraction in the network
+  * @param[in] dev_y_coors_for_sub_shaped Array for y substraction in the network
+  * @param[in] dev_pillar_feature_mask Mask to make pillars' feature zero where no points in the pillars
+  * @param[in] dev_sparse_pillar_map Grid map representation for pillar-occupancy
+  * @param[in] host_pillar_count The numnber of valid pillars for an input pointcloud
   * @details Convert pointcloud to pillar representation
   */
   void doPreprocessPointsCuda(const float* dev_points, const int in_num_points, int* dev_x_coors, int* dev_y_coors,
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
index 3f52973e810..4bb01ecac3d 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/nms_cuda.cu
@@ -27,7 +27,6 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
   const int row_start = blockIdx.y;
   const int col_start = blockIdx.x;
 
-  // if (row_start > col_start) return;
   const int block_threads = blockDim.x;
 
   const int row_size =
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
index 72d30f772e8..52fa314bf85 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/point_pillars.cpp
@@ -55,7 +55,7 @@ PointPillars::PointPillars(const bool reproduce_result_mode, const float score_t
   , MAX_Z_RANGE_(1)
   , BATCH_SIZE_(1)
   , NUM_INDS_FOR_SCAN_(512)
-  , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify nms_kernel's shared memory size
+  , NUM_THREADS_(64) // if you change NUM_THREADS_, need to modify NUM_THREADS_MACRO in common.h
   , SENSOR_HEIGHT_(1.73f)
   , ANCHOR_DX_SIZE_(1.6f)
   , ANCHOR_DY_SIZE_(3.9f)
@@ -263,7 +263,7 @@ void PointPillars::initAnchors()
   box_anchors_min_y_ = new float[NUM_ANCHOR_];
   box_anchors_max_x_ = new float[NUM_ANCHOR_];
   box_anchors_max_y_ = new float[NUM_ANCHOR_];
-  // deallocate these memory in deconstructor
+  // deallocate these memories in deconstructor
 
   generateAnchors(anchors_px_, anchors_py_, anchors_pz_, anchors_dx_, anchors_dy_, anchors_dz_, anchors_ro_);
 
@@ -352,7 +352,7 @@ void PointPillars::convertAnchors2BoxAnchors(float* anchors_px, float* anchors_p
                                              float* box_anchors_min_x_, float* box_anchors_min_y_,
                                              float* box_anchors_max_x_, float* box_anchors_max_y_)
 {
-  // flip box's dimension when the third axis == 1
+  // flipping box's dimension
   float flipped_anchors_dx[NUM_ANCHOR_] = { 0 };
   float flipped_anchors_dy[NUM_ANCHOR_] = { 0 };
   for (size_t x = 0; x < NUM_ANCHOR_X_INDS_; x++)
diff --git a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
index 3180f94d72b..82b6fd46d68 100644
--- a/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
+++ b/ros/src/computing/perception/detection/lidar_detector/packages/lidar_point_pillars/nodes/preprocess_points.cpp
@@ -107,7 +107,6 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
       pillar_count += 1;
       coor_to_pillaridx[y_coor * GRID_X_SIZE_ + x_coor] = pillar_index;
 
-      // reverse index
       y_coors[pillar_index] = std::floor(y_coor);
       x_coors[pillar_index] = std::floor(x_coor);
 
@@ -118,7 +117,6 @@ void PreprocessPoints::preprocess(const float* in_points_array, int in_num_point
       y_coors_for_sub[pillar_index] = std::floor(y_coor) * PILLAR_Y_SIZE_ + -39.9f;
       x_coors_for_sub[pillar_index] = std::floor(x_coor) * PILLAR_X_SIZE_ + 0.1f;
 
-      // sparse pillar map
       sparse_pillar_map[y_coor * NUM_INDS_FOR_SCAN_ + x_coor] = 1;
     }
     int num = num_points_per_pillar[pillar_index];
diff --git a/ros/src/sensing/drivers/lidar/packages/robosense b/ros/src/sensing/drivers/lidar/packages/robosense
index 4a284cc1ba5..038e5f65243 160000
--- a/ros/src/sensing/drivers/lidar/packages/robosense
+++ b/ros/src/sensing/drivers/lidar/packages/robosense
@@ -1 +1 @@
-Subproject commit 4a284cc1ba595a7ad0679c2e5978975189a33d51
+Subproject commit 038e5f6524305d0d287805fdc9029872627ac7de

From 4b6b0ac618e583d579afb415d563ce19d0d6824b Mon Sep 17 00:00:00 2001
From: Kosuke Murakami <cirpue49@yahoo.co.jp>
Date: Fri, 8 Mar 2019 11:20:29 +0900
Subject: [PATCH 37/37] revert robosense submodule

---
 ros/src/sensing/drivers/lidar/packages/robosense | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ros/src/sensing/drivers/lidar/packages/robosense b/ros/src/sensing/drivers/lidar/packages/robosense
index 038e5f65243..4a284cc1ba5 160000
--- a/ros/src/sensing/drivers/lidar/packages/robosense
+++ b/ros/src/sensing/drivers/lidar/packages/robosense
@@ -1 +1 @@
-Subproject commit 038e5f6524305d0d287805fdc9029872627ac7de
+Subproject commit 4a284cc1ba595a7ad0679c2e5978975189a33d51