diff --git a/include/caffe/layers/box_annotator_ohem_layer.hpp b/include/caffe/layers/box_annotator_ohem_layer.hpp
new file mode 100644
index 00000000000..17d1cf90209
--- /dev/null
+++ b/include/caffe/layers/box_annotator_ohem_layer.hpp
@@ -0,0 +1,76 @@
+#ifndef CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
+#define CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/loss_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief BoxAnnotatorOHEMLayer: Annotate box labels for Online Hard Example
+ *        Mining (OHEM) training.
+ *
+ * R-FCN
+ * Written by Yi Li
+ *
+ * Bottom blobs (shapes are enforced in Reshape):
+ *   - bottom[0]: ROIs, N x 5 x H x W; channel 0 is read as the image index.
+ *   - bottom[1]: per-ROI loss, N x 1 x H x W.
+ *   - bottom[2]: per-ROI labels, N x 1 x H x W.
+ *   - bottom[3]: bbox loss weights, N x C x H x W.
+ * Top blobs:
+ *   - top[0]: labels, N x 1 x H x W; ROIs that are not selected are set to
+ *     ignore_label.
+ *   - top[1]: bbox loss weights, N x C x H x W; zero for ROIs that are not
+ *     selected.
+ *
+ * For every image at most roi_per_img ROIs are kept, taken in order of
+ * decreasing loss. Only Forward_gpu does this work; the CPU passes are
+ * NOT_IMPLEMENTED.
+ */
+template <typename Dtype>
+class BoxAnnotatorOHEMLayer : public Layer<Dtype> {
+ public:
+  explicit BoxAnnotatorOHEMLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "BoxAnnotatorOHEM"; }
+
+  virtual inline int ExactNumBottomBlobs() const { return 4; }
+  virtual inline int ExactNumTopBlobs() const { return 2; }
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  // Blob geometry cached by Reshape.
+  int num_;
+  int height_;
+  int width_;
+  int spatial_dim_;    // height_ * width_
+  int bbox_channels_;  // channels of bottom[3]
+
+  // Layer parameters, read in LayerSetUp.
+  int roi_per_img_;
+  int ignore_label_;
+};
+
+}  // namespace caffe
+
+#endif  // CAFFE_BOX_ANNOTATOR_OHEM_LAYER_HPP_
diff --git a/src/caffe/layers/box_annotator_ohem_layer.cpp b/src/caffe/layers/box_annotator_ohem_layer.cpp
new file mode 100644
index 00000000000..a9216a8c7c0
--- /dev/null
+++ b/src/caffe/layers/box_annotator_ohem_layer.cpp
@@ -0,0 +1,97 @@
+// ------------------------------------------------------------------
+// R-FCN
+// Written by Yi Li
+// ------------------------------------------------------------------
+
+#include <cfloat>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/layers/box_annotator_ohem_layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+using std::max;
+using std::min;
+using std::floor;
+using std::ceil;
+
+namespace caffe {
+
+// Reads roi_per_img (must be positive) and ignore_label from the layer's
+// box_annotator_ohem_param.
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::LayerSetUp(
+    const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  BoxAnnotatorOHEMParameter box_anno_param =
+      this->layer_param_.box_annotator_ohem_param();
+  roi_per_img_ = box_anno_param.roi_per_img();
+  CHECK_GT(roi_per_img_, 0);
+  ignore_label_ = box_anno_param.ignore_label();
+}
+
+// Checks that the four bottoms agree on num/height/width, caches the shape,
+// and sizes the two tops.
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::Reshape(
+    const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  // bottom[0]: ROIs, five values per ROI.
+  num_ = bottom[0]->num();
+  CHECK_EQ(5, bottom[0]->channels());
+  height_ = bottom[0]->height();
+  width_ = bottom[0]->width();
+  spatial_dim_ = height_ * width_;
+
+  // bottom[1]: per-ROI loss.
+  CHECK_EQ(bottom[1]->num(), num_);
+  CHECK_EQ(bottom[1]->channels(), 1);
+  CHECK_EQ(bottom[1]->height(), height_);
+  CHECK_EQ(bottom[1]->width(), width_);
+
+  // bottom[2]: per-ROI labels.
+  CHECK_EQ(bottom[2]->num(), num_);
+  CHECK_EQ(bottom[2]->channels(), 1);
+  CHECK_EQ(bottom[2]->height(), height_);
+  CHECK_EQ(bottom[2]->width(), width_);
+
+  // bottom[3]: bbox loss weights; its channel count is free.
+  CHECK_EQ(bottom[3]->num(), num_);
+  bbox_channels_ = bottom[3]->channels();
+  CHECK_EQ(bottom[3]->height(), height_);
+  CHECK_EQ(bottom[3]->width(), width_);
+
+  // Labels for scoring
+  top[0]->Reshape(num_, 1, height_, width_);
+  // Loss weights for bbox regression
+  top[1]->Reshape(num_, bbox_channels_, height_, width_);
+}
+
+// The annotation is only provided by Forward_gpu.
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::Forward_cpu(
+    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+  NOT_IMPLEMENTED;
+}
+
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::Backward_cpu(
+    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
+    const vector<Blob<Dtype>*>& bottom) {
+  NOT_IMPLEMENTED;
+}
+
+
+#ifdef CPU_ONLY
+STUB_GPU(BoxAnnotatorOHEMLayer);
+#endif
+
+INSTANTIATE_CLASS(BoxAnnotatorOHEMLayer);
+REGISTER_LAYER_CLASS(BoxAnnotatorOHEM);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/box_annotator_ohem_layer.cu b/src/caffe/layers/box_annotator_ohem_layer.cu
new file mode 100644
index 00000000000..400c0cc89e0
--- /dev/null
+++ b/src/caffe/layers/box_annotator_ohem_layer.cu
@@ -0,0 +1,97 @@
+// ------------------------------------------------------------------
+// R-FCN
+// Written by Yi Li
+// ------------------------------------------------------------------
+
+#include <algorithm>
+#include <cfloat>
+#include <vector>
+
+#include "caffe/layers/box_annotator_ohem_layer.hpp"
+
+using std::max;
+using std::min;
+
+namespace caffe {
+
+// Keeps, for every image, at most roi_per_img_ ROIs with the largest loss:
+// their labels and bbox loss weights are copied to the tops, every other ROI
+// gets ignore_label_ and zero weights. All work is done on host (cpu_data)
+// pointers.
+// NOTE(review): nothing here touches the device, so the same body could back
+// Forward_cpu, which is currently NOT_IMPLEMENTED -- confirm before moving.
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::Forward_gpu(
+    const vector<Blob<Dtype>*>& bottom,
+    const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_rois = bottom[0]->cpu_data();
+  const Dtype* bottom_loss = bottom[1]->cpu_data();
+  const Dtype* bottom_labels = bottom[2]->cpu_data();
+  const Dtype* bottom_bbox_loss_weights = bottom[3]->cpu_data();
+  Dtype* top_labels = top[0]->mutable_cpu_data();
+  Dtype* top_bbox_loss_weights = top[1]->mutable_cpu_data();
+  caffe_set(top[0]->count(), Dtype(ignore_label_), top_labels);
+  caffe_set(top[1]->count(), Dtype(0), top_bbox_loss_weights);
+
+  // One loss value per ROI: num_ * spatial_dim_ entries.
+  const int num_rois = bottom[1]->count();
+  const int roi_stride = 5 * spatial_dim_;
+
+  // Number of images = largest image index + 1. Walk the num_ items of
+  // bottom[0] (not num_rois of them): each item holds spatial_dim_ image
+  // indices in channel 0, so bounding the outer loop by num_rois would read
+  // past the end of the blob whenever spatial_dim_ > 1.
+  int num_imgs = 0;
+  for (int n = 0; n < num_; ++n) {
+    const Dtype* image_inds = bottom_rois + n * roi_stride;
+    for (int s = 0; s < spatial_dim_; ++s) {
+      const int batch_ind = static_cast<int>(image_inds[s]);
+      // A negative index would later address number_left out of range.
+      CHECK_GE(batch_ind, 0)
+          << "image index must be non-negative at BoxAnnotatorOHEMLayer";
+      num_imgs = std::max(num_imgs, batch_ind + 1);
+    }
+  }
+  CHECK_GT(num_imgs, 0)
+      << "number of images must be greater than 0 at BoxAnnotatorOHEMLayer";
+
+  // Find rois with max loss
+  vector<int> sorted_idx(num_rois);
+  for (int i = 0; i < num_rois; ++i) {
+    sorted_idx[i] = i;
+  }
+  std::sort(sorted_idx.begin(), sorted_idx.end(),
+      [bottom_loss](int i1, int i2) {
+        return bottom_loss[i1] > bottom_loss[i2];
+      });
+
+  // Generate output labels for scoring and loss_weights for bbox regression
+  vector<int> number_left(num_imgs, roi_per_img_);
+  for (int i = 0; i < num_rois; ++i) {
+    const int index = sorted_idx[i];
+    const int s = index % spatial_dim_;
+    const int n = index / spatial_dim_;
+    const int batch_ind = static_cast<int>(bottom_rois[n * roi_stride + s]);
+    if (number_left[batch_ind] > 0) {
+      number_left[batch_ind]--;
+      top_labels[index] = bottom_labels[index];
+      for (int j = 0; j < bbox_channels_; ++j) {
+        const int bbox_index = (n * bbox_channels_ + j) * spatial_dim_ + s;
+        top_bbox_loss_weights[bbox_index] =
+            bottom_bbox_loss_weights[bbox_index];
+      }
+    }
+  }
+}
+
+// No-op: nothing is written to any bottom diff.
+template <typename Dtype>
+void BoxAnnotatorOHEMLayer<Dtype>::Backward_gpu(
+    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
+    const vector<Blob<Dtype>*>& bottom) {
+  return;
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(BoxAnnotatorOHEMLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 0a77e3ce6db..642472679e0 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -363,7 +363,7 @@ message LayerParameter {
   optional AccuracyParameter accuracy_param = 102;
   optional ArgMaxParameter argmax_param = 103;
   optional BatchNormParameter batch_norm_param = 139;
-  // optional BoxAnnotatorOHEMParameter box_annotator_ohem_param = 150;
+  optional BoxAnnotatorOHEMParameter box_annotator_ohem_param = 150;
   optional BiasParameter bias_param = 141;
   optional ConcatParameter concat_param = 104;
   optional ContrastiveLossParameter contrastive_loss_param = 105;
@@ -531,10 +531,10 @@ message BatchNormParameter {
   optional float eps = 3 [default = 1e-5];
 }
 
-// message BoxAnnotatorOHEMParameter {
-//   required uint32 roi_per_img = 1; // number of rois for training
-//   optional int32 ignore_label = 2 [default = -1]; // ignore_label in scoring
-// }
+message BoxAnnotatorOHEMParameter {
+  required uint32 roi_per_img = 1; // number of rois for training
+  optional int32 ignore_label = 2 [default = -1]; // ignore_label in scoring
+}
 
 message BiasParameter {
   // The first axis of bottom[0] (the first input Blob) along which to apply