Commit

standardize memory optimization configurations (BVLC#99)
* improvise memory opt configs

* take care of share data with excluded blob
yjxiong authored Aug 11, 2016
1 parent f971cdb commit 4506029
Showing 3 changed files with 58 additions and 18 deletions.
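
In short, this commit replaces the enum-valued optimize_mem field on NetParameter and the per-layer no_mem_opt flag with a single MemoryOptimizationParameter message (a usage sketch follows the caffe.proto diff below). Roughly, sketched from the diff, the old prototxt settings map to the new ones as:

    # old NetParameter setting        new mem_param equivalent
    optimize_mem: NO_OPTIM       ->   mem_param { optimize_train: false optimize_test: false }
    optimize_mem: TRAIN_ONLY     ->   mem_param { optimize_train: true }
    optimize_mem: ALL_OPTIM      ->   mem_param { optimize_train: true optimize_test: true }
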
2 changes: 2 additions & 0 deletions include/caffe/net.hpp
@@ -271,7 +271,9 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

/// Memory optimization related stuff.
vector< shared_ptr<SyncedMemory> > shared_storage_;
std::set<string> excluded_blob_names_;

DISABLE_COPY_AND_ASSIGN(Net);
};
48 changes: 38 additions & 10 deletions src/caffe/net.cpp
@@ -67,6 +67,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
for (int input_id = 0; input_id < param.input_size(); ++input_id) {
const int layer_id = -1; // inputs have fake layer ID -1
AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx);

// input blobs are excluded from memory optimization by default
excluded_blob_names_.insert(param.input(input_id));
}
DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
// For each layer, set up its input and output
@@ -282,6 +285,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "This network produces output " << *it;
net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
net_output_blob_indices_.push_back(blob_name_to_idx[*it]);

// add output blob name to default excluded blobs
excluded_blob_names_.insert(*it);
}
for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
blob_names_index_[blob_names_[blob_id]] = blob_id;
@@ -295,10 +301,16 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);

// optimize memory
const NetParameter_OptimizeMem om = param.optimize_mem();
const bool need_optimze_mem =
(om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN)
|| (om == NetParameter_OptimizeMem_ALL_OPTIM);
(param.mem_param().optimize_train() && phase_ == TRAIN)
|| (param.mem_param().optimize_test() && phase_ == TEST);

// add additional specified blobs to the exclusion list
for (int ex_id = 0; ex_id < param.mem_param().exclude_blob_size(); ++ex_id){
excluded_blob_names_.insert(param.mem_param().exclude_blob(ex_id));
}

// launch memory optimization if necessary
if (!debug_info_ && need_optimze_mem) {
MemoryOptimize();
}
@@ -1036,6 +1048,10 @@ int FindSlot(vector<SlotMeta>& slot_vec, const string& key){
return -1;
}

inline bool check_exclude(const std::set<string>& exclude_list, const string& blob_name){
return exclude_list.find(blob_name) != exclude_list.end();
}

template <typename Dtype>
void Net<Dtype>::MemoryOptimize() {
// Dry run phase
@@ -1046,13 +1062,15 @@ void Net<Dtype>::MemoryOptimize() {

// Forward pass, try to reuse blobs' data memory
for (int i = 0; i < layers_.size(); ++i) {
if (layers_[i]->layer_param().no_mem_opt()) continue;
const vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
const vector<Blob<Dtype>* >& layer_bottom = bottom_vecs_[i];
LOG(INFO) << "layer " << i << " " << layer_names_[i];
// Find slot for each top blob's data
for (int i_top = 0; i_top < layer_top.size(); ++i_top) {
const string& top_name = blob_names_[top_id_vecs_[i][i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

int idx = FindSlot(slots, top_name + "_data");
if (idx == -1) {
// Detect share data conditions
@@ -1075,8 +1093,12 @@
LOG(INFO) << "top " << top_name << " acquires data slot " << idx;
}
} else {
slots[idx].IncRef();
slot_index[top_name + "_data"] = idx;
if (idx != -1) {
// idx == -1 means the top blob is (recursively) sharing data with an excluded bottom blob
// This makes this blob itself excluded from the optimization
slots[idx].IncRef();
slot_index[top_name + "_data"] = idx;
}
}
} else {
// Top data blob is already assigned a slot (maybe inplace layer).
@@ -1088,6 +1110,9 @@
if (phase_ == TRAIN && layer_need_backward_[i]) continue;
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_data");
if (slot_index.find(bottom_name + "_data") != slot_index.end()) {
idx = slot_index[bottom_name + "_data"];
@@ -1113,8 +1138,10 @@
// first deal with bottoms
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){
const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_diff");
if (!(layers_[i]->layer_param().no_mem_opt())){
if (idx == -1){
//detect share diff conditions
bool sharing_diff = false;
@@ -1132,8 +1159,8 @@
}else{
LOG(INFO) << "sharing diff using slot "<<idx;
if(idx != -1) {
// idx == -1 means this is an output blob
// as a good practice, we do not touch the output blobs' diff memroy cause leads to unwanted behaviors.
// idx == -1 means the bottom blob is (recursively) sharing diff with an excluded top blob
// This makes this blob itself excluded from the optimization
slots[idx].IncRef();
slot_index[bottom_name + "_diff"] = idx;
}
@@ -1143,7 +1170,6 @@
// usually this means in-place operation
slots[idx].IncRef();
}
}
LOG(INFO)<<"bottom blob "<<i_bottom<<" name "
<<bottom_name<<" slot id "<<idx;
}
@@ -1152,6 +1178,8 @@
for (int i_top = 0; i_top < layer_top.size(); ++i_top){
const string& top_name = blob_names_[layer_top_idx[i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

// find the top in the slots
int idx = FindSlot(slots, top_name + "_diff");

26 changes: 18 additions & 8 deletions src/caffe/proto/caffe.proto
@@ -89,13 +89,8 @@ message NetParameter {
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.

// Whether to perform memory optimization
enum OptimizeMem {
NO_OPTIM = 0;
TRAIN_ONLY = 1;
ALL_OPTIM = 2;
}
optional OptimizeMem optimize_mem = 11 [default=TRAIN_ONLY];
// The configuration of memory optimization
optional MemoryOptimizationParameter mem_param = 200;

// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
@@ -322,7 +317,6 @@ message LayerParameter {
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;

optional bool no_mem_opt = 900 [default = false];
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
@@ -1301,3 +1295,19 @@ message BatchReductionParameter {
repeated int32 level = 1;
optional ReductionParameter reduction_param = 2;
}

message MemoryOptimizationParameter {
// Mode of optimization

// whether to optimize for all nets specified in train phase
optional bool optimize_train = 1 [default = true];

// whether to optimize for all nets specified in test phase
optional bool optimize_test = 2 [default = false];

// By default, the net's input and output blobs are excluded from the optimization for safety.
// Additional blobs can be excluded by listing their names here.
// This is helpful when extracting features from intermediate blobs or when debugging.
repeated string exclude_blob = 3;

}
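
For illustration, here is a minimal sketch of how these settings might appear in a net prototxt; the net name and blob names are hypothetical examples:

    name: "example_net"
    mem_param {
      # reuse blob memory when this net is instantiated in the TRAIN phase (default: true)
      optimize_train: true
      # also reuse blob memory in the TEST phase (default: false)
      optimize_test: true
      # keep these blobs out of the optimization, e.g. to read their data after a forward pass
      exclude_blob: "conv5"   # hypothetical blob name
      exclude_blob: "fc6"     # hypothetical blob name
    }

The net's input and output blobs are always added to the exclusion set in Net<Dtype>::Init(), so they do not need to be listed explicitly.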
