Commit

standardize memory optimization configurations (BVLC#99)
* improvise memory opt configs

* take care of share data with excluded blob
yjxiong authored Aug 11, 2016
1 parent f971cdb commit 4506029
Showing 3 changed files with 58 additions and 18 deletions.
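
In short, this commit replaces the enum-valued optimize_mem field on NetParameter and the per-layer no_mem_opt flag with a single MemoryOptimizationParameter message (a usage sketch follows the caffe.proto diff below). Roughly, sketched from the diff, the old prototxt settings map to the new ones as:

    # old NetParameter setting        new mem_param equivalent
    optimize_mem: NO_OPTIM       ->   mem_param { optimize_train: false optimize_test: false }
    optimize_mem: TRAIN_ONLY     ->   mem_param { optimize_train: true }
    optimize_mem: ALL_OPTIM      ->   mem_param { optimize_train: true optimize_test: true }
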
2 changes: 2 additions & 0 deletions include/caffe/net.hpp
@@ -271,7 +271,9 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

/// Memory optimization related stuff.
vector< shared_ptr<SyncedMemory> > shared_storage_;
std::set<string> excluded_blob_names_;

DISABLE_COPY_AND_ASSIGN(Net);
};
48 changes: 38 additions & 10 deletions src/caffe/net.cpp
@@ -67,6 +67,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
for (int input_id = 0; input_id < param.input_size(); ++input_id) {
const int layer_id = -1; // inputs have fake layer ID -1
AppendTop(param, layer_id, input_id, &available_blobs, &blob_name_to_idx);

// input blobs are excluded from memory optimization by default
excluded_blob_names_.insert(param.input(input_id));
}
DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
// For each layer, set up its input and output
@@ -282,6 +285,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "This network produces output " << *it;
net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
net_output_blob_indices_.push_back(blob_name_to_idx[*it]);

// add output blob name to default excluded blobs
excluded_blob_names_.insert(*it);
}
for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
blob_names_index_[blob_names_[blob_id]] = blob_id;
@@ -295,10 +301,16 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);

// optimize memory
const NetParameter_OptimizeMem om = param.optimize_mem();
const bool need_optimze_mem =
(om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN)
|| (om == NetParameter_OptimizeMem_ALL_OPTIM);
(param.mem_param().optimize_train() && phase_ == TRAIN)
|| (param.mem_param().optimize_test() && phase_ == TEST);

// add additional specified blobs to the exclusion list
for (int ex_id = 0; ex_id < param.mem_param().exclude_blob_size(); ++ex_id){
excluded_blob_names_.insert(param.mem_param().exclude_blob(ex_id));
}

// launch memory optimization if necessary
if (!debug_info_ && need_optimze_mem) {
MemoryOptimize();
}
@@ -1036,6 +1048,10 @@ int FindSlot(vector<SlotMeta>& slot_vec, const string& key){
return -1;
}

inline bool check_exclude(const std::set<string>& exclude_list, const string& blob_name){
return exclude_list.find(blob_name) != exclude_list.end();
}

template <typename Dtype>
void Net<Dtype>::MemoryOptimize() {
// Dry run phase
@@ -1046,13 +1062,15 @@ void Net<Dtype>::MemoryOptimize() {

// Forward pass, try to reuse blobs' data memory
for (int i = 0; i < layers_.size(); ++i) {
if (layers_[i]->layer_param().no_mem_opt()) continue;
const vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
const vector<Blob<Dtype>* >& layer_bottom = bottom_vecs_[i];
LOG(INFO) << "layer " << i << " " << layer_names_[i];
// Find slot for each top blob's data
for (int i_top = 0; i_top < layer_top.size(); ++i_top) {
const string& top_name = blob_names_[top_id_vecs_[i][i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

int idx = FindSlot(slots, top_name + "_data");
if (idx == -1) {
// Detect share data conditions
@@ -1075,8 +1093,12 @@
LOG(INFO) << "top " << top_name << " acquires data slot " << idx;
}
} else {
slots[idx].IncRef();
slot_index[top_name + "_data"] = idx;
if (idx != -1) {
// idx == -1 means the top blob is (recursively) sharing data with an excluded bottom blob
// This makes this blob itself excluded from the optimization
slots[idx].IncRef();
slot_index[top_name + "_data"] = idx;
}
}
} else {
// Top data blob is already assigned a slot (maybe inplace layer).
@@ -1088,6 +1110,9 @@
if (phase_ == TRAIN && layer_need_backward_[i]) continue;
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_data");
if (slot_index.find(bottom_name + "_data") != slot_index.end()) {
idx = slot_index[bottom_name + "_data"];
@@ -1113,8 +1138,10 @@
// first deal with bottoms
for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){
const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]];

if (check_exclude(excluded_blob_names_, bottom_name)) continue;

int idx = FindSlot(slots, bottom_name + "_diff");
if (!(layers_[i]->layer_param().no_mem_opt())){
if (idx == -1){
//detect share diff conditions
bool sharing_diff = false;
@@ -1132,8 +1159,8 @@
}else{
LOG(INFO) << "sharing diff using slot "<<idx;
if(idx != -1) {
// idx == -1 means this is an output blob
// as a good practice, we do not touch the output blobs' diff memroy cause leads to unwanted behaviors.
// idx == -1 means the bottom blob is (recursively) sharing diff with an excluded top blob
// This makes this blob itself excluded from the optimization
slots[idx].IncRef();
slot_index[bottom_name + "_diff"] = idx;
}
@@ -1143,7 +1170,6 @@
// usually this means in-place operation
slots[idx].IncRef();
}
}
LOG(INFO)<<"bottom blob "<<i_bottom<<" name "
<<bottom_name<<" slot id "<<idx;
}
@@ -1152,6 +1178,8 @@
for (int i_top = 0; i_top < layer_top.size(); ++i_top){
const string& top_name = blob_names_[layer_top_idx[i_top]];

if (check_exclude(excluded_blob_names_, top_name)) continue;

// find the top in the slots
int idx = FindSlot(slots, top_name + "_diff");

26 changes: 18 additions & 8 deletions src/caffe/proto/caffe.proto
@@ -89,13 +89,8 @@ message NetParameter {
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.

// Whether to perform memory optimization
enum OptimizeMem {
NO_OPTIM = 0;
TRAIN_ONLY = 1;
ALL_OPTIM = 2;
}
optional OptimizeMem optimize_mem = 11 [default=TRAIN_ONLY];
// The configuration of memory optimization
optional MemoryOptimizationParameter mem_param = 200;

// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
@@ -322,7 +317,6 @@ message LayerParameter {
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;

optional bool no_mem_opt = 900 [default = false];
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
@@ -1301,3 +1295,19 @@ message BatchReductionParameter {
repeated int32 level = 1;
optional ReductionParameter reduction_param = 2;
}

message MemoryOptimizationParameter {
// Mode of optimization

// whether to optimize for all nets specified in train phase
optional bool optimize_train = 1 [default = true];

// whether to optimize for all nets specified in test phase
optional bool optimize_test = 2 [default = false];

// By default, the net's input and output blobs are excluded from the optimization for safety.
// Additional blobs can be excluded by listing their names here.
// This is helpful when extracting features from intermediate blobs or when debugging.
repeated string exclude_blob = 3;

}
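
For illustration, here is a minimal sketch of how these settings might appear in a net prototxt; the net name and blob names are hypothetical examples:

    name: "example_net"
    mem_param {
      # reuse blob memory when this net is instantiated in the TRAIN phase (default: true)
      optimize_train: true
      # also reuse blob memory in the TEST phase (default: false)
      optimize_test: true
      # keep these blobs out of the optimization, e.g. to read their data after a forward pass
      exclude_blob: "conv5"   # hypothetical blob name
      exclude_blob: "fc6"     # hypothetical blob name
    }

The net's input and output blobs are always added to the exclusion set in Net<Dtype>::Init(), so they do not need to be listed explicitly.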
