From 823a7c77e3d1ced39d342d5f78d98381c193df4c Mon Sep 17 00:00:00 2001 From: Tong Xiao Date: Wed, 20 Jul 2016 16:09:13 +0800 Subject: [PATCH] Reuse data memory if not propagate down (#86) * Specify whether bottom and top blobs are sharing data * Reuse data memory if not propagate down * Skip a layer if no_mem_opt is set * Fix a bug and add optimize_mem enum * Code formatting --- include/caffe/common_layers.hpp | 25 +++- include/caffe/layer.hpp | 3 +- include/caffe/net.hpp | 3 +- src/caffe/net.cpp | 205 ++++++++++++++++++++------------ src/caffe/proto/caffe.proto | 7 +- 5 files changed, 164 insertions(+), 79 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index c56c642201f..29858a1e6f3 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -269,7 +269,9 @@ class FlattenLayer : public Layer { virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } - + virtual inline bool is_sharing_data(int top_id, int bottom_id){ + return top_id == bottom_id; + } virtual inline bool is_sharing_diff(int top_id, int bottom_id){ return top_id == bottom_id; } @@ -391,6 +393,9 @@ class ReshapeLayer : public Layer { virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } + virtual inline bool is_sharing_data(int top_id, int bottom_id) { + return top_id == bottom_id; + } virtual inline bool is_sharing_diff(int top_id, int bottom_id) { return top_id == bottom_id; } @@ -570,6 +575,10 @@ class SplitLayer : public Layer { virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int MinTopBlobs() const { return 1; } + virtual inline bool is_sharing_data(int top_id, int bottom_id) { + return true; + } + protected: virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); @@ -643,6 +652,13 @@ class SliceLayer : public Layer { virtual inline bool EqualNumBottomTopBlobs() const { return true; } virtual inline bool is_gathering() {return true;} + virtual inline bool is_sharing_data(int top_id, int bottom_id){ +#ifndef USE_MPI + return top_id == bottom_id; +#else + return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI); +#endif + } virtual inline bool is_sharing_diff(int top_id, int bottom_id){ #ifndef USE_MPI return top_id == bottom_id; @@ -685,6 +701,13 @@ class SliceLayer : public Layer { virtual inline bool EqualNumBottomTopBlobs() const { return true; } + virtual inline bool is_sharing_data(int top_id, int bottom_id){ +#ifndef USE_MPI + return top_id == bottom_id; +#else + return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI); +#endif + } virtual inline bool is_sharing_diff(int top_id, int bottom_id){ #ifndef USE_MPI return top_id == bottom_id; diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 5162c0808e5..2319fe26c7c 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -309,8 +309,9 @@ class Layer { #endif /** - * @brief express whether this layer shares the diff between bottom and top + * @brief express whether this layer shares the data/diff between bottom and top */ + virtual inline bool is_sharing_data(int top_id, int bottom_id){return false;} virtual inline bool is_sharing_diff(int top_id, int bottom_id){return false;} diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 220beafc52c..d5a5199d466 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -271,8 +271,7 @@ class Net { /// Whether to compute and display debug info for the net. bool debug_info_; - vector< shared_ptr > shared_diff_storage_; - vector< shared_ptr > shared_data_storage_; + vector< shared_ptr > shared_storage_; DISABLE_COPY_AND_ASSIGN(Net); }; diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 81a3138d2be..415cb7649d8 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -295,7 +295,11 @@ void Net::Init(const NetParameter& in_param) { LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype); // optimize memory - if (phase_ == TRAIN && !debug_info_ && param.optimize_mem()){ + const NetParameter_OptimizeMem om = param.optimize_mem(); + const bool need_optimze_mem = + (om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN) + || (om == NetParameter_OptimizeMem_ALL_OPTIM); + if (!debug_info_ && need_optimze_mem) { MemoryOptimize(); } } @@ -961,68 +965,63 @@ const shared_ptr > Net::layer_by_name( return layer_ptr; } -template -class BlobMeta { +class SlotMeta { public: - BlobMeta(Blob* blob, int ref, int layer_id){ - blob_ = blob; - ref_ = ref; - layer_id_ = layer_id; - } + SlotMeta() + : key_(), ref_(0) { } - BlobMeta():blob_(NULL), ref_(0), layer_id_(-1){}; + SlotMeta(const string& key, int ref) + : key_(key), ref_(ref) { } + + inline const string& key() const { return key_; } + inline int ref() const { return ref_; } inline void DerefOne(){ CHECK_GT(ref_, 0); ref_ -= 1; if (ref_ == 0){ - blob_ = NULL; - layer_id_ = -1; + key_.clear(); } } inline void IncRef(){ref_ += 1;} - void RefBlob(Blob* blob, int ref, int layer_id){ - CHECK(blob_==NULL); - CHECK_NE(blob, blob_); + void RefSlot(const string& key, int ref) { + CHECK(key_.empty()); + CHECK_NE(key_, key); CHECK_GT(ref, 0); - blob_ = blob; + key_ = key; ref_ = ref; - layer_id_ = layer_id; } inline bool Empty(){ - return blob_ == NULL; + return key_.empty(); } - inline bool isBlob(Blob* blob){return blob_==blob;} + inline bool isSlot(const string& key){return key_ == key;} private: - Blob* blob_; + string key_; int ref_; - int layer_id_; }; -template -size_t AcquireSlot(vector >& slot_vec, Blob* blob, int ref, int layer_id){ - for (size_t i = 0 ; i < slot_vec.size(); ++i){ - if (slot_vec[i].Empty()){ - slot_vec[i].RefBlob(blob, ref, layer_id); +size_t AcquireSlot(vector& slot_vec, const string& key, int ref) { + for (size_t i = 0 ; i < slot_vec.size(); ++i) { + if (slot_vec[i].Empty()) { + slot_vec[i].RefSlot(key, ref); return i; } } // no available slot, need a new one - slot_vec.push_back(BlobMeta(blob, ref, layer_id)); + slot_vec.push_back(SlotMeta(key, ref)); return slot_vec.size() - 1; } -template -int FindBlob(vector >& slot_vec, Blob* blob){ +int FindSlot(vector& slot_vec, const string& key){ for (int i = 0; i < slot_vec.size(); ++i){ - if (slot_vec[i].isBlob(blob)){ + if (slot_vec[i].isSlot(key)){ return i; } } @@ -1033,11 +1032,65 @@ template void Net::MemoryOptimize() { // Dry run phase // In this phase, we assume the network topology has been setup - boost::unordered_map*, int> blob_log; - - vector > slots; + boost::unordered_map slot_index; + vector slots; + // Forward pass, try to reuse blobs' data memory + for (int i = 0; i < layers_.size(); ++i) { + if (layers_[i]->layer_param().no_mem_opt()) continue; + const vector* >& layer_top = top_vecs_[i]; + const vector* >& layer_bottom = bottom_vecs_[i]; + LOG(INFO) << "layer " << i << " " << layer_names_[i]; + // Find slot for each top blob's data + for (int i_top = 0; i_top < layer_top.size(); ++i_top) { + const string& top_name = blob_names_[top_id_vecs_[i][i_top]]; + int idx = FindSlot(slots, top_name + "_data"); + if (idx == -1) { + // Detect share data conditions + bool sharing_data = false; + for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) { + if (layers_[i]->is_sharing_data(i_top, i_bottom)) { + sharing_data = true; + const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]]; + idx = FindSlot(slots, bottom_name + "_data"); + LOG(INFO) << "top " << top_name + << " shares data with bottom " << bottom_name + << " slot " << idx; + break; + } + } + if (!sharing_data) { + if (!layers_[i]->loss(i_top)) { + idx = (int)AcquireSlot(slots, top_name + "_data", 1); + slot_index[top_name + "_data"] = idx; + LOG(INFO) << "top " << top_name << " acquires data slot " << idx; + } + } else { + slots[idx].IncRef(); + slot_index[top_name + "_data"] = idx; + } + } else { + // Top data blob is already assigned a slot (maybe inplace layer). + slots[idx].IncRef(); + LOG(INFO) << "top " << top_name << " refers to data slot " << idx; + } + } + // Deref bottom blob's data slot if this layer does not propagate down. + if (phase_ == TRAIN && layer_need_backward_[i]) continue; + for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) { + const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]]; + int idx = FindSlot(slots, bottom_name + "_data"); + if (slot_index.find(bottom_name + "_data") != slot_index.end()) { + idx = slot_index[bottom_name + "_data"]; + } + if (idx >= 0) { + // idx == -1 if this is an input blob + slots[idx].DerefOne(); + } + LOG(INFO) << "bottom " << bottom_name << " derefs data slot " << idx; + } + } for (int i = (layers_.size() -1); i >=0; --i){ vector* >& layer_top = top_vecs_[i]; @@ -1049,31 +1102,28 @@ void Net::MemoryOptimize() { // first deal with bottoms for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){ - Blob* bottom = layer_bottom[i_bottom]; - int idx = FindBlob(slots, bottom); - + const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]]; + int idx = FindSlot(slots, bottom_name + "_diff"); if (!(layers_[i]->layer_param().no_mem_opt())){ if (idx == -1){ - //detect share diff conditions bool sharing_diff = false; - for (int i_top = 0; i_top < layer_top.size(); ++i_top){ if(layers_[i]->is_sharing_diff(i_top, i_bottom)){ + const string& top_name = blob_names_[layer_top_idx[i_top]]; sharing_diff = true; - idx = FindBlob(slots, layer_top[i_top]); + idx = FindSlot(slots, top_name + "_diff"); } } - - if (!sharing_diff ) { - idx = (int) AcquireSlot(slots, bottom, 1, i); - blob_log[bottom] = idx; + if (!sharing_diff) { + idx = (int) AcquireSlot(slots, bottom_name + "_diff", 1); + slot_index[bottom_name + "_diff"] = idx; LOG(INFO) << "acquired slot for new blob"; }else{ LOG(INFO) << "sharing diff using slot "<::MemoryOptimize() { slots[idx].IncRef(); } } - string blob_name = blob_names_[layer_bottom_idx[i_bottom]]; LOG(INFO)<<"bottom blob "<* top = layer_top[i_top]; + const string& top_name = blob_names_[layer_top_idx[i_top]]; // find the top in the slots - int idx = FindBlob(slots, top); + int idx = FindSlot(slots, top_name + "_diff"); // look for shared diff - if (blob_log.find(top) != blob_log.end()){ - idx = blob_log[top]; + if (slot_index.find(top_name + "_diff") != slot_index.end()){ + idx = slot_index[top_name + "_diff"]; } //after the layer's operation, the refcount of top should be decreased by 1 if (idx != -1) slots[idx].DerefOne(); - string blob_name = blob_names_[layer_top_idx[i_top]]; - LOG(INFO)<<"top blob "<count() * sizeof(Dtype); + count_raw += bytes * 2; + int idx = -1; + if (slot_index.find(name + "_data") != slot_index.end()) { + idx = slot_index[name + "_data"]; + blobs_[i_blob]->SetDataStorage(shared_storage_[idx]); + shared_storage_[idx]->Resize(bytes); + } else { + count_opt += bytes; } - int idx = blob_log[blobs_[i_blob].get()]; - blobs_[i_blob]->SetDiffStorage(shared_diff_storage_[idx]); - LOG(INFO)<<"blob "<Resize(blobs_[i_blob]->count() * sizeof(Dtype)); - count_raw += blobs_[i_blob]->count() * sizeof(Dtype); + LOG(INFO) << "blob " << i_blob + << " name " << blob_names_[i_blob] + << " data idx " << idx; + if (slot_index.find(name + "_diff") != slot_index.end()) { + idx = slot_index[name + "_diff"]; + blobs_[i_blob]->SetDiffStorage(shared_storage_[idx]); + shared_storage_[idx]->Resize(bytes); + } else { + count_opt += bytes; + } + LOG(INFO) << "blob " << i_blob + << " name " << blob_names_[i_blob] + << " diff idx " << idx; } - for (int i_mem = 0; i_mem < shared_diff_storage_.size(); i_mem++){ - count_opt += shared_diff_storage_[i_mem]->size(); + for (int i_mem = 0; i_mem < shared_storage_.size(); i_mem++){ + LOG(INFO) << "storage memory slot " << i_mem + << " size " << shared_storage_[i_mem]->size(); + count_opt += shared_storage_[i_mem]->size(); } - LOG(INFO)<<"raw memory "<