Reuse data memory when not propagating down (BVLC#86)
* Specify whether bottom and top blobs are sharing data

* Reuse data memory when not propagating down

* Skip a layer if no_mem_opt is set

* Fix a bug and add optimize_mem enum

* Code formatting
Cysu authored and yjxiong committed Jul 20, 2016
1 parent 8d9fca8 commit 823a7c7
Showing 5 changed files with 164 additions and 79 deletions.
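The idea, in brief: during a dry run over the network, each blob's data/diff is assigned to a reference-counted storage "slot"; a slot is released once its last consumer is done and can then be reused by a later blob. The following self-contained sketch mimics the SlotMeta/AcquireSlot/FindSlot helpers this commit adds to src/caffe/net.cpp (shown below); the toy backward walk in main() is illustrative only, not part of the patch.

// Sketch of the slot-reuse bookkeeping; mirrors the helpers added in
// src/caffe/net.cpp below. The toy chain in main() is illustrative.
#include <cassert>
#include <iostream>
#include <string>
#include <vector>

struct SlotMeta {
  std::string key;  // name of the blob currently owning the slot ("" = free)
  int ref;          // pending consumers of the slot's contents
  SlotMeta() : ref(0) {}
  SlotMeta(const std::string& k, int r) : key(k), ref(r) {}
};

// Reuse the first free slot, or grow the pool by one.
size_t AcquireSlot(std::vector<SlotMeta>& slots, const std::string& key) {
  for (size_t i = 0; i < slots.size(); ++i) {
    if (slots[i].key.empty()) { slots[i] = SlotMeta(key, 1); return i; }
  }
  slots.push_back(SlotMeta(key, 1));
  return slots.size() - 1;
}

int FindSlot(const std::vector<SlotMeta>& slots, const std::string& key) {
  for (size_t i = 0; i < slots.size(); ++i)
    if (slots[i].key == key) return static_cast<int>(i);
  return -1;
}

void DerefOne(std::vector<SlotMeta>& slots, int idx) {
  assert(slots[idx].ref > 0);
  if (--slots[idx].ref == 0) slots[idx].key.clear();  // slot is reusable now
}

int main() {
  // Toy chain data -> a -> b -> c, walked back to front as in the dry run:
  // each layer acquires a slot for its bottom diff, then releases the slot
  // holding its top diff, so four diffs cycle through only two slots.
  const char* tops[]    = {"c", "b", "a"};
  const char* bottoms[] = {"b", "a", "data"};
  std::vector<SlotMeta> slots;
  AcquireSlot(slots, "c");  // diff arriving from the loss
  for (int i = 0; i < 3; ++i) {
    AcquireSlot(slots, bottoms[i]);
    DerefOne(slots, FindSlot(slots, tops[i]));
  }
  std::cout << "slots used: " << slots.size() << " for 4 diff blobs\n";
  return 0;
}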
25 changes: 24 additions & 1 deletion include/caffe/common_layers.hpp
@@ -269,7 +269,9 @@ class FlattenLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }


virtual inline bool is_sharing_data(int top_id, int bottom_id){
return top_id == bottom_id;
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
return top_id == bottom_id;
}
@@ -391,6 +393,9 @@ class ReshapeLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }

virtual inline bool is_sharing_data(int top_id, int bottom_id) {
return top_id == bottom_id;
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id) {
return top_id == bottom_id;
}
@@ -570,6 +575,10 @@ class SplitLayer : public Layer<Dtype> {
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }

virtual inline bool is_sharing_data(int top_id, int bottom_id) {
return true;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
@@ -643,6 +652,13 @@ class SliceLayer : public Layer<Dtype> {
virtual inline bool EqualNumBottomTopBlobs() const { return true; }
virtual inline bool is_gathering() {return true;}

virtual inline bool is_sharing_data(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
#else
return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI);
#endif
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
@@ -685,6 +701,13 @@

virtual inline bool EqualNumBottomTopBlobs() const { return true; }

virtual inline bool is_sharing_data(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
#else
return (top_id == bottom_id) && (Caffe::parallel_mode()!=Caffe::MPI);
#endif
}
virtual inline bool is_sharing_diff(int top_id, int bottom_id){
#ifndef USE_MPI
return top_id == bottom_id;
3 changes: 2 additions & 1 deletion include/caffe/layer.hpp
@@ -309,8 +309,9 @@ class Layer {
#endif

  /**
-   * @brief express whether this layer shares the diff between bottom and top
   * @brief express whether this layer shares the data/diff between bottom and top
   */
  virtual inline bool is_sharing_data(int top_id, int bottom_id){return false;}
  virtual inline bool is_sharing_diff(int top_id, int bottom_id){return false;}


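These base-class hooks default to no sharing; a layer opts in per (top, bottom) pair, as the overrides in common_layers.hpp above do. As a usage sketch, a hypothetical element-wise layer that computes its output in place could advertise sharing like this (MyInplaceLayer is illustrative, not part of this commit):

#include "caffe/layer.hpp"

// Hypothetical in-place layer; illustrative only, not part of this commit.
template <typename Dtype>
class MyInplaceLayer : public caffe::Layer<Dtype> {
 public:
  explicit MyInplaceLayer(const caffe::LayerParameter& param)
      : caffe::Layer<Dtype>(param) {}
  // Top i writes over bottom i's data, so MemoryOptimize() may map both
  // blobs onto one storage slot instead of acquiring a fresh one.
  virtual inline bool is_sharing_data(int top_id, int bottom_id) {
    return top_id == bottom_id;
  }
  // Backward likewise writes bottom diff over top diff.
  virtual inline bool is_sharing_diff(int top_id, int bottom_id) {
    return top_id == bottom_id;
  }
  // Forward_cpu/Backward_cpu etc. as in any other layer.
};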
3 changes: 1 addition & 2 deletions include/caffe/net.hpp
@@ -271,8 +271,7 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

-  vector< shared_ptr<SyncedMemory> > shared_diff_storage_;
-  vector< shared_ptr<SyncedMemory> > shared_data_storage_;
  vector< shared_ptr<SyncedMemory> > shared_storage_;

DISABLE_COPY_AND_ASSIGN(Net);
};
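Net::Init() in net.cpp below gates MemoryOptimize() on a new net-level optimize_mem setting, and the dry run skips any layer whose no_mem_opt flag is set. The caffe.proto hunk is not rendered in this view, so the following prototxt fragment is an assumed usage sketch, inferred from the generated identifiers (NetParameter_OptimizeMem_TRAIN_ONLY, NetParameter_OptimizeMem_ALL_OPTIM, layer_param().no_mem_opt()); consult the proto change for the exact field names and defaults.

# Assumed usage; exact proto fields live in the caffe.proto change not shown here.
optimize_mem: TRAIN_ONLY    # or ALL_OPTIM to share memory in TEST phase too
layer {
  name: "some_layer"
  type: "SomeType"
  no_mem_opt: true          # exempt this layer from memory sharing
}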
205 changes: 131 additions & 74 deletions src/caffe/net.cpp
Expand Up @@ -295,7 +295,11 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
LOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);

// optimize memory
if (phase_ == TRAIN && !debug_info_ && param.optimize_mem()){
const NetParameter_OptimizeMem om = param.optimize_mem();
const bool need_optimze_mem =
(om == NetParameter_OptimizeMem_TRAIN_ONLY && phase_ == TRAIN)
|| (om == NetParameter_OptimizeMem_ALL_OPTIM);
if (!debug_info_ && need_optimze_mem) {
MemoryOptimize();
}
}
@@ -961,68 +965,63 @@ const shared_ptr<Layer<Dtype> > Net<Dtype>::layer_by_name(
return layer_ptr;
}

-template <typename Dtype>
-class BlobMeta {
class SlotMeta {
 public:
-  BlobMeta(Blob<Dtype>* blob, int ref, int layer_id){
-    blob_ = blob;
-    ref_ = ref;
-    layer_id_ = layer_id;
-  }
  SlotMeta()
      : key_(), ref_(0) { }

-  BlobMeta():blob_(NULL), ref_(0), layer_id_(-1){};
  SlotMeta(const string& key, int ref)
      : key_(key), ref_(ref) { }

  inline const string& key() const { return key_; }
  inline int ref() const { return ref_; }

  inline void DerefOne(){
    CHECK_GT(ref_, 0);
    ref_ -= 1;
    if (ref_ == 0){
-      blob_ = NULL;
-      layer_id_ = -1;
      key_.clear();
    }
  }

  inline void IncRef(){ref_ += 1;}

-  void RefBlob(Blob<Dtype>* blob, int ref, int layer_id){
-    CHECK(blob_==NULL);
-    CHECK_NE(blob, blob_);
  void RefSlot(const string& key, int ref) {
    CHECK(key_.empty());
    CHECK_NE(key_, key);
    CHECK_GT(ref, 0);
-    blob_ = blob;
    key_ = key;
    ref_ = ref;
-    layer_id_ = layer_id;
  }

  inline bool Empty(){
-    return blob_ == NULL;
    return key_.empty();
  }

-  inline bool isBlob(Blob<Dtype>* blob){return blob_==blob;}
  inline bool isSlot(const string& key){return key_ == key;}

 private:
-  Blob<Dtype>* blob_;
  string key_;
  int ref_;
-  int layer_id_;
};

-template <typename Dtype>
-size_t AcquireSlot(vector<BlobMeta<Dtype> >& slot_vec, Blob<Dtype>* blob, int ref, int layer_id){
-  for (size_t i = 0 ; i < slot_vec.size(); ++i){
-    if (slot_vec[i].Empty()){
-      slot_vec[i].RefBlob(blob, ref, layer_id);
size_t AcquireSlot(vector<SlotMeta>& slot_vec, const string& key, int ref) {
  for (size_t i = 0 ; i < slot_vec.size(); ++i) {
    if (slot_vec[i].Empty()) {
      slot_vec[i].RefSlot(key, ref);
      return i;
    }
  }

  // no available slot, need a new one
-  slot_vec.push_back(BlobMeta<Dtype>(blob, ref, layer_id));
  slot_vec.push_back(SlotMeta(key, ref));

  return slot_vec.size() - 1;
}

-template <typename Dtype>
-int FindBlob(vector<BlobMeta<Dtype> >& slot_vec, Blob<Dtype>* blob){
int FindSlot(vector<SlotMeta>& slot_vec, const string& key){
  for (int i = 0; i < slot_vec.size(); ++i){
-    if (slot_vec[i].isBlob(blob)){
    if (slot_vec[i].isSlot(key)){
      return i;
    }
  }
@@ -1033,11 +1032,65 @@ template <typename Dtype>
void Net<Dtype>::MemoryOptimize() {
  // Dry run phase
  // In this phase, we assume the network topology has been setup
-  boost::unordered_map<Blob<Dtype>*, int> blob_log;
  boost::unordered_map<string, int> slot_index;

-  vector<BlobMeta<Dtype> > slots;
  vector<SlotMeta> slots;

  // Forward pass, try to reuse blobs' data memory
  for (int i = 0; i < layers_.size(); ++i) {
    if (layers_[i]->layer_param().no_mem_opt()) continue;
    const vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
    const vector<Blob<Dtype>* >& layer_bottom = bottom_vecs_[i];
    LOG(INFO) << "layer " << i << " " << layer_names_[i];
    // Find slot for each top blob's data
    for (int i_top = 0; i_top < layer_top.size(); ++i_top) {
      const string& top_name = blob_names_[top_id_vecs_[i][i_top]];
      int idx = FindSlot(slots, top_name + "_data");
      if (idx == -1) {
        // Detect share data conditions
        bool sharing_data = false;
        for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
          if (layers_[i]->is_sharing_data(i_top, i_bottom)) {
            sharing_data = true;
            const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];
            idx = FindSlot(slots, bottom_name + "_data");
            LOG(INFO) << "top " << top_name
                << " shares data with bottom " << bottom_name
                << " slot " << idx;
            break;
          }
        }
        if (!sharing_data) {
          if (!layers_[i]->loss(i_top)) {
            idx = (int)AcquireSlot(slots, top_name + "_data", 1);
            slot_index[top_name + "_data"] = idx;
            LOG(INFO) << "top " << top_name << " acquires data slot " << idx;
          }
        } else {
          slots[idx].IncRef();
          slot_index[top_name + "_data"] = idx;
        }
      } else {
        // Top data blob is already assigned a slot (maybe inplace layer).
        slots[idx].IncRef();
        LOG(INFO) << "top " << top_name << " refers to data slot " << idx;
      }
    }
    // Deref bottom blob's data slot if this layer does not propagate down.
    if (phase_ == TRAIN && layer_need_backward_[i]) continue;
    for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom) {
      const string& bottom_name = blob_names_[bottom_id_vecs_[i][i_bottom]];
      int idx = FindSlot(slots, bottom_name + "_data");
      if (slot_index.find(bottom_name + "_data") != slot_index.end()) {
        idx = slot_index[bottom_name + "_data"];
      }
      if (idx >= 0) {
        // idx == -1 if this is an input blob
        slots[idx].DerefOne();
      }
      LOG(INFO) << "bottom " << bottom_name << " derefs data slot " << idx;
    }
  }

  for (int i = (layers_.size() -1); i >=0; --i){
    vector<Blob<Dtype>* >& layer_top = top_vecs_[i];
@@ -1049,31 +1102,28 @@

    // first deal with bottoms
    for (int i_bottom = 0; i_bottom < layer_bottom.size(); ++i_bottom){
-     Blob<Dtype>* bottom = layer_bottom[i_bottom];
-     int idx = FindBlob(slots, bottom);
      const string& bottom_name = blob_names_[layer_bottom_idx[i_bottom]];
      int idx = FindSlot(slots, bottom_name + "_diff");
      if (!(layers_[i]->layer_param().no_mem_opt())){
        if (idx == -1){
          //detect share diff conditions
          bool sharing_diff = false;
          for (int i_top = 0; i_top < layer_top.size(); ++i_top){
            if(layers_[i]->is_sharing_diff(i_top, i_bottom)){
              const string& top_name = blob_names_[layer_top_idx[i_top]];
              sharing_diff = true;
-             idx = FindBlob(slots, layer_top[i_top]);
              idx = FindSlot(slots, top_name + "_diff");
            }
          }
-         if (!sharing_diff ) {
-           idx = (int) AcquireSlot(slots, bottom, 1, i);
-           blob_log[bottom] = idx;
          if (!sharing_diff) {
            idx = (int) AcquireSlot(slots, bottom_name + "_diff", 1);
            slot_index[bottom_name + "_diff"] = idx;
            LOG(INFO) << "acquired slot for new blob";
          }else{
            LOG(INFO) << "sharing diff using slot "<<idx;
            if(idx != -1) {
              slots[idx].IncRef();
-             blob_log[bottom] = idx;
              slot_index[bottom_name + "_diff"] = idx;
            }
          }
        }else{
@@ -1082,65 +1132,72 @@
          slots[idx].IncRef();
        }
      }
-     string blob_name = blob_names_[layer_bottom_idx[i_bottom]];
      LOG(INFO)<<"bottom blob "<<i_bottom<<" name "
-         <<blob_name<<" ptr "<<bottom<<" slot id "<<idx;
          <<bottom_name<<" slot id "<<idx;
    }


    // then deal with top
    for (int i_top = 0; i_top < layer_top.size(); ++i_top){
-     Blob<Dtype>* top = layer_top[i_top];
      const string& top_name = blob_names_[layer_top_idx[i_top]];

      // find the top in the slots
-     int idx = FindBlob(slots, top);
      int idx = FindSlot(slots, top_name + "_diff");

      // look for shared diff
-     if (blob_log.find(top) != blob_log.end()){
-       idx = blob_log[top];
      if (slot_index.find(top_name + "_diff") != slot_index.end()){
        idx = slot_index[top_name + "_diff"];
      }
      //after the layer's operation, the refcount of top should be decreased by 1
      if (idx != -1)
        slots[idx].DerefOne();

-     string blob_name = blob_names_[layer_top_idx[i_top]];
-     LOG(INFO)<<"top blob "<<i_top
-         <<" name "<<blob_name<<" ptr "<<top<<" slot id "<<idx;
      LOG(INFO) << "top blob " << i_top
          << " name " << top_name << " slot id " << idx;
    }


}

  // Memory assignment phase
-  shared_diff_storage_.resize(slots.size());
-  for (int i_mem = 0; i_mem < shared_diff_storage_.size(); i_mem++){
-    shared_diff_storage_[i_mem].reset(new SyncedMemory(1));
  shared_storage_.resize(slots.size());
  for (int i_mem = 0; i_mem < shared_storage_.size(); i_mem++){
    shared_storage_[i_mem].reset(new SyncedMemory(1));
  }

size_t count_raw = 0;
size_t count_opt = 0;
  for (int i_blob = 0; i_blob < blobs_.size(); ++i_blob){
-   if (blob_log.find(blobs_[i_blob].get()) == blob_log.end()){
-     // loss blob cannot be shared due to loss weight
-     continue;
    const string& name = blob_names_[i_blob];
    const size_t bytes = blobs_[i_blob]->count() * sizeof(Dtype);
    count_raw += bytes * 2;
    int idx = -1;
    if (slot_index.find(name + "_data") != slot_index.end()) {
      idx = slot_index[name + "_data"];
      blobs_[i_blob]->SetDataStorage(shared_storage_[idx]);
      shared_storage_[idx]->Resize(bytes);
    } else {
      count_opt += bytes;
    }
-   int idx = blob_log[blobs_[i_blob].get()];
-   blobs_[i_blob]->SetDiffStorage(shared_diff_storage_[idx]);
-   LOG(INFO)<<"blob "<<i_blob<<" name "<<blob_names_[i_blob]<<" idx "<<idx;
-
-   //recover the necessary mem size for the blob
-   shared_diff_storage_[idx]->Resize(blobs_[i_blob]->count() * sizeof(Dtype));
-   count_raw += blobs_[i_blob]->count() * sizeof(Dtype);
    LOG(INFO) << "blob " << i_blob
        << " name " << blob_names_[i_blob]
        << " data idx " << idx;
    if (slot_index.find(name + "_diff") != slot_index.end()) {
      idx = slot_index[name + "_diff"];
      blobs_[i_blob]->SetDiffStorage(shared_storage_[idx]);
      shared_storage_[idx]->Resize(bytes);
    } else {
      count_opt += bytes;
    }
    LOG(INFO) << "blob " << i_blob
        << " name " << blob_names_[i_blob]
        << " diff idx " << idx;
  }

-  for (int i_mem = 0; i_mem < shared_diff_storage_.size(); i_mem++){
-    count_opt += shared_diff_storage_[i_mem]->size();
  for (int i_mem = 0; i_mem < shared_storage_.size(); i_mem++){
    LOG(INFO) << "storage memory slot " << i_mem
        << " size " << shared_storage_[i_mem]->size();
    count_opt += shared_storage_[i_mem]->size();
  }

LOG(INFO)<<"raw memory "<<count_raw<<" opt memory "<<count_opt;

// LOG(FATAL)<<"";
LOG(INFO) << "raw memory " << count_raw << " opt memory " << count_opt;
}
INSTANTIATE_CLASS(Net);
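For reference, the assignment phase above boils down to pointing several blobs at one SyncedMemory slot sized for the largest of them. A condensed sketch follows, assuming this fork's Blob::SetDataStorage() and SyncedMemory::Resize() (both used by MemoryOptimize() above); the two-blob helper itself is illustrative, not part of the patch.

#include <algorithm>
#include <boost/shared_ptr.hpp>
#include "caffe/blob.hpp"
#include "caffe/syncedmem.hpp"

// Alias two blobs' data onto one shared slot; illustrative helper only.
// Assumes this fork's Blob::SetDataStorage() and SyncedMemory::Resize().
template <typename Dtype>
void BindSharedData(caffe::Blob<Dtype>* a, caffe::Blob<Dtype>* b) {
  boost::shared_ptr<caffe::SyncedMemory> slot(new caffe::SyncedMemory(1));
  // Size the slot for the larger payload, then point both blobs at it.
  const size_t bytes = std::max(a->count(), b->count()) * sizeof(Dtype);
  slot->Resize(bytes);
  a->SetDataStorage(slot);
  b->SetDataStorage(slot);  // writing b's data now clobbers a's
}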
