Merge pull request BVLC#99 from yjxiong/fix/mem_config
standardize memory optimization configurations

* yjxiong/fix/mem_config:
  take care of share data with excluded blob
  improvise memory opt configs
  fix cudnn conv legacy bug (BVLC#96)
  add TOC
  Update README.md
  Update README.md (BVLC#95)
  Update README.md
  Improve the python interface (BVLC#80)
  Update README.md
myfavouritekk committed Aug 11, 2016
2 parents d5b66c2 + e331bdd commit 24d1538
Showing 10 changed files with 361 additions and 74 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -4,7 +4,18 @@

This branch hosts the code for the technical report ["Towards Good Practices for Very Deep Two-stream ConvNets"](http://arxiv.org/abs/1507.02159), and more.

* [Updates](#updates)
* [Features](#features)
* [Usage](#usage)
* [Working Examples](#working-examples)
* [Extension](#extension)
* [Questions](#questions)
* [Citation](#citation)


### Updates
- Jul 16, 2016
  * New working example: "Real-time Action Recognition with Enhanced Motion Vector CNNs", CVPR 2016.
- Apr 27, 2016
  * cuDNN v5 support, featuring the fast Winograd convolution and the cuDNN implementation of batch normalization.
- Dec 23, 2015
@@ -66,6 +77,12 @@ mpirun -np 4 ./install/bin/caffe train --solver=<Your Solver File> [--weights=<P
- To disable memory optimization, set `optimize_mem` to `NO_OPTIM`. This may be useful when working with intermediate blobs.

### Working Examples
- Actionness Estimation Using Hybrid FCNs
  - [CVPR 2016 paper](http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Wang_Actionness_Estimation_Using_CVPR_2016_paper.pdf)
  - [Project Site](https://github.com/wanglimin/Actionness-Estimation)
- Real-time Action Recognition with Enhanced Motion Vector CNNs
  - [CVPR 2016 paper](https://wanglimin.github.io/papers/ZhangWWQW_CVPR16.pdf)
  - [Project Site](http://zbwglory.github.io/MV-CNN/index.html)
- Action recognition on UCF101
  - [Project Site](http://personal.ie.cuhk.edu.hk/~xy012/others/action_recog/)
  - [Caffe Model Files](https://github.com/yjxiong/caffe/tree/action_recog/models/action_recognition)
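The `optimize_mem` note in the README hunk above matters when inspecting intermediate activations from pycaffe: with memory optimization enabled, intermediate blobs may share storage and be overwritten during the forward pass. Below is a minimal sketch of the `NO_OPTIM` use case, assuming a deploy prototxt in which `optimize_mem: NO_OPTIM` has already been set; the prototxt path and the blob name are illustrative placeholders.

```python
import caffe

# Assumes 'tsn_deploy_no_optim.prototxt' already contains the line
# 'optimize_mem: NO_OPTIM' as described in the README hunk above; the
# file name and the 'conv5' blob name are placeholders.
caffe.set_mode_cpu()
net = caffe.Net('tsn_deploy_no_optim.prototxt', caffe.TEST)
net.forward()

# With NO_OPTIM the intermediate blob keeps its own storage, so its data
# can be read back reliably after the forward pass.
feat = net.blobs['conv5'].data.copy()
print(feat.shape)
```
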
2 changes: 2 additions & 0 deletions include/caffe/net.hpp
@@ -271,7 +271,9 @@ class Net {
/// Whether to compute and display debug info for the net.
bool debug_info_;

/// Memory optimization related members.
vector< shared_ptr<SyncedMemory> > shared_storage_;
std::set<string> excluded_blob_names_;

DISABLE_COPY_AND_ASSIGN(Net);
};
15 changes: 12 additions & 3 deletions python/bn_convert_style.py
@@ -4,6 +4,9 @@
import os.path as osp
from argparse import ArgumentParser

pycaffe_dir = osp.dirname(__file__)
if osp.join(pycaffe_dir) not in sys.path:
    sys.path.insert(0, pycaffe_dir)
import caffe


@@ -15,26 +18,32 @@ def main(args):
        if name.endswith('_bn'):
            if conversion == 'var_to_inv_std':
                var = param[3].data
                inv_std = 1. / np.sqrt(var + eps)
                inv_std = np.power(var + eps, -0.5)
                param[3].data[...] = inv_std
            elif conversion == 'inv_std_to_var':
                inv_std = param[3].data
                var = np.power(inv_std, -2) - eps
                param[3].data[...] = var
            else:
                raise ValueError("Unknown conversion type {}".format(conversion))
                raise ValueError("Unknown conversion {}".format(conversion))
    if args.output is None:
        name, ext = osp.splitext(args.weights)
        suffix = conversion.split('_to_')[-1]
        args.output = name + '_' + suffix + ext
    net.save(args.output)


if __name__ == '__main__':
    parser = ArgumentParser(
        description="This script converts between two styles of BN models. "
                    "Specifically, in history we have two version of BN implementation, one storing running variance"
                    "Specifically, there have been two versions of the BN "
                    "implementation: one storing the running variance, and "
                    "the other storing the running inverse std.")
    parser.add_argument('model', help="The deploy prototxt")
    parser.add_argument('weights', help="The caffemodel")
    parser.add_argument('--output', '-o', help="Output caffemodel")
    parser.add_argument('--conversion', type=str, default="inv_std_to_var",
                        choices=['inv_std_to_var', 'var_to_inv_std'],
                        help='can be "var_to_inv_std" or "inv_std_to_var"')
    parser.add_argument('--epsilon', type=float, default=1e-5,
                        help='the epsilon used in the conversion, defaults to 1e-5')
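The two BN storage styles handled by `bn_convert_style.py` are related by a simple invertible transform, which the diff above switches from `1. / np.sqrt(var + eps)` to the equivalent `np.power(var + eps, -0.5)`. A small self-contained numpy check of the round trip; the variance values and epsilon below are made-up illustrations.

```python
import numpy as np

eps = 1e-5
var = np.array([0.25, 1.0, 4.0])        # made-up running variances

inv_std = np.power(var + eps, -0.5)     # var_to_inv_std, as in the script
var_back = np.power(inv_std, -2) - eps  # inv_std_to_var, the reverse path

assert np.allclose(var, var_back)       # the two conversions are inverses
```
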
18 changes: 17 additions & 1 deletion python/caffe/_caffe.cpp
@@ -24,6 +24,19 @@
#define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x))
#endif

/* Fix to avoid registration warnings in pycaffe (#3960) */
#define BP_REGISTER_SHARED_PTR_TO_PYTHON(PTR) do { \
  const boost::python::type_info info = \
      boost::python::type_id<shared_ptr<PTR > >(); \
  const boost::python::converter::registration* reg = \
      boost::python::converter::registry::query(info); \
  if (reg == NULL) { \
    bp::register_ptr_to_python<shared_ptr<PTR > >(); \
  } else if ((*reg).m_to_python == NULL) { \
    bp::register_ptr_to_python<shared_ptr<PTR > >(); \
  } \
} while (0)

namespace bp = boost::python;

namespace caffe {
@@ -227,6 +240,7 @@ BOOST_PYTHON_MODULE(_caffe) {
.def("_set_input_arrays", &Net_SetInputArrays,
bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >())
.def("save", &Net_Save);
BP_REGISTER_SHARED_PTR_TO_PYTHON(Net<Dtype>);

bp::class_<Blob<Dtype>, shared_ptr<Blob<Dtype> >, boost::noncopyable>(
"Blob", bp::no_init)
@@ -241,6 +255,7 @@ BOOST_PYTHON_MODULE(_caffe) {
NdarrayCallPolicies()))
.add_property("diff", bp::make_function(&Blob<Dtype>::mutable_cpu_diff,
NdarrayCallPolicies()));
BP_REGISTER_SHARED_PTR_TO_PYTHON(Blob<Dtype>);

bp::class_<Layer<Dtype>, shared_ptr<PythonLayer<Dtype> >,
boost::noncopyable>("Layer", bp::init<const LayerParameter&>())
@@ -249,7 +264,7 @@ BOOST_PYTHON_MODULE(_caffe) {
.def("setup", &Layer<Dtype>::LayerSetUp)
.def("reshape", &Layer<Dtype>::Reshape)
.add_property("type", bp::make_function(&Layer<Dtype>::type));
bp::register_ptr_to_python<shared_ptr<Layer<Dtype> > >();
BP_REGISTER_SHARED_PTR_TO_PYTHON(Layer<Dtype>);

bp::class_<LayerParameter>("LayerParameter", bp::no_init);

@@ -263,6 +278,7 @@ BOOST_PYTHON_MODULE(_caffe) {
&Solver<Dtype>::Solve), SolveOverloads())
.def("step", &Solver<Dtype>::Step)
.def("restore", &Solver<Dtype>::Restore);
BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver<Dtype>);

bp::class_<SGDSolver<Dtype>, bp::bases<Solver<Dtype> >,
shared_ptr<SGDSolver<Dtype> >, boost::noncopyable>(
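The `BP_REGISTER_SHARED_PTR_TO_PYTHON` macro above only calls `bp::register_ptr_to_python` when no converter for the `shared_ptr` type has been registered yet, which is what avoids the duplicate-registration warnings referenced by the #3960 comment. A hedged way to observe this from Python, assuming those warnings surface through the standard `warnings` machinery at import time:

```python
import warnings

# Capture any warnings emitted while the _caffe extension registers its
# Boost.Python converters; with the guarded macro above, duplicate
# shared_ptr registrations should no longer be attempted.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    import caffe  # converter registration happens on first import

print([str(w.message) for w in caught])  # expected: no converter warnings
```
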
84 changes: 84 additions & 0 deletions python/convert_to_fully_conv.py
@@ -0,0 +1,84 @@
import numpy as np
import os
import os.path as osp
import sys
import google.protobuf as pb
from argparse import ArgumentParser

pycaffe_dir = osp.dirname(__file__)
if osp.join(pycaffe_dir) not in sys.path:
    sys.path.insert(0, pycaffe_dir)
import caffe
from caffe.proto import caffe_pb2


def main(args):
    caffe.set_mode_cpu()
    fc_net = caffe.Net(args.model, args.weights, caffe.TEST)
    # make fully conv prototxt
    fc_proto = caffe_pb2.NetParameter()
    with open(args.model, 'r') as f:
        pb.text_format.Parse(f.read(), fc_proto)
    layers = []
    fc_to_conv_dic = {}
    for layer in fc_proto.layer:
        if layer.type != 'InnerProduct':
            layers.append(layer)
            continue
        new_ = caffe_pb2.LayerParameter()
        new_.name = layer.name + '_conv'
        fc_to_conv_dic[layer.name] = new_.name
        new_.type = 'Convolution'
        new_.bottom.extend(layer.bottom)
        new_.top.extend(layer.top)
        new_.convolution_param.num_output = layer.inner_product_param.num_output
        bottom_shape = fc_net.blobs[layer.bottom[0]].data.shape
        if len(bottom_shape) == 4:
            new_.convolution_param.kernel_h = bottom_shape[2]
            new_.convolution_param.kernel_w = bottom_shape[3]
        else:
            new_.convolution_param.kernel_size = 1
        layers.append(new_)
    conv_proto = caffe_pb2.NetParameter()
    conv_proto.CopyFrom(fc_proto)
    del(conv_proto.layer[:])
    conv_proto.layer.extend(layers)
    if args.save_model is None:
        name, ext = osp.splitext(args.model)
        args.save_model = name + '_fully_conv' + ext
    with open(args.save_model, 'w') as f:
        f.write(pb.text_format.MessageToString(conv_proto))
    # make fully conv parameters
    conv_net = caffe.Net(args.save_model, args.weights, caffe.TEST)
    for fc, conv in fc_to_conv_dic.iteritems():
        conv_net.params[conv][0].data.flat = fc_net.params[fc][0].data.flat
        conv_net.params[conv][1].data[...] = fc_net.params[fc][1].data
    if args.save_weights is None:
        name, ext = osp.splitext(args.weights)
        args.save_weights = name + '_fully_conv' + ext
    conv_net.save(args.save_weights)
    print args.model, args.weights


if __name__ == '__main__':
    parser = ArgumentParser(
        description="Convert fully connected layers to convolution layers"
    )
    parser.add_argument(
        'model',
        help="Path to input deploy prototxt"
    )
    parser.add_argument(
        'weights',
        help="Path to input caffemodel"
    )
    parser.add_argument(
        '--save_model',
        help="Path to output deploy prototxt"
    )
    parser.add_argument(
        '--save_weights',
        help="Path to output caffemodel"
    )
    args = parser.parse_args()
    main(args)
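Once `convert_to_fully_conv.py` has rewritten the `InnerProduct` layers as convolutions, the resulting network can be reshaped to inputs larger than the original training crop and run fully-convolutionally. A usage sketch under the script's default `_fully_conv` output naming; the model file names, the `data` blob name, and the 384x384 size are placeholders.

```python
import caffe

caffe.set_mode_cpu()
# Outputs of the converter, using its default '_fully_conv' suffix; the
# base names here are placeholders.
net = caffe.Net('vgg_16_deploy_fully_conv.prototxt',
                'vgg_16_fully_conv.caffemodel', caffe.TEST)

# Feed a larger input than the training crop; the former fc layers now
# slide as convolutions and produce a spatial map of predictions.
net.blobs['data'].reshape(1, 3, 384, 384)
net.reshape()
out = net.forward()
print({name: blob.shape for name, blob in out.items()})
```

The converter itself would be invoked as `python python/convert_to_fully_conv.py <deploy prototxt> <caffemodel>`, with `--save_model` and `--save_weights` optional overrides for the output paths.
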
