From 268561c360a861abfd093f5930d931c9286b4e5e Mon Sep 17 00:00:00 2001 From: Zhirong Wu Date: Wed, 24 Sep 2014 03:04:00 -0400 Subject: [PATCH 1/6] local config changes --- examples/imagenet/create_imagenet.sh | 11 ++++++----- examples/imagenet/make_imagenet_mean.sh | 2 +- examples/imagenet/train_caffenet.sh | 1 + models/bvlc_reference_caffenet/train_val.prototxt | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/imagenet/create_imagenet.sh b/examples/imagenet/create_imagenet.sh index e912ac43cd7..fdf3cdde41d 100755 --- a/examples/imagenet/create_imagenet.sh +++ b/examples/imagenet/create_imagenet.sh @@ -6,12 +6,13 @@ EXAMPLE=examples/imagenet DATA=data/ilsvrc12 TOOLS=build/tools -TRAIN_DATA_ROOT=/path/to/imagenet/train/ -VAL_DATA_ROOT=/path/to/imagenet/val/ +DEST=/home/common/imagenet +TRAIN_DATA_ROOT=/home/common/imagenet/train/ +VAL_DATA_ROOT=/home/common/imagenet/val/ # Set RESIZE=true to resize the images to 256x256. Leave as false if images have # already been resized using another tool. -RESIZE=false +RESIZE=true if $RESIZE; then RESIZE_HEIGHT=256 RESIZE_WIDTH=256 @@ -42,7 +43,7 @@ GLOG_logtostderr=1 $TOOLS/convert_imageset \ --shuffle \ $TRAIN_DATA_ROOT \ $DATA/train.txt \ - $EXAMPLE/ilsvrc12_train_lmdb + $DEST/ilsvrc12_train_lmdb echo "Creating val lmdb..." @@ -52,6 +53,6 @@ GLOG_logtostderr=1 $TOOLS/convert_imageset \ --shuffle \ $VAL_DATA_ROOT \ $DATA/val.txt \ - $EXAMPLE/ilsvrc12_val_lmdb + $DEST/ilsvrc12_val_lmdb echo "Done." diff --git a/examples/imagenet/make_imagenet_mean.sh b/examples/imagenet/make_imagenet_mean.sh index d3d0c9af5d2..2ecd14ed719 100755 --- a/examples/imagenet/make_imagenet_mean.sh +++ b/examples/imagenet/make_imagenet_mean.sh @@ -2,7 +2,7 @@ # Compute the mean image from the imagenet training leveldb # N.B. 
this is available in data/ilsvrc12 -./build/tools/compute_image_mean examples/imagenet/ilsvrc12_train_leveldb \ +./build/tools/compute_image_mean /home/common/imagenet/ilsvrc12_train_lmdb \ data/ilsvrc12/imagenet_mean.binaryproto echo "Done." diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index 94558ec5466..f5f14341a29 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -2,3 +2,4 @@ ./build/tools/caffe train \ --solver=models/bvlc_reference_caffenet/solver.prototxt + --log_dir=. diff --git a/models/bvlc_reference_caffenet/train_val.prototxt b/models/bvlc_reference_caffenet/train_val.prototxt index 073d8aeff4a..3d9ed65c030 100644 --- a/models/bvlc_reference_caffenet/train_val.prototxt +++ b/models/bvlc_reference_caffenet/train_val.prototxt @@ -5,7 +5,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_train_lmdb" + source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB batch_size: 256 } @@ -22,7 +22,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_val_lmdb" + source: "/home/common/imagenet/ilsvrc12_val_lmdb" backend: LMDB batch_size: 50 } From 2397f3ac3ff8fcc372e67a876dcacae0af0c7ee7 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Wed, 24 Sep 2014 15:08:49 +0800 Subject: [PATCH 2/6] googlenet --- models/googlenet/solver.prototxt | 14 + models/googlenet/train_val.prototxt | 2260 +++++++++++++++++++++++++++ 2 files changed, 2274 insertions(+) create mode 100644 models/googlenet/solver.prototxt create mode 100644 models/googlenet/train_val.prototxt diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt new file mode 100644 index 00000000000..c20dcfe54e2 --- /dev/null +++ b/models/googlenet/solver.prototxt @@ -0,0 +1,14 @@ +net: "models/googlenet/train_val.prototxt" +test_iter: 1000 +test_interval: 1000 +base_lr: 0.01 +lr_policy: "step" +gamma: 0.1 +stepsize: 100000 +display: 
20 +max_iter: 450000 +momentum: 0.9 +weight_decay: 0.0005 +snapshot: 10000 +snapshot_prefix: "models/googlenet/googlenet_train" +solver_mode: GPU diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt new file mode 100644 index 00000000000..2a3f073f985 --- /dev/null +++ b/models/googlenet/train_val.prototxt @@ -0,0 +1,2260 @@ +name: "GoogLeNet" +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/imagenet/ilsvrc12_train_lmdb" + backend: LMDB + batch_size: 256 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: true + } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/imagenet/ilsvrc12_val_lmdb" + backend: LMDB + batch_size: 50 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: false + } + include: { phase: TEST } +} +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + kernel_size: 7 + stride: 2 + pad: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +layers { + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "reduction2" + type: CONVOLUTION + bottom: "norm1" + top: "reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: 
"constant" + value: 1 + } + } +} +layers { + name: "relu_reduction2" + type: RELU + bottom: "reduction2" + top: "reduction2" +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "reduction2" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "norm2" + type: LRN + bottom: "conv2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "pool2" + type: POOLING + bottom: "norm2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +# Inception module 1 *************** +layers { + name: icp1_reduction1" + type: CONVOLUTION + bottom: "pool2" + top: "icp1_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_reduction1" + type: RELU + bottom: "icp1_reduction1" + top: "icp1_reduction1" +} +layers { + name: "icp1_reduction2" + type: CONVOLUTION + bottom: "pool2" + top: "icp1_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_reduction2" + type: RELU + bottom: "icp1_reduction2" + top: "icp1_reduction2" +} +layers { + name: "icp1_pool" + type: POOLING + bottom: "pool2" + top: "icp1_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp1_out0" + type: CONVOLUTION + bottom: "pool2" 
+ top: "icp1_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out0" + type: RELU + bottom: "icp1_out0" + top: "icp1_out0" +} +layers { + name: "icp1_out1" + type: CONVOLUTION + bottom: "icp1_reduction1" + top: "icp1_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out1" + type: RELU + bottom: "icp1_out1" + top: "icp1_out1" +} +layers { + name: "icp1_out2" + type: CONVOLUTION + bottom: "icp1_reduction2" + top: "icp1_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out2" + type: RELU + bottom: "icp1_out2" + top: "icp1_out2" +} +layers { + name: "icp1_out3" + type: CONVOLUTION + bottom: "icp1_pool" + top: "icp1_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out3" + type: RELU + bottom: "icp1_out3" + top: "icp1_out3" +} +# Concat them together +layers { + name: "icp2_in" + type: CONCAT + bottom: "icp1_out0" + bottom: "icp1_out1" + bottom: "icp1_out2" + bottom: "icp1_out3" + top: "icp2_in" +} + +# Inception module 2 *************** +layers { + name: icp2_reduction1" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + 
weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_reduction1" + type: RELU + bottom: "icp2_reduction1" + top: "icp2_reduction1" +} +layers { + name: "icp2_reduction2" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_reduction2" + type: RELU + bottom: "icp2_reduction2" + top: "icp2_reduction2" +} +layers { + name: "icp2_pool" + type: POOLING + bottom: "icp2_in" + top: "icp2_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp2_out0" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out0" + type: RELU + bottom: "icp2_out0" + top: "icp2_out0" +} +layers { + name: "icp2_out1" + type: CONVOLUTION + bottom: "icp2_reduction1" + top: "icp2_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out1" + type: RELU + bottom: "icp2_out1" + top: "icp2_out1" +} +layers { + name: "icp2_out2" + type: CONVOLUTION + bottom: "icp2_reduction2" + top: "icp2_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 2 + 
kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out2" + type: RELU + bottom: "icp2_out2" + top: "icp2_out2" +} +layers { + name: "icp2_out3" + type: CONVOLUTION + bottom: "icp2_pool" + top: "icp2_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out3" + type: RELU + bottom: "icp2_out3" + top: "icp2_out3" +} +# Concat them together +layers { + name: "icp2_out" + type: CONCAT + bottom: "icp2_out0" + bottom: "icp2_out1" + bottom: "icp2_out2" + bottom: "icp2_out3" + top: "icp2_out" +} +layers { + name: "icp3_in" + type: POOLING + bottom: "icp2_out" + top: "icp3_in" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} + +# Inception module 3 *************** +layers { + name: icp3_reduction1" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_reduction1" + type: RELU + bottom: "icp3_reduction1" + top: "icp3_reduction1" +} +layers { + name: "icp3_reduction2" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_reduction2" + type: RELU + bottom: "icp3_reduction2" + top: "icp3_reduction2" +} +layers { + name: "icp3_pool" + type: POOLING + bottom: "icp3_in" + top: "icp3_pool" + 
pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp3_out0" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out0" + type: RELU + bottom: "icp3_out0" + top: "icp3_out0" +} +layers { + name: "icp3_out1" + type: CONVOLUTION + bottom: "icp3_reduction1" + top: "icp3_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 208 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out1" + type: RELU + bottom: "icp3_out1" + top: "icp3_out1" +} +layers { + name: "icp3_out2" + type: CONVOLUTION + bottom: "icp3_reduction2" + top: "icp3_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 48 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out2" + type: RELU + bottom: "icp3_out2" + top: "icp3_out2" +} +layers { + name: "icp3_out3" + type: CONVOLUTION + bottom: "icp3_pool" + top: "icp3_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out3" + type: RELU + bottom: "icp3_out3" + top: "icp3_out3" +} +# Concat them together +layers { + name: "icp3_out" + type: CONCAT + bottom: "icp3_out0" + bottom: "icp3_out1" + bottom: "icp3_out2" + bottom: "icp3_out3" + top: "icp3_out" +} + +# first classification branch 
************ +layers { + name: "cls1_pool" + type: POOLING + bottom: "icp3_out" + top: "cls1_pool" + pooling_param { + pool: AVE + kernel_size: 5 + stride: 3 + pad: 0 + # this padding is somewhat special + } +} +layers { + name: "cls1_reduction" + type: CONVOLUTION + bottom: "cls1_pool" + top: "cls1_reduction" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls1_reduction" + type: RELU + bottom: "cls1_reduction" + top: "cls1_reduction" +} +layers { + name: "cls1_fc1" + type: INNER_PRODUCT + bottom: "cls1_reduction" + top: "cls1_fc1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1024 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls1_fc1" + type: RELU + bottom: "cls1_fc1" + top: "cls1_fc1" +} +layers { + name: "cls1_drop" + type: DROPOUT + bottom: "cls1_fc1" + top: "cls1_fc1" + dropout_param { + dropout_ratio: 0.7 + } +} +layers { + name: "cls1_fc2" + type: INNER_PRODUCT + bottom: "cls1_fc1" + top: "cls1_fc2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss1" + type: SOFTMAX_LOSS + bottom: "cls1_fc2" + bottom: "label" + top: "loss1" +} + +# Inception module 4 *************** +layers { + name: icp4_reduction1" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 112 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + 
name: "relu_icp4_reduction1" + type: RELU + bottom: "icp4_reduction1" + top: "icp4_reduction1" +} +layers { + name: "icp4_reduction2" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 24 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_reduction2" + type: RELU + bottom: "icp4_reduction2" + top: "icp4_reduction2" +} +layers { + name: "icp4_pool" + type: POOLING + bottom: "icp3_out" + top: "icp4_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp4_out0" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out0" + type: RELU + bottom: "icp4_out0" + top: "icp4_out0" +} +layers { + name: "icp4_out1" + type: CONVOLUTION + bottom: "icp4_reduction1" + top: "icp4_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 224 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out1" + type: RELU + bottom: "icp4_out1" + top: "icp4_out1" +} +layers { + name: "icp4_out2" + type: CONVOLUTION + bottom: "icp4_reduction2" + top: "icp4_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out2" + type: RELU + bottom: "icp4_out2" + top: 
"icp4_out2" +} +layers { + name: "icp4_out3" + type: CONVOLUTION + bottom: "icp4_pool" + top: "icp4_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out3" + type: RELU + bottom: "icp4_out3" + top: "icp4_out3" +} +# Concat them together +layers { + name: "icp4_out" + type: CONCAT + bottom: "icp4_out0" + bottom: "icp4_out1" + bottom: "icp4_out2" + bottom: "icp4_out3" + top: "icp4_out" +} + +# Inception module 5 *************** +layers { + name: icp5_reduction1" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_reduction1" + type: RELU + bottom: "icp5_reduction1" + top: "icp5_reduction1" +} +layers { + name: "icp5_reduction2" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 24 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_reduction2" + type: RELU + bottom: "icp5_reduction2" + top: "icp5_reduction2" +} +layers { + name: "icp5_pool" + type: POOLING + bottom: "icp4_out" + top: "icp5_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp5_out0" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 
0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out0" + type: RELU + bottom: "icp5_out0" + top: "icp5_out0" +} +layers { + name: "icp5_out1" + type: CONVOLUTION + bottom: "icp5_reduction1" + top: "icp5_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out1" + type: RELU + bottom: "icp5_out1" + top: "icp5_out1" +} +layers { + name: "icp5_out2" + type: CONVOLUTION + bottom: "icp5_reduction2" + top: "icp5_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out2" + type: RELU + bottom: "icp5_out2" + top: "icp5_out2" +} +layers { + name: "icp5_out3" + type: CONVOLUTION + bottom: "icp5_pool" + top: "icp5_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out3" + type: RELU + bottom: "icp5_out3" + top: "icp5_out3" +} +# Concat them together +layers { + name: "icp5_out" + type: CONCAT + bottom: "icp5_out0" + bottom: "icp5_out1" + bottom: "icp5_out2" + bottom: "icp5_out3" + top: "icp5_out" +} + +# Inception module 6 *************** +layers { + name: icp6_reduction1" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 144 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} 
+layers { + name: "relu_icp6_reduction1" + type: RELU + bottom: "icp6_reduction1" + top: "icp6_reduction1" +} +layers { + name: "icp6_reduction2" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_reduction2" + type: RELU + bottom: "icp6_reduction2" + top: "icp6_reduction2" +} +layers { + name: "icp6_pool" + type: POOLING + bottom: "icp5_out" + top: "icp6_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp6_out0" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 112 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out0" + type: RELU + bottom: "icp6_out0" + top: "icp6_out0" +} +layers { + name: "icp6_out1" + type: CONVOLUTION + bottom: "icp6_reduction1" + top: "icp6_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 288 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out1" + type: RELU + bottom: "icp6_out1" + top: "icp6_out1" +} +layers { + name: "icp6_out2" + type: CONVOLUTION + bottom: "icp6_reduction2" + top: "icp6_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out2" + type: RELU + bottom: "icp6_out2" + 
top: "icp6_out2" +} +layers { + name: "icp6_out3" + type: CONVOLUTION + bottom: "icp6_pool" + top: "icp6_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out3" + type: RELU + bottom: "icp6_out3" + top: "icp6_out3" +} +# Concat them together +layers { + name: "icp6_out" + type: CONCAT + bottom: "icp6_out0" + bottom: "icp6_out1" + bottom: "icp6_out2" + bottom: "icp6_out3" + top: "icp6_out" +} + +# second classification branch ************ +layers { + name: "cls2_pool" + type: POOLING + bottom: "icp6_out" + top: "cls2_pool" + pooling_param { + pool: MAX + kernel_size: 5 + stride: 3 + pad: 0 + # this padding is somewhat special + } +} +layers { + name: "cls2_reduction" + type: CONVOLUTION + bottom: "cls2_pool" + top: "cls2_reduction" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls2_reduction" + type: RELU + bottom: "cls2_reduction" + top: "cls2_reduction" +} +layers { + name: "cls2_fc1" + type: INNER_PRODUCT + bottom: "cls2_reduction" + top: "cls2_fc1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1024 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls2_fc1" + type: RELU + bottom: "cls2_fc1" + top: "cls2_fc1" +} +layers { + name: "cls2_drop" + type: DROPOUT + bottom: "cls2_fc1" + top: "cls2_fc1" + dropout_param { + dropout_ratio: 0.7 + } +} +layers { + name: "cls2_fc2" + type: INNER_PRODUCT + bottom: "cls2_fc1" + top: "cls2_fc2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 
+ inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss2" + type: SOFTMAX_LOSS + bottom: "cls2_fc2" + bottom: "label" + top: "loss2" +} + +# Inception module 7 *************** +layers { + name: icp7_reduction1" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_reduction1" + type: RELU + bottom: "icp7_reduction1" + top: "icp7_reduction1" +} +layers { + name: "icp7_reduction2" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_reduction2" + type: RELU + bottom: "icp2_icp7_reduction2" + top: "icp2_icp7_reduction2" +} +layers { + name: "icp7_pool" + type: POOLING + bottom: "icp6_out" + top: "icp7_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp7_out0" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out0" + type: RELU + bottom: "icp7_out0" + top: "icp7_out0" +} +layers { + name: "icp7_out1" + type: CONVOLUTION + bottom: "icp7_reduction1" + top: "icp7_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + 
num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out1" + type: RELU + bottom: "icp7_out1" + top: "icp7_out1" +} +layers { + name: "icp7_out2" + type: CONVOLUTION + bottom: "icp7_reduction2" + top: "icp7_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out2" + type: RELU + bottom: "icp7_out2" + top: "icp7_out2" +} +layers { + name: "icp7_out3" + type: CONVOLUTION + bottom: "icp7_pool" + top: "icp7_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out3" + type: RELU + bottom: "icp7_out3" + top: "icp7_out3" +} +# Concat them together +layers { + name: "icp7_out" + type: CONCAT + bottom: "icp7_out0" + bottom: "icp7_out1" + bottom: "icp7_out2" + bottom: "icp7_out3" + top: "icp7_out" +} +layers { + name: "icp8_in" + type: POOLING + bottom: "icp7_out" + top: "icp8_in" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +# Inception module 8 *************** +layers { + name: icp8_reduction1" + type: CONVOLUTION + bottom: "icp8_in" + top: "icp8_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_reduction1" + type: RELU + bottom: "icp8_reduction1" + top: "icp8_reduction1" +} +layers { + name: "icp8_reduction2" + type: CONVOLUTION + bottom: "icp8_in" + 
top: "icp8_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_reduction2" + type: RELU + bottom: "icp8_reduction2" + top: "icp8_reduction2" +} +layers { + name: "icp8_pool" + type: POOLING + bottom: "icp8_in" + top: "icp8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp8_out0" + type: CONVOLUTION + bottom: "icp8_in" + top: "icp8_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out0" + type: RELU + bottom: "icp8_out0" + top: "icp8_out0" +} +layers { + name: "icp8_out1" + type: CONVOLUTION + bottom: "icp8_reduction1" + top: "icp8_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out1" + type: RELU + bottom: "icp8_out1" + top: "icp8_out1" +} +layers { + name: "icp8_out2" + type: CONVOLUTION + bottom: "icp8_reduction2" + top: "icp8_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out2" + type: RELU + bottom: "icp8_out2" + top: "icp8_out2" +} +layers { + name: "icp8_out3" + type: CONVOLUTION + bottom: "icp8_pool" + top: "icp8_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + 
convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out3" + type: RELU + bottom: "icp8_out3" + top: "icp8_out3" +} +# Concat them together +layers { + name: "icp8_out" + type: CONCAT + bottom: "icp8_out0" + bottom: "icp8_out1" + bottom: "icp8_out2" + bottom: "icp8_out3" + top: "icp8_out" +} + +# Inception module 9 *************** +layers { + name: icp9_reduction1" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_reduction1" + type: RELU + bottom: "icp9_reduction1" + top: "icp9_reduction1" +} +layers { + name: "icp9_reduction2" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 48 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_reduction2" + type: RELU + bottom: "icp9_reduction2" + top: "icp9_reduction2" +} +layers { + name: "icp9_pool" + type: POOLING + bottom: "icp8_out" + top: "icp9_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp9_out0" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 384 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out0" + type: RELU + bottom: "icp9_out0" + top: "icp9_out0" +} +layers { + 
name: "icp9_out1" + type: CONVOLUTION + bottom: "icp9_reduction1" + top: "icp9_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out1" + type: RELU + bottom: "icp9_out1" + top: "icp9_out1" +} +layers { + name: "icp9_out2" + type: CONVOLUTION + bottom: "icp9_reduction2" + top: "icp9_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out2" + type: RELU + bottom: "icp9_out2" + top: "icp9_out2" +} +layers { + name: "icp9_out3" + type: CONVOLUTION + bottom: "icp9_pool" + top: "icp9_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out3" + type: RELU + bottom: "icp9_out3" + top: "icp9_out3" +} +# Concat them together +layers { + name: "icp9_out" + type: CONCAT + bottom: "icp9_out0" + bottom: "icp9_out1" + bottom: "icp9_out2" + bottom: "icp9_out3" + top: "icp9_out" +} + +# third classification branch +layers { + name: "cls3_pool" + type: POOLING + bottom: "icp9_out" + top: "cls3_pool" + pooling_param { + pool: MAX + kernel_size: 7 + stride: 1 + pad: 0 + # This padding is somewhat special + } +} +layers { + name: "cls3_drop" + type: DROPOUT + bottom: "cls3_pool" + top: "cls3_pool" + dropout_param { + dropout_ratio: 0.4 + } +} +layers { + name: "cls3_fc" + type: INNER_PRODUCT + bottom: "cls3_pool" + top: "cls3_fc" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + 
num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss3" + type: SOFTMAX_LOSS + bottom: "cls3_fc" + bottom: "label" + top: "loss3" +} +layers { + name: "accuracy" + type: ACCURACY + bottom: "cls3_fc" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} From 9030a32853733b41d9428f6e36004fae12ee0a85 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Wed, 24 Sep 2014 07:47:36 -0400 Subject: [PATCH 3/6] googlenet prototype --- examples/imagenet/train_googlenet.sh | 5 +++ .../bvlc_reference_caffenet/solver.prototxt | 1 + models/googlenet/solver.prototxt | 2 + models/googlenet/train_val.prototxt | 38 ++++++++++--------- src/caffe/layers/cudnn_pooling_layer.cu | 4 +- 5 files changed, 30 insertions(+), 20 deletions(-) create mode 100755 examples/imagenet/train_googlenet.sh diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh new file mode 100755 index 00000000000..ae380381a60 --- /dev/null +++ b/examples/imagenet/train_googlenet.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +GOOGLE_LOG_DIR=models/googlenet \ +./build/tools/caffe train \ + --solver=models/googlenet/solver.prototxt diff --git a/models/bvlc_reference_caffenet/solver.prototxt b/models/bvlc_reference_caffenet/solver.prototxt index af1315ba2ac..91c725ba3e7 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -12,3 +12,4 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU +device_id: 2 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index c20dcfe54e2..ca182132580 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -12,3 +12,5 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU +device_id: 3 +test_initialization: true diff --git 
a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index 2a3f073f985..c84ff77b5af 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -5,9 +5,9 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_train_lmdb" + source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB - batch_size: 256 + batch_size: 128 } transform_param { crop_size: 224 @@ -22,7 +22,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_val_lmdb" + source: "/home/common/imagenet/ilsvrc12_val_lmdb" backend: LMDB batch_size: 50 } @@ -72,7 +72,7 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } layers { @@ -166,12 +166,12 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 1 *************** layers { - name: icp1_reduction1" + name: "icp1_reduction1" type: CONVOLUTION bottom: "pool2" top: "icp1_reduction1" @@ -370,7 +370,7 @@ layers { # Inception module 2 *************** layers { - name: icp2_reduction1" + name: "icp2_reduction1" type: CONVOLUTION bottom: "icp2_in" top: "icp2_reduction1" @@ -575,13 +575,13 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 3 *************** layers { - name: icp3_reduction1" + name: "icp3_reduction1" type: CONVOLUTION bottom: "icp3_in" top: "icp3_reduction1" @@ -884,11 +884,12 @@ layers { bottom: "cls1_fc2" bottom: "label" top: "loss1" + loss_weight: 0.3 } # Inception module 4 *************** layers { - name: icp4_reduction1" + name: "icp4_reduction1" type: CONVOLUTION bottom: "icp3_out" top: "icp4_reduction1" @@ -1087,7 +1088,7 @@ layers { # Inception module 5 *************** layers { - name: icp5_reduction1" + name: "icp5_reduction1" type: CONVOLUTION bottom: "icp4_out" top: "icp5_reduction1" @@ -1286,7 +1287,7 @@ layers { # Inception module 6 *************** layers { - name: icp6_reduction1" + name: "icp6_reduction1" type: 
CONVOLUTION bottom: "icp5_out" top: "icp6_reduction1" @@ -1589,11 +1590,12 @@ layers { bottom: "cls2_fc2" bottom: "label" top: "loss2" + loss_weight: 0.3 } # Inception module 7 *************** layers { - name: icp7_reduction1" + name: "icp7_reduction1" type: CONVOLUTION bottom: "icp6_out" top: "icp7_reduction1" @@ -1647,8 +1649,8 @@ layers { layers { name: "relu_icp7_reduction2" type: RELU - bottom: "icp2_icp7_reduction2" - top: "icp2_icp7_reduction2" + bottom: "icp7_reduction2" + top: "icp7_reduction2" } layers { name: "icp7_pool" @@ -1798,12 +1800,12 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 8 *************** layers { - name: icp8_reduction1" + name: "icp8_reduction1" type: CONVOLUTION bottom: "icp8_in" top: "icp8_reduction1" @@ -2002,7 +2004,7 @@ layers { # Inception module 9 *************** layers { - name: icp9_reduction1" + name: "icp9_reduction1" type: CONVOLUTION bottom: "icp8_out" top: "icp9_reduction1" diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu index 99c409dcc96..0e9c1a39398 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cu +++ b/src/caffe/layers/cudnn_pooling_layer.cu @@ -14,7 +14,7 @@ void CuDNNPoolingLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // Fallback to Caffe for padded pooling, max top mask. if ((this->pad_h_ > 0 || this->pad_w_ > 0) || (*top).size() > 1) { - LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; + //LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; return PoolingLayer::Forward_gpu(bottom, top); } @@ -33,7 +33,7 @@ void CuDNNPoolingLayer::Backward_gpu(const vector*>& top, // Fallback to Caffe for padded pooling, max top mask. 
if ((this->pad_h_ > 0 || this->pad_w_ > 0) || top.size() > 1) { - LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; + // LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; return PoolingLayer::Backward_gpu(top, propagate_down, bottom); } From e9dce2d395a842ca4937a5d96cdfa71197c8de40 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Fri, 26 Sep 2014 07:38:38 -0400 Subject: [PATCH 4/6] just modified model parameters, no core code touched --- examples/imagenet/train_caffenet.sh | 5 +++-- examples/imagenet/train_googlenet.sh | 3 ++- examples/imagenet/train_googlenet_gpus.sh | 6 ++++++ examples/imagenet/train_vgg.sh | 6 ++++++ models/bvlc_reference_caffenet/solver.prototxt | 5 ++--- models/bvlc_reference_caffenet/train_val.prototxt | 2 +- models/googlenet/solver.prototxt | 5 ++--- models/googlenet/train_val.prototxt | 4 ++-- 8 files changed, 24 insertions(+), 12 deletions(-) create mode 100755 examples/imagenet/train_googlenet_gpus.sh create mode 100755 examples/imagenet/train_vgg.sh diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index f5f14341a29..1ad46600ddd 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh +GOOGLE_LOG_DIR="models/bvlc_reference_caffenet" \ ./build/tools/caffe train \ - --solver=models/bvlc_reference_caffenet/solver.prototxt - --log_dir=. 
+ --solver=models/bvlc_reference_caffenet/solver.prototxt \ + --gpu=1 diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh index ae380381a60..64b5486bd8a 100755 --- a/examples/imagenet/train_googlenet.sh +++ b/examples/imagenet/train_googlenet.sh @@ -2,4 +2,5 @@ GOOGLE_LOG_DIR=models/googlenet \ ./build/tools/caffe train \ - --solver=models/googlenet/solver.prototxt + --solver=models/googlenet/solver.prototxt \ + --gpu=2 diff --git a/examples/imagenet/train_googlenet_gpus.sh b/examples/imagenet/train_googlenet_gpus.sh new file mode 100755 index 00000000000..71b42593874 --- /dev/null +++ b/examples/imagenet/train_googlenet_gpus.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh + +GLOG_alsologtostderr=1 \ +GOOGLE_LOG_DIR=models/googlenet \ +./build/examples/parallel/gpus.bin \ + models/googlenet/solver.prototxt 1:2 diff --git a/examples/imagenet/train_vgg.sh b/examples/imagenet/train_vgg.sh new file mode 100755 index 00000000000..a9673581895 --- /dev/null +++ b/examples/imagenet/train_vgg.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh + +GOOGLE_LOG_DIR=models/vgg \ +./build/tools/caffe train \ + --solver=models/vgg/solver.prototxt \ + --gpu=3 diff --git a/models/bvlc_reference_caffenet/solver.prototxt b/models/bvlc_reference_caffenet/solver.prototxt index 91c725ba3e7..add67f5a1d5 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -1,15 +1,14 @@ net: "models/bvlc_reference_caffenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.01 +base_lr: 0.004 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 20 max_iter: 450000 -momentum: 0.9 +momentum: 0.95 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU -device_id: 2 diff --git a/models/bvlc_reference_caffenet/train_val.prototxt b/models/bvlc_reference_caffenet/train_val.prototxt index 3d9ed65c030..ea905e30ff0 100644 --- 
a/models/bvlc_reference_caffenet/train_val.prototxt +++ b/models/bvlc_reference_caffenet/train_val.prototxt @@ -7,7 +7,7 @@ layers { data_param { source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB - batch_size: 256 + batch_size: 128 } transform_param { crop_size: 227 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index ca182132580..c47e6110c55 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -1,16 +1,15 @@ net: "models/googlenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.01 +base_lr: 0.001 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 20 max_iter: 450000 -momentum: 0.9 +momentum: 0.975 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU -device_id: 3 test_initialization: true diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index c84ff77b5af..4ec1435e09a 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -106,7 +106,7 @@ layers { } bias_filler { type: "constant" - value: 1 + value: 0 } } } @@ -136,7 +136,7 @@ layers { } bias_filler { type: "constant" - value: 1 + value: 0 } } } From 345d85d0ed46d45a0be84bd62626d9d77849a02e Mon Sep 17 00:00:00 2001 From: zhirongw Date: Fri, 26 Sep 2014 11:49:38 -0400 Subject: [PATCH 5/6] add gradient accumulation for allowing bigger batch size --- include/caffe/blob.hpp | 9 ++++ include/caffe/common.hpp | 6 ++- include/caffe/net.hpp | 3 ++ src/caffe/blob.cpp | 101 ++++++++++++++++++++++++++++++++++++ src/caffe/net.cpp | 13 +++++ src/caffe/proto/caffe.proto | 2 + src/caffe/solver.cpp | 21 +++++++- 7 files changed, 153 insertions(+), 2 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index ef10aea53f0..0c0b0c1a2ac 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -94,10 +94,14 @@ class Blob { const Dtype* gpu_data() const; const Dtype* 
cpu_diff() const; const Dtype* gpu_diff() const; + const Dtype* cpu_acum_diff() const; + const Dtype* gpu_acum_diff() const; Dtype* mutable_cpu_data(); Dtype* mutable_gpu_data(); Dtype* mutable_cpu_diff(); Dtype* mutable_gpu_diff(); + Dtype* mutable_gpu_acum_diff(); + Dtype* mutable_cpu_acum_diff(); void Update(); void FromProto(const BlobProto& proto); void ToProto(BlobProto* proto, bool write_diff = false) const; @@ -107,6 +111,10 @@ class Blob { /// @brief Compute the sum of absolute values (L1 norm) of the diff. Dtype asum_diff() const; + // added for allowing bigger batch_size + void AccumulateDiff(); + void UpdateDiff(); + /** * @brief Set the data_ shared_ptr to point to the SyncedMemory holding the * data_ of Blob other -- useful in Layer&s which simply perform a copy @@ -129,6 +137,7 @@ class Blob { protected: shared_ptr data_; shared_ptr diff_; + shared_ptr acum_diff_; int num_; int channels_; int height_; diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 9c6eb4d6834..b61e919801e 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -127,6 +127,9 @@ class Caffe { static void SetDevice(const int device_id); // Prints the current GPU status. static void DeviceQuery(); + // added for allowing bigger batch size + inline static void set_accumulate(bool acum) { Get().accumulate_ = acum; } + inline static bool accumulate() { return Get().accumulate_; } protected: #ifndef CPU_ONLY @@ -134,7 +137,8 @@ class Caffe { curandGenerator_t curand_generator_; #endif shared_ptr random_generator_; - + // added for allowing bigger batch size + bool accumulate_; Brew mode_; Phase phase_; static shared_ptr singleton_; diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 1d06dc45533..1d70ea632d3 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -85,6 +85,9 @@ class Net { /// @brief Updates the network weights based on the diff values computed. 
void Update(); + // added for allowing large batch size + void AccumulateDiff(); + void UpdateDiff(); /** * @brief For an already initialized net, implicitly copies (i.e., using no * additional memory) the pre-trained layers from another Net. diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index cfffc379eb1..03742d077ab 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -21,6 +21,8 @@ void Blob::Reshape(const int num, const int channels, const int height, capacity_ = count_; data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); + if (Caffe::accumulate()) + acum_diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); } } @@ -61,12 +63,24 @@ const Dtype* Blob::cpu_diff() const { return (const Dtype*)diff_->cpu_data(); } +template +const Dtype* Blob::cpu_acum_diff() const{ + CHECK(acum_diff_); + return (const Dtype*)acum_diff_->cpu_data(); +} + template const Dtype* Blob::gpu_diff() const { CHECK(diff_); return (const Dtype*)diff_->gpu_data(); } +template +const Dtype* Blob::gpu_acum_diff() const{ + CHECK(acum_diff_); + return (const Dtype*)acum_diff_->gpu_data(); +} + template Dtype* Blob::mutable_cpu_data() { CHECK(data_); @@ -85,12 +99,24 @@ Dtype* Blob::mutable_cpu_diff() { return static_cast(diff_->mutable_cpu_data()); } +template +Dtype* Blob::mutable_cpu_acum_diff(){ + CHECK(acum_diff_); + return static_cast(acum_diff_->mutable_cpu_data()); +} + template Dtype* Blob::mutable_gpu_diff() { CHECK(diff_); return static_cast(diff_->mutable_gpu_data()); } +template +Dtype* Blob::mutable_gpu_acum_diff(){ + CHECK(acum_diff_); + return static_cast(acum_diff_->mutable_gpu_data()); +} + template void Blob::ShareData(const Blob& other) { CHECK_EQ(count_, other.count()); @@ -135,6 +161,81 @@ void Blob::Update() { } } +// added for allowing bigger batch_size +template <> void Blob::AccumulateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template <> void Blob::AccumulateDiff(){ + NOT_IMPLEMENTED; + 
return; +} + +template +void Blob::AccumulateDiff(){ + switch (data_->head()){ + case SyncedMemory::HEAD_AT_CPU: + // perform computation on CPU + caffe_axpy(count_, Dtype(1.0), + static_cast(diff_->cpu_data()), + static_cast(acum_diff_->mutable_cpu_data())); + break; + case SyncedMemory::HEAD_AT_GPU: + case SyncedMemory::SYNCED: +#ifndef CPU_ONLY + // perform computation on GPU + caffe_gpu_axpy(count_, Dtype(1.0), + static_cast(diff_->gpu_data()), + static_cast(acum_diff_->mutable_gpu_data())); +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Syncedmem not initialized."; + } +} + +template <> void Blob::UpdateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template <> void Blob::UpdateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template +void Blob::UpdateDiff(){ + switch (data_->head()){ + case SyncedMemory::HEAD_AT_CPU: + // perform computation on CPU + caffe_axpy(count_, Dtype(1.0), + static_cast(acum_diff_->cpu_data()), + static_cast(diff_->mutable_cpu_data())); + caffe_memset(sizeof(Dtype)*count_, 0, + acum_diff_->mutable_cpu_data()); + break; + case SyncedMemory::HEAD_AT_GPU: + case SyncedMemory::SYNCED: +#ifndef CPU_ONLY + // perform computation on GPU + caffe_gpu_axpy(count_, Dtype(1.0), + static_cast(acum_diff_->gpu_data()), + static_cast(diff_->mutable_gpu_data())); + caffe_gpu_memset(sizeof(Dtype)*count_, 0, + acum_diff_->mutable_gpu_data()); +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Syncedmem not initialized."; + } +} + template <> unsigned int Blob::asum_data() const { NOT_IMPLEMENTED; return 0; diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 6f4a651fb10..6adaa3c759b 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -783,6 +783,19 @@ void Net::Update() { } } +// added for allowing bigger batch size +template +void Net::AccumulateDiff(){ + for (int i = 0; i < params_.size(); ++i) + params_[i]->AccumulateDiff(); +} + +template +void Net::UpdateDiff(){ + for (int i = 0; i < params_.size(); ++i) + 
params_[i]->UpdateDiff(); +} + template bool Net::has_blob(const string& blob_name) { return blob_names_index_.find(blob_name) != blob_names_index_.end(); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 9395c38f3e9..188fd8404cd 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -140,6 +140,8 @@ message SolverParameter { // random number generator -- useful for reproducible results. Otherwise, // (and by default) initialize using a seed derived from the system clock. optional int64 random_seed = 20 [default = -1]; + // added to allow big batch_size + optional int32 update_interval = 33 [default = 1]; // Solver type enum SolverType { diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 0ea4edcf9b8..84fa08e95c3 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -35,6 +35,11 @@ void Solver::Init(const SolverParameter& param) { if (param_.random_seed() >= 0) { Caffe::set_random_seed(param_.random_seed()); } + // added for allowing bigger batch size + if (!param_.has_update_interval() || param_.update_interval() == 1) + Caffe::set_accumulate(false); + else + Caffe::set_accumulate(true); // Scaffolding code InitTrainNet(); InitTestNets(); @@ -186,7 +191,21 @@ void Solver::Solve(const char* resume_file) { const bool display = param_.display() && iter_ % param_.display() == 0; net_->set_debug_info(display && param_.debug_info()); - Dtype loss = net_->ForwardBackward(bottom_vec); + + // added for allowing bigger batch size + Dtype loss = 0; + if ( !Caffe::accumulate() ) + loss = net_->ForwardBackward(bottom_vec); + else{ + for (int acum_num = 0; acum_num < param_.update_interval() - 1; ++acum_num){ + loss += net_->ForwardBackward(bottom_vec); + net_->AccumulateDiff(); + } + loss += net_->ForwardBackward(bottom_vec); + net_->UpdateDiff(); + loss /= Dtype(param_.update_interval()); + } + if (display) { LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss; const vector*>& result = 
net_->output_blobs(); From 069c0aa4038973fa0cca00ee79bdbd995be5da0d Mon Sep 17 00:00:00 2001 From: zhirongw Date: Sun, 28 Sep 2014 11:07:10 -0400 Subject: [PATCH 6/6] 1. add debug diplay freq 2. add vgg and change googlenet init --- examples/imagenet/train_caffenet.sh | 4 +- examples/imagenet/train_googlenet.sh | 2 +- examples/imagenet/train_vgg.sh | 2 +- .../bvlc_reference_caffenet/solver.prototxt | 3 + models/googlenet/solver.prototxt | 13 +- models/googlenet/train_val.prototxt | 122 ++--- models/vgg/solver.prototxt | 18 + models/vgg/train_val.prototxt | 429 ++++++++++++++++++ src/caffe/proto/caffe.proto | 1 + src/caffe/solver.cpp | 3 +- 10 files changed, 526 insertions(+), 71 deletions(-) create mode 100644 models/vgg/solver.prototxt create mode 100644 models/vgg/train_val.prototxt diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index 1ad46600ddd..80c93fb90e6 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -1,6 +1,6 @@ #!/usr/bin/env sh -GOOGLE_LOG_DIR="models/bvlc_reference_caffenet" \ +GOOGLE_LOG_DIR=models/bvlc_reference_caffenet \ ./build/tools/caffe train \ --solver=models/bvlc_reference_caffenet/solver.prototxt \ - --gpu=1 + --gpu=2 diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh index 64b5486bd8a..cb1148b8d29 100755 --- a/examples/imagenet/train_googlenet.sh +++ b/examples/imagenet/train_googlenet.sh @@ -3,4 +3,4 @@ GOOGLE_LOG_DIR=models/googlenet \ ./build/tools/caffe train \ --solver=models/googlenet/solver.prototxt \ - --gpu=2 + --gpu=3 diff --git a/examples/imagenet/train_vgg.sh b/examples/imagenet/train_vgg.sh index a9673581895..31dc19b1bf2 100755 --- a/examples/imagenet/train_vgg.sh +++ b/examples/imagenet/train_vgg.sh @@ -3,4 +3,4 @@ GOOGLE_LOG_DIR=models/vgg \ ./build/tools/caffe train \ --solver=models/vgg/solver.prototxt \ - --gpu=3 + --gpu=1 diff --git a/models/bvlc_reference_caffenet/solver.prototxt 
b/models/bvlc_reference_caffenet/solver.prototxt index add67f5a1d5..26269654beb 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -12,3 +12,6 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU +test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index c47e6110c55..19f0eaeca1b 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -1,15 +1,18 @@ net: "models/googlenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.001 +update_interval: 2 +base_lr: 0.0002 lr_policy: "step" -gamma: 0.1 -stepsize: 100000 +gamma: 0.98 +stepsize: 10 display: 20 max_iter: 450000 -momentum: 0.975 +momentum: 0.9 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU -test_initialization: true +test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index 4ec1435e09a..c31b5f0fa9a 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -49,7 +49,7 @@ layers { pad: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.015 } bias_filler { type: "constant" @@ -102,7 +102,7 @@ layers { group: 2 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -132,7 +132,7 @@ layers { group: 2 weight_filler { type: "gaussian" - std: 0.01 + std: 0.02 } bias_filler { type: "constant" @@ -185,7 +185,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -214,7 +214,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -256,7 +256,7 @@ layers { kernel_size: 1 weight_filler { 
type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -285,7 +285,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -314,7 +314,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -343,7 +343,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -384,7 +384,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -413,7 +413,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -455,7 +455,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -484,7 +484,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -513,7 +513,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -542,7 +542,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -595,7 +595,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -624,7 +624,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -666,7 +666,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -695,7 +695,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -724,7 +724,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -753,7 +753,7 @@ layers { kernel_size: 1 weight_filler 
{ type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -807,7 +807,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -903,7 +903,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -932,7 +932,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -974,7 +974,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1003,7 +1003,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1032,7 +1032,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1061,7 +1061,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1102,7 +1102,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1131,7 +1131,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1173,7 +1173,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1202,7 +1202,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1231,7 +1231,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1260,7 +1260,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1301,7 +1301,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1330,7 +1330,7 @@ layers { 
kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1372,7 +1372,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1401,7 +1401,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1430,7 +1430,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1459,7 +1459,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1491,7 +1491,7 @@ layers { bottom: "icp6_out" top: "cls2_pool" pooling_param { - pool: MAX + pool: AVE kernel_size: 5 stride: 3 pad: 0 @@ -1513,7 +1513,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1609,7 +1609,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1638,7 +1638,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1680,7 +1680,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1709,7 +1709,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1738,7 +1738,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1767,7 +1767,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1819,7 +1819,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1848,7 +1848,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: 
"constant" @@ -1890,7 +1890,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1919,7 +1919,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1948,7 +1948,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1977,7 +1977,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2018,7 +2018,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2047,7 +2047,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2089,7 +2089,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2118,7 +2118,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -2147,7 +2147,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -2176,7 +2176,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2208,7 +2208,7 @@ layers { bottom: "icp9_out" top: "cls3_pool" pooling_param { - pool: MAX + pool: AVE kernel_size: 7 stride: 1 pad: 0 diff --git a/models/vgg/solver.prototxt b/models/vgg/solver.prototxt new file mode 100644 index 00000000000..b2be9e93844 --- /dev/null +++ b/models/vgg/solver.prototxt @@ -0,0 +1,18 @@ +net: "models/vgg/train_val.prototxt" +test_iter: 1000 +test_interval: 4000 +update_interval: 4 +base_lr: 0.002 +lr_policy: "step" +gamma: 0.1 +stepsize: 200000 +display: 20 +max_iter: 700000 +momentum: 0.9 +weight_decay: 0.0005 +snapshot: 10000 +snapshot_prefix: "models/vgg/vgg_train" +solver_mode: GPU 
+test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/vgg/train_val.prototxt b/models/vgg/train_val.prototxt new file mode 100644 index 00000000000..abb0a483d16 --- /dev/null +++ b/models/vgg/train_val.prototxt @@ -0,0 +1,429 @@ +name: "VGGNet" +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "/home/common/imagenet/ilsvrc12_train_lmdb" + backend: LMDB + batch_size: 64 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: true + } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "/home/common/imagenet/ilsvrc12_val_lmdb" + backend: LMDB + batch_size: 50 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: false + } + include: { phase: TEST } +} +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv3" 
+ type: CONVOLUTION + bottom: "pool2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" +} +layers { + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu4" + type: RELU + bottom: "conv4" + top: "conv4" +} +layers { + name: "pool4" + type: POOLING + bottom: "conv4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv5" + type: CONVOLUTION + bottom: "pool4" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu5" + type: RELU + bottom: "conv5" + top: "conv5" +} +layers { + name: "conv6" + type: CONVOLUTION + bottom: "conv5" + top: "conv6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu6" + type: RELU + bottom: "conv6" + top: "conv6" +} +layers { + name: "pool6" + type: POOLING + bottom: "conv6" + top: "pool6" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv7" + type: CONVOLUTION + bottom: "pool6" + top: "conv7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + 
num_output: 512 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu7" + type: RELU + bottom: "conv7" + top: "conv7" +} +layers { + name: "conv8" + type: CONVOLUTION + bottom: "conv7" + top: "conv8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu8" + type: RELU + bottom: "conv8" + top: "conv8" +} +layers { + name: "pool8" + type: POOLING + bottom: "conv8" + top: "pool8" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "fc9" + type: INNER_PRODUCT + bottom: "pool8" + top: "fc9" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu9" + type: RELU + bottom: "fc9" + top: "fc9" +} +layers { + name: "drop9" + type: DROPOUT + bottom: "fc9" + top: "fc9" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc10" + type: INNER_PRODUCT + bottom: "fc9" + top: "fc10" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0. 
+ } + } +} +layers { + name: "relu10" + type: RELU + bottom: "fc10" + top: "fc10" +} +layers { + name: "drop10" + type: DROPOUT + bottom: "fc10" + top: "fc10" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc11" + type: INNER_PRODUCT + bottom: "fc10" + top: "fc11" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "fc11" + bottom: "label" +} +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc11" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 188fd8404cd..74b67edaa8e 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -156,6 +156,7 @@ message SolverParameter { // If true, print information about the state of the net that may help with // debugging learning problems. optional bool debug_info = 23 [default = false]; + optional int32 debug_display = 40 [default = 1000]; // If false, don't save a snapshot after training finishes. optional bool snapshot_after_train = 28 [default = true]; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 84fa08e95c3..d4c91d88d41 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -190,7 +190,8 @@ void Solver::Solve(const char* resume_file) { } const bool display = param_.display() && iter_ % param_.display() == 0; - net_->set_debug_info(display && param_.debug_info()); + const bool debug_display = param_.debug_info() && param_.debug_display() > 0 && iter_ % param_.debug_display() == 0; + net_->set_debug_info(debug_display); // added for allowing bigger batch size Dtype loss = 0;