From 268561c360a861abfd093f5930d931c9286b4e5e Mon Sep 17 00:00:00 2001 From: Zhirong Wu Date: Wed, 24 Sep 2014 03:04:00 -0400 Subject: [PATCH 1/6] local config changes --- examples/imagenet/create_imagenet.sh | 11 ++++++----- examples/imagenet/make_imagenet_mean.sh | 2 +- examples/imagenet/train_caffenet.sh | 1 + models/bvlc_reference_caffenet/train_val.prototxt | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/imagenet/create_imagenet.sh b/examples/imagenet/create_imagenet.sh index e912ac43cd7..fdf3cdde41d 100755 --- a/examples/imagenet/create_imagenet.sh +++ b/examples/imagenet/create_imagenet.sh @@ -6,12 +6,13 @@ EXAMPLE=examples/imagenet DATA=data/ilsvrc12 TOOLS=build/tools -TRAIN_DATA_ROOT=/path/to/imagenet/train/ -VAL_DATA_ROOT=/path/to/imagenet/val/ +DEST=/home/common/imagenet +TRAIN_DATA_ROOT=/home/common/imagenet/train/ +VAL_DATA_ROOT=/home/common/imagenet/val/ # Set RESIZE=true to resize the images to 256x256. Leave as false if images have # already been resized using another tool. -RESIZE=false +RESIZE=true if $RESIZE; then RESIZE_HEIGHT=256 RESIZE_WIDTH=256 @@ -42,7 +43,7 @@ GLOG_logtostderr=1 $TOOLS/convert_imageset \ --shuffle \ $TRAIN_DATA_ROOT \ $DATA/train.txt \ - $EXAMPLE/ilsvrc12_train_lmdb + $DEST/ilsvrc12_train_lmdb echo "Creating val lmdb..." @@ -52,6 +53,6 @@ GLOG_logtostderr=1 $TOOLS/convert_imageset \ --shuffle \ $VAL_DATA_ROOT \ $DATA/val.txt \ - $EXAMPLE/ilsvrc12_val_lmdb + $DEST/ilsvrc12_val_lmdb echo "Done." diff --git a/examples/imagenet/make_imagenet_mean.sh b/examples/imagenet/make_imagenet_mean.sh index d3d0c9af5d2..2ecd14ed719 100755 --- a/examples/imagenet/make_imagenet_mean.sh +++ b/examples/imagenet/make_imagenet_mean.sh @@ -2,7 +2,7 @@ # Compute the mean image from the imagenet training leveldb # N.B. 
this is available in data/ilsvrc12 -./build/tools/compute_image_mean examples/imagenet/ilsvrc12_train_leveldb \ +./build/tools/compute_image_mean /home/common/imagenet/ilsvrc12_train_lmdb \ data/ilsvrc12/imagenet_mean.binaryproto echo "Done." diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index 94558ec5466..f5f14341a29 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -2,3 +2,4 @@ ./build/tools/caffe train \ --solver=models/bvlc_reference_caffenet/solver.prototxt + --log_dir=. diff --git a/models/bvlc_reference_caffenet/train_val.prototxt b/models/bvlc_reference_caffenet/train_val.prototxt index 073d8aeff4a..3d9ed65c030 100644 --- a/models/bvlc_reference_caffenet/train_val.prototxt +++ b/models/bvlc_reference_caffenet/train_val.prototxt @@ -5,7 +5,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_train_lmdb" + source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB batch_size: 256 } @@ -22,7 +22,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_val_lmdb" + source: "/home/common/imagenet/ilsvrc12_val_lmdb" backend: LMDB batch_size: 50 } From 2397f3ac3ff8fcc372e67a876dcacae0af0c7ee7 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Wed, 24 Sep 2014 15:08:49 +0800 Subject: [PATCH 2/6] googlenet --- models/googlenet/solver.prototxt | 14 + models/googlenet/train_val.prototxt | 2260 +++++++++++++++++++++++++++ 2 files changed, 2274 insertions(+) create mode 100644 models/googlenet/solver.prototxt create mode 100644 models/googlenet/train_val.prototxt diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt new file mode 100644 index 00000000000..c20dcfe54e2 --- /dev/null +++ b/models/googlenet/solver.prototxt @@ -0,0 +1,14 @@ +net: "models/googlenet/train_val.prototxt" +test_iter: 1000 +test_interval: 1000 +base_lr: 0.01 +lr_policy: "step" +gamma: 0.1 +stepsize: 100000 +display: 
20 +max_iter: 450000 +momentum: 0.9 +weight_decay: 0.0005 +snapshot: 10000 +snapshot_prefix: "models/googlenet/googlenet_train" +solver_mode: GPU diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt new file mode 100644 index 00000000000..2a3f073f985 --- /dev/null +++ b/models/googlenet/train_val.prototxt @@ -0,0 +1,2260 @@ +name: "GoogLeNet" +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/imagenet/ilsvrc12_train_lmdb" + backend: LMDB + batch_size: 256 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: true + } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "examples/imagenet/ilsvrc12_val_lmdb" + backend: LMDB + batch_size: 50 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: false + } + include: { phase: TEST } +} +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + kernel_size: 7 + stride: 2 + pad: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +layers { + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "reduction2" + type: CONVOLUTION + bottom: "norm1" + top: "reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: 
"constant" + value: 1 + } + } +} +layers { + name: "relu_reduction2" + type: RELU + bottom: "reduction2" + top: "reduction2" +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "reduction2" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + group: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 1 + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "norm2" + type: LRN + bottom: "conv2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layers { + name: "pool2" + type: POOLING + bottom: "norm2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +# Inception module 1 *************** +layers { + name: icp1_reduction1" + type: CONVOLUTION + bottom: "pool2" + top: "icp1_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_reduction1" + type: RELU + bottom: "icp1_reduction1" + top: "icp1_reduction1" +} +layers { + name: "icp1_reduction2" + type: CONVOLUTION + bottom: "pool2" + top: "icp1_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_reduction2" + type: RELU + bottom: "icp1_reduction2" + top: "icp1_reduction2" +} +layers { + name: "icp1_pool" + type: POOLING + bottom: "pool2" + top: "icp1_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp1_out0" + type: CONVOLUTION + bottom: "pool2" 
+ top: "icp1_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out0" + type: RELU + bottom: "icp1_out0" + top: "icp1_out0" +} +layers { + name: "icp1_out1" + type: CONVOLUTION + bottom: "icp1_reduction1" + top: "icp1_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out1" + type: RELU + bottom: "icp1_out1" + top: "icp1_out1" +} +layers { + name: "icp1_out2" + type: CONVOLUTION + bottom: "icp1_reduction2" + top: "icp1_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out2" + type: RELU + bottom: "icp1_out2" + top: "icp1_out2" +} +layers { + name: "icp1_out3" + type: CONVOLUTION + bottom: "icp1_pool" + top: "icp1_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp1_out3" + type: RELU + bottom: "icp1_out3" + top: "icp1_out3" +} +# Concat them together +layers { + name: "icp2_in" + type: CONCAT + bottom: "icp1_out0" + bottom: "icp1_out1" + bottom: "icp1_out2" + bottom: "icp1_out3" + top: "icp2_in" +} + +# Inception module 2 *************** +layers { + name: icp2_reduction1" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + 
weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_reduction1" + type: RELU + bottom: "icp2_reduction1" + top: "icp2_reduction1" +} +layers { + name: "icp2_reduction2" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_reduction2" + type: RELU + bottom: "icp2_reduction2" + top: "icp2_reduction2" +} +layers { + name: "icp2_pool" + type: POOLING + bottom: "icp2_in" + top: "icp2_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp2_out0" + type: CONVOLUTION + bottom: "icp2_in" + top: "icp2_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out0" + type: RELU + bottom: "icp2_out0" + top: "icp2_out0" +} +layers { + name: "icp2_out1" + type: CONVOLUTION + bottom: "icp2_reduction1" + top: "icp2_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out1" + type: RELU + bottom: "icp2_out1" + top: "icp2_out1" +} +layers { + name: "icp2_out2" + type: CONVOLUTION + bottom: "icp2_reduction2" + top: "icp2_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 2 + 
kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out2" + type: RELU + bottom: "icp2_out2" + top: "icp2_out2" +} +layers { + name: "icp2_out3" + type: CONVOLUTION + bottom: "icp2_pool" + top: "icp2_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp2_out3" + type: RELU + bottom: "icp2_out3" + top: "icp2_out3" +} +# Concat them together +layers { + name: "icp2_out" + type: CONCAT + bottom: "icp2_out0" + bottom: "icp2_out1" + bottom: "icp2_out2" + bottom: "icp2_out3" + top: "icp2_out" +} +layers { + name: "icp3_in" + type: POOLING + bottom: "icp2_out" + top: "icp3_in" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} + +# Inception module 3 *************** +layers { + name: icp3_reduction1" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 96 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_reduction1" + type: RELU + bottom: "icp3_reduction1" + top: "icp3_reduction1" +} +layers { + name: "icp3_reduction2" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 16 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_reduction2" + type: RELU + bottom: "icp3_reduction2" + top: "icp3_reduction2" +} +layers { + name: "icp3_pool" + type: POOLING + bottom: "icp3_in" + top: "icp3_pool" + 
pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp3_out0" + type: CONVOLUTION + bottom: "icp3_in" + top: "icp3_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out0" + type: RELU + bottom: "icp3_out0" + top: "icp3_out0" +} +layers { + name: "icp3_out1" + type: CONVOLUTION + bottom: "icp3_reduction1" + top: "icp3_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 208 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out1" + type: RELU + bottom: "icp3_out1" + top: "icp3_out1" +} +layers { + name: "icp3_out2" + type: CONVOLUTION + bottom: "icp3_reduction2" + top: "icp3_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 48 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out2" + type: RELU + bottom: "icp3_out2" + top: "icp3_out2" +} +layers { + name: "icp3_out3" + type: CONVOLUTION + bottom: "icp3_pool" + top: "icp3_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp3_out3" + type: RELU + bottom: "icp3_out3" + top: "icp3_out3" +} +# Concat them together +layers { + name: "icp3_out" + type: CONCAT + bottom: "icp3_out0" + bottom: "icp3_out1" + bottom: "icp3_out2" + bottom: "icp3_out3" + top: "icp3_out" +} + +# first classification branch 
************ +layers { + name: "cls1_pool" + type: POOLING + bottom: "icp3_out" + top: "cls1_pool" + pooling_param { + pool: AVE + kernel_size: 5 + stride: 3 + pad: 0 + # this padding is somewhat special + } +} +layers { + name: "cls1_reduction" + type: CONVOLUTION + bottom: "cls1_pool" + top: "cls1_reduction" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls1_reduction" + type: RELU + bottom: "cls1_reduction" + top: "cls1_reduction" +} +layers { + name: "cls1_fc1" + type: INNER_PRODUCT + bottom: "cls1_reduction" + top: "cls1_fc1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1024 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls1_fc1" + type: RELU + bottom: "cls1_fc1" + top: "cls1_fc1" +} +layers { + name: "cls1_drop" + type: DROPOUT + bottom: "cls1_fc1" + top: "cls1_fc1" + dropout_param { + dropout_ratio: 0.7 + } +} +layers { + name: "cls1_fc2" + type: INNER_PRODUCT + bottom: "cls1_fc1" + top: "cls1_fc2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss1" + type: SOFTMAX_LOSS + bottom: "cls1_fc2" + bottom: "label" + top: "loss1" +} + +# Inception module 4 *************** +layers { + name: icp4_reduction1" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 112 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + 
name: "relu_icp4_reduction1" + type: RELU + bottom: "icp4_reduction1" + top: "icp4_reduction1" +} +layers { + name: "icp4_reduction2" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 24 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_reduction2" + type: RELU + bottom: "icp4_reduction2" + top: "icp4_reduction2" +} +layers { + name: "icp4_pool" + type: POOLING + bottom: "icp3_out" + top: "icp4_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp4_out0" + type: CONVOLUTION + bottom: "icp3_out" + top: "icp4_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out0" + type: RELU + bottom: "icp4_out0" + top: "icp4_out0" +} +layers { + name: "icp4_out1" + type: CONVOLUTION + bottom: "icp4_reduction1" + top: "icp4_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 224 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out1" + type: RELU + bottom: "icp4_out1" + top: "icp4_out1" +} +layers { + name: "icp4_out2" + type: CONVOLUTION + bottom: "icp4_reduction2" + top: "icp4_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out2" + type: RELU + bottom: "icp4_out2" + top: 
"icp4_out2" +} +layers { + name: "icp4_out3" + type: CONVOLUTION + bottom: "icp4_pool" + top: "icp4_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp4_out3" + type: RELU + bottom: "icp4_out3" + top: "icp4_out3" +} +# Concat them together +layers { + name: "icp4_out" + type: CONCAT + bottom: "icp4_out0" + bottom: "icp4_out1" + bottom: "icp4_out2" + bottom: "icp4_out3" + top: "icp4_out" +} + +# Inception module 5 *************** +layers { + name: icp5_reduction1" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_reduction1" + type: RELU + bottom: "icp5_reduction1" + top: "icp5_reduction1" +} +layers { + name: "icp5_reduction2" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 24 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_reduction2" + type: RELU + bottom: "icp5_reduction2" + top: "icp5_reduction2" +} +layers { + name: "icp5_pool" + type: POOLING + bottom: "icp4_out" + top: "icp5_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp5_out0" + type: CONVOLUTION + bottom: "icp4_out" + top: "icp5_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 
0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out0" + type: RELU + bottom: "icp5_out0" + top: "icp5_out0" +} +layers { + name: "icp5_out1" + type: CONVOLUTION + bottom: "icp5_reduction1" + top: "icp5_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out1" + type: RELU + bottom: "icp5_out1" + top: "icp5_out1" +} +layers { + name: "icp5_out2" + type: CONVOLUTION + bottom: "icp5_reduction2" + top: "icp5_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out2" + type: RELU + bottom: "icp5_out2" + top: "icp5_out2" +} +layers { + name: "icp5_out3" + type: CONVOLUTION + bottom: "icp5_pool" + top: "icp5_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp5_out3" + type: RELU + bottom: "icp5_out3" + top: "icp5_out3" +} +# Concat them together +layers { + name: "icp5_out" + type: CONCAT + bottom: "icp5_out0" + bottom: "icp5_out1" + bottom: "icp5_out2" + bottom: "icp5_out3" + top: "icp5_out" +} + +# Inception module 6 *************** +layers { + name: icp6_reduction1" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 144 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} 
+layers { + name: "relu_icp6_reduction1" + type: RELU + bottom: "icp6_reduction1" + top: "icp6_reduction1" +} +layers { + name: "icp6_reduction2" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_reduction2" + type: RELU + bottom: "icp6_reduction2" + top: "icp6_reduction2" +} +layers { + name: "icp6_pool" + type: POOLING + bottom: "icp5_out" + top: "icp6_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp6_out0" + type: CONVOLUTION + bottom: "icp5_out" + top: "icp6_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 112 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out0" + type: RELU + bottom: "icp6_out0" + top: "icp6_out0" +} +layers { + name: "icp6_out1" + type: CONVOLUTION + bottom: "icp6_reduction1" + top: "icp6_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 288 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out1" + type: RELU + bottom: "icp6_out1" + top: "icp6_out1" +} +layers { + name: "icp6_out2" + type: CONVOLUTION + bottom: "icp6_reduction2" + top: "icp6_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out2" + type: RELU + bottom: "icp6_out2" + 
top: "icp6_out2" +} +layers { + name: "icp6_out3" + type: CONVOLUTION + bottom: "icp6_pool" + top: "icp6_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp6_out3" + type: RELU + bottom: "icp6_out3" + top: "icp6_out3" +} +# Concat them together +layers { + name: "icp6_out" + type: CONCAT + bottom: "icp6_out0" + bottom: "icp6_out1" + bottom: "icp6_out2" + bottom: "icp6_out3" + top: "icp6_out" +} + +# second classification branch ************ +layers { + name: "cls2_pool" + type: POOLING + bottom: "icp6_out" + top: "cls2_pool" + pooling_param { + pool: MAX + kernel_size: 5 + stride: 3 + pad: 0 + # this padding is somewhat special + } +} +layers { + name: "cls2_reduction" + type: CONVOLUTION + bottom: "cls2_pool" + top: "cls2_reduction" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls2_reduction" + type: RELU + bottom: "cls2_reduction" + top: "cls2_reduction" +} +layers { + name: "cls2_fc1" + type: INNER_PRODUCT + bottom: "cls2_reduction" + top: "cls2_fc1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1024 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_cls2_fc1" + type: RELU + bottom: "cls2_fc1" + top: "cls2_fc1" +} +layers { + name: "cls2_drop" + type: DROPOUT + bottom: "cls2_fc1" + top: "cls2_fc1" + dropout_param { + dropout_ratio: 0.7 + } +} +layers { + name: "cls2_fc2" + type: INNER_PRODUCT + bottom: "cls2_fc1" + top: "cls2_fc2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 
+ inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss2" + type: SOFTMAX_LOSS + bottom: "cls2_fc2" + bottom: "label" + top: "loss2" +} + +# Inception module 7 *************** +layers { + name: icp7_reduction1" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_reduction1" + type: RELU + bottom: "icp7_reduction1" + top: "icp7_reduction1" +} +layers { + name: "icp7_reduction2" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_reduction2" + type: RELU + bottom: "icp2_icp7_reduction2" + top: "icp2_icp7_reduction2" +} +layers { + name: "icp7_pool" + type: POOLING + bottom: "icp6_out" + top: "icp7_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp7_out0" + type: CONVOLUTION + bottom: "icp6_out" + top: "icp7_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out0" + type: RELU + bottom: "icp7_out0" + top: "icp7_out0" +} +layers { + name: "icp7_out1" + type: CONVOLUTION + bottom: "icp7_reduction1" + top: "icp7_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + 
num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out1" + type: RELU + bottom: "icp7_out1" + top: "icp7_out1" +} +layers { + name: "icp7_out2" + type: CONVOLUTION + bottom: "icp7_reduction2" + top: "icp7_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out2" + type: RELU + bottom: "icp7_out2" + top: "icp7_out2" +} +layers { + name: "icp7_out3" + type: CONVOLUTION + bottom: "icp7_pool" + top: "icp7_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp7_out3" + type: RELU + bottom: "icp7_out3" + top: "icp7_out3" +} +# Concat them together +layers { + name: "icp7_out" + type: CONCAT + bottom: "icp7_out0" + bottom: "icp7_out1" + bottom: "icp7_out2" + bottom: "icp7_out3" + top: "icp7_out" +} +layers { + name: "icp8_in" + type: POOLING + bottom: "icp7_out" + top: "icp8_in" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 1 + } +} +# Inception module 8 *************** +layers { + name: icp8_reduction1" + type: CONVOLUTION + bottom: "icp8_in" + top: "icp8_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 160 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_reduction1" + type: RELU + bottom: "icp8_reduction1" + top: "icp8_reduction1" +} +layers { + name: "icp8_reduction2" + type: CONVOLUTION + bottom: "icp8_in" + 
top: "icp8_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 32 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_reduction2" + type: RELU + bottom: "icp8_reduction2" + top: "icp8_reduction2" +} +layers { + name: "icp8_pool" + type: POOLING + bottom: "icp8_in" + top: "icp8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp8_out0" + type: CONVOLUTION + bottom: "icp8_in" + top: "icp8_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out0" + type: RELU + bottom: "icp8_out0" + top: "icp8_out0" +} +layers { + name: "icp8_out1" + type: CONVOLUTION + bottom: "icp8_reduction1" + top: "icp8_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out1" + type: RELU + bottom: "icp8_out1" + top: "icp8_out1" +} +layers { + name: "icp8_out2" + type: CONVOLUTION + bottom: "icp8_reduction2" + top: "icp8_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out2" + type: RELU + bottom: "icp8_out2" + top: "icp8_out2" +} +layers { + name: "icp8_out3" + type: CONVOLUTION + bottom: "icp8_pool" + top: "icp8_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + 
convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp8_out3" + type: RELU + bottom: "icp8_out3" + top: "icp8_out3" +} +# Concat them together +layers { + name: "icp8_out" + type: CONCAT + bottom: "icp8_out0" + bottom: "icp8_out1" + bottom: "icp8_out2" + bottom: "icp8_out3" + top: "icp8_out" +} + +# Inception module 9 *************** +layers { + name: icp9_reduction1" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_reduction1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 192 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_reduction1" + type: RELU + bottom: "icp9_reduction1" + top: "icp9_reduction1" +} +layers { + name: "icp9_reduction2" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_reduction2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 48 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_reduction2" + type: RELU + bottom: "icp9_reduction2" + top: "icp9_reduction2" +} +layers { + name: "icp9_pool" + type: POOLING + bottom: "icp8_out" + top: "icp9_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +# *********** +layers { + name: "icp9_out0" + type: CONVOLUTION + bottom: "icp8_out" + top: "icp9_out0" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 384 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out0" + type: RELU + bottom: "icp9_out0" + top: "icp9_out0" +} +layers { + 
name: "icp9_out1" + type: CONVOLUTION + bottom: "icp9_reduction1" + top: "icp9_out1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out1" + type: RELU + bottom: "icp9_out1" + top: "icp9_out1" +} +layers { + name: "icp9_out2" + type: CONVOLUTION + bottom: "icp9_reduction2" + top: "icp9_out2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out2" + type: RELU + bottom: "icp9_out2" + top: "icp9_out2" +} +layers { + name: "icp9_out3" + type: CONVOLUTION + bottom: "icp9_pool" + top: "icp9_out3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu_icp9_out3" + type: RELU + bottom: "icp9_out3" + top: "icp9_out3" +} +# Concat them together +layers { + name: "icp9_out" + type: CONCAT + bottom: "icp9_out0" + bottom: "icp9_out1" + bottom: "icp9_out2" + bottom: "icp9_out3" + top: "icp9_out" +} + +# third classification branch +layers { + name: "cls3_pool" + type: POOLING + bottom: "icp9_out" + top: "cls3_pool" + pooling_param { + pool: MAX + kernel_size: 7 + stride: 1 + pad: 0 + # This padding is somewhat special + } +} +layers { + name: "cls3_drop" + type: DROPOUT + bottom: "cls3_pool" + top: "cls3_pool" + dropout_param { + dropout_ratio: 0.4 + } +} +layers { + name: "cls3_fc" + type: INNER_PRODUCT + bottom: "cls3_pool" + top: "cls3_fc" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + 
num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss3" + type: SOFTMAX_LOSS + bottom: "cls3_fc" + bottom: "label" + top: "loss3" +} +layers { + name: "accuracy" + type: ACCURACY + bottom: "cls3_fc" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} From 9030a32853733b41d9428f6e36004fae12ee0a85 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Wed, 24 Sep 2014 07:47:36 -0400 Subject: [PATCH 3/6] googlenet prototype --- examples/imagenet/train_googlenet.sh | 5 +++ .../bvlc_reference_caffenet/solver.prototxt | 1 + models/googlenet/solver.prototxt | 2 + models/googlenet/train_val.prototxt | 38 ++++++++++--------- src/caffe/layers/cudnn_pooling_layer.cu | 4 +- 5 files changed, 30 insertions(+), 20 deletions(-) create mode 100755 examples/imagenet/train_googlenet.sh diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh new file mode 100755 index 00000000000..ae380381a60 --- /dev/null +++ b/examples/imagenet/train_googlenet.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +GOOGLE_LOG_DIR=models/googlenet \ +./build/tools/caffe train \ + --solver=models/googlenet/solver.prototxt diff --git a/models/bvlc_reference_caffenet/solver.prototxt b/models/bvlc_reference_caffenet/solver.prototxt index af1315ba2ac..91c725ba3e7 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -12,3 +12,4 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU +device_id: 2 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index c20dcfe54e2..ca182132580 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -12,3 +12,5 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU +device_id: 3 +test_initialization: true diff --git 
a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index 2a3f073f985..c84ff77b5af 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -5,9 +5,9 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_train_lmdb" + source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB - batch_size: 256 + batch_size: 128 } transform_param { crop_size: 224 @@ -22,7 +22,7 @@ layers { top: "data" top: "label" data_param { - source: "examples/imagenet/ilsvrc12_val_lmdb" + source: "/home/common/imagenet/ilsvrc12_val_lmdb" backend: LMDB batch_size: 50 } @@ -72,7 +72,7 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } layers { @@ -166,12 +166,12 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 1 *************** layers { - name: icp1_reduction1" + name: "icp1_reduction1" type: CONVOLUTION bottom: "pool2" top: "icp1_reduction1" @@ -370,7 +370,7 @@ layers { # Inception module 2 *************** layers { - name: icp2_reduction1" + name: "icp2_reduction1" type: CONVOLUTION bottom: "icp2_in" top: "icp2_reduction1" @@ -575,13 +575,13 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 3 *************** layers { - name: icp3_reduction1" + name: "icp3_reduction1" type: CONVOLUTION bottom: "icp3_in" top: "icp3_reduction1" @@ -884,11 +884,12 @@ layers { bottom: "cls1_fc2" bottom: "label" top: "loss1" + loss_weight: 0.3 } # Inception module 4 *************** layers { - name: icp4_reduction1" + name: "icp4_reduction1" type: CONVOLUTION bottom: "icp3_out" top: "icp4_reduction1" @@ -1087,7 +1088,7 @@ layers { # Inception module 5 *************** layers { - name: icp5_reduction1" + name: "icp5_reduction1" type: CONVOLUTION bottom: "icp4_out" top: "icp5_reduction1" @@ -1286,7 +1287,7 @@ layers { # Inception module 6 *************** layers { - name: icp6_reduction1" + name: "icp6_reduction1" type: 
CONVOLUTION bottom: "icp5_out" top: "icp6_reduction1" @@ -1589,11 +1590,12 @@ layers { bottom: "cls2_fc2" bottom: "label" top: "loss2" + loss_weight: 0.3 } # Inception module 7 *************** layers { - name: icp7_reduction1" + name: "icp7_reduction1" type: CONVOLUTION bottom: "icp6_out" top: "icp7_reduction1" @@ -1647,8 +1649,8 @@ layers { layers { name: "relu_icp7_reduction2" type: RELU - bottom: "icp2_icp7_reduction2" - top: "icp2_icp7_reduction2" + bottom: "icp7_reduction2" + top: "icp7_reduction2" } layers { name: "icp7_pool" @@ -1798,12 +1800,12 @@ layers { pool: MAX kernel_size: 3 stride: 2 - pad: 1 + pad: 0 } } # Inception module 8 *************** layers { - name: icp8_reduction1" + name: "icp8_reduction1" type: CONVOLUTION bottom: "icp8_in" top: "icp8_reduction1" @@ -2002,7 +2004,7 @@ layers { # Inception module 9 *************** layers { - name: icp9_reduction1" + name: "icp9_reduction1" type: CONVOLUTION bottom: "icp8_out" top: "icp9_reduction1" diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu index 99c409dcc96..0e9c1a39398 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cu +++ b/src/caffe/layers/cudnn_pooling_layer.cu @@ -14,7 +14,7 @@ void CuDNNPoolingLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // Fallback to Caffe for padded pooling, max top mask. if ((this->pad_h_ > 0 || this->pad_w_ > 0) || (*top).size() > 1) { - LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; + //LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; return PoolingLayer::Forward_gpu(bottom, top); } @@ -33,7 +33,7 @@ void CuDNNPoolingLayer::Backward_gpu(const vector*>& top, // Fallback to Caffe for padded pooling, max top mask. 
if ((this->pad_h_ > 0 || this->pad_w_ > 0) || top.size() > 1) { - LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; + // LOG(WARNING) << "Falling back to standard Caffe for padded pooling."; return PoolingLayer::Backward_gpu(top, propagate_down, bottom); } From e9dce2d395a842ca4937a5d96cdfa71197c8de40 Mon Sep 17 00:00:00 2001 From: zhirongw Date: Fri, 26 Sep 2014 07:38:38 -0400 Subject: [PATCH 4/6] just modified model parameters, no core code touched --- examples/imagenet/train_caffenet.sh | 5 +++-- examples/imagenet/train_googlenet.sh | 3 ++- examples/imagenet/train_googlenet_gpus.sh | 6 ++++++ examples/imagenet/train_vgg.sh | 6 ++++++ models/bvlc_reference_caffenet/solver.prototxt | 5 ++--- models/bvlc_reference_caffenet/train_val.prototxt | 2 +- models/googlenet/solver.prototxt | 5 ++--- models/googlenet/train_val.prototxt | 4 ++-- 8 files changed, 24 insertions(+), 12 deletions(-) create mode 100755 examples/imagenet/train_googlenet_gpus.sh create mode 100755 examples/imagenet/train_vgg.sh diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index f5f14341a29..1ad46600ddd 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh +GOOGLE_LOG_DIR="models/bvlc_reference_caffenet" \ ./build/tools/caffe train \ - --solver=models/bvlc_reference_caffenet/solver.prototxt - --log_dir=. 
+ --solver=models/bvlc_reference_caffenet/solver.prototxt \ + --gpu=1 diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh index ae380381a60..64b5486bd8a 100755 --- a/examples/imagenet/train_googlenet.sh +++ b/examples/imagenet/train_googlenet.sh @@ -2,4 +2,5 @@ GOOGLE_LOG_DIR=models/googlenet \ ./build/tools/caffe train \ - --solver=models/googlenet/solver.prototxt + --solver=models/googlenet/solver.prototxt \ + --gpu=2 diff --git a/examples/imagenet/train_googlenet_gpus.sh b/examples/imagenet/train_googlenet_gpus.sh new file mode 100755 index 00000000000..71b42593874 --- /dev/null +++ b/examples/imagenet/train_googlenet_gpus.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh + +GLOG_alsologtostderr=1 \ +GOOGLE_LOG_DIR=models/googlenet \ +./build/examples/parallel/gpus.bin \ + models/googlenet/solver.prototxt 1:2 diff --git a/examples/imagenet/train_vgg.sh b/examples/imagenet/train_vgg.sh new file mode 100755 index 00000000000..a9673581895 --- /dev/null +++ b/examples/imagenet/train_vgg.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env sh + +GOOGLE_LOG_DIR=models/vgg \ +./build/tools/caffe train \ + --solver=models/vgg/solver.prototxt \ + --gpu=3 diff --git a/models/bvlc_reference_caffenet/solver.prototxt b/models/bvlc_reference_caffenet/solver.prototxt index 91c725ba3e7..add67f5a1d5 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -1,15 +1,14 @@ net: "models/bvlc_reference_caffenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.01 +base_lr: 0.004 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 20 max_iter: 450000 -momentum: 0.9 +momentum: 0.95 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU -device_id: 2 diff --git a/models/bvlc_reference_caffenet/train_val.prototxt b/models/bvlc_reference_caffenet/train_val.prototxt index 3d9ed65c030..ea905e30ff0 100644 --- 
a/models/bvlc_reference_caffenet/train_val.prototxt +++ b/models/bvlc_reference_caffenet/train_val.prototxt @@ -7,7 +7,7 @@ layers { data_param { source: "/home/common/imagenet/ilsvrc12_train_lmdb" backend: LMDB - batch_size: 256 + batch_size: 128 } transform_param { crop_size: 227 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index ca182132580..c47e6110c55 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -1,16 +1,15 @@ net: "models/googlenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.01 +base_lr: 0.001 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 20 max_iter: 450000 -momentum: 0.9 +momentum: 0.975 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU -device_id: 3 test_initialization: true diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index c84ff77b5af..4ec1435e09a 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -106,7 +106,7 @@ layers { } bias_filler { type: "constant" - value: 1 + value: 0 } } } @@ -136,7 +136,7 @@ layers { } bias_filler { type: "constant" - value: 1 + value: 0 } } } From 345d85d0ed46d45a0be84bd62626d9d77849a02e Mon Sep 17 00:00:00 2001 From: zhirongw Date: Fri, 26 Sep 2014 11:49:38 -0400 Subject: [PATCH 5/6] add gradient accumulation for allowing bigger batch size --- include/caffe/blob.hpp | 9 ++++ include/caffe/common.hpp | 6 ++- include/caffe/net.hpp | 3 ++ src/caffe/blob.cpp | 101 ++++++++++++++++++++++++++++++++++++ src/caffe/net.cpp | 13 +++++ src/caffe/proto/caffe.proto | 2 + src/caffe/solver.cpp | 21 +++++++- 7 files changed, 153 insertions(+), 2 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index ef10aea53f0..0c0b0c1a2ac 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -94,10 +94,14 @@ class Blob { const Dtype* gpu_data() const; const Dtype* 
cpu_diff() const; const Dtype* gpu_diff() const; + const Dtype* cpu_acum_diff() const; + const Dtype* gpu_acum_diff() const; Dtype* mutable_cpu_data(); Dtype* mutable_gpu_data(); Dtype* mutable_cpu_diff(); Dtype* mutable_gpu_diff(); + Dtype* mutable_gpu_acum_diff(); + Dtype* mutable_cpu_acum_diff(); void Update(); void FromProto(const BlobProto& proto); void ToProto(BlobProto* proto, bool write_diff = false) const; @@ -107,6 +111,10 @@ class Blob { /// @brief Compute the sum of absolute values (L1 norm) of the diff. Dtype asum_diff() const; + // added for allowing bigger batch_size + void AccumulateDiff(); + void UpdateDiff(); + /** * @brief Set the data_ shared_ptr to point to the SyncedMemory holding the * data_ of Blob other -- useful in Layer&s which simply perform a copy @@ -129,6 +137,7 @@ class Blob { protected: shared_ptr data_; shared_ptr diff_; + shared_ptr acum_diff_; int num_; int channels_; int height_; diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 9c6eb4d6834..b61e919801e 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -127,6 +127,9 @@ class Caffe { static void SetDevice(const int device_id); // Prints the current GPU status. static void DeviceQuery(); + // added for allowing bigger batch size + inline static void set_accumulate(bool acum) { Get().accumulate_ = acum; } + inline static bool accumulate() { return Get().accumulate_; } protected: #ifndef CPU_ONLY @@ -134,7 +137,8 @@ class Caffe { curandGenerator_t curand_generator_; #endif shared_ptr random_generator_; - + // added for allowing bigger batch size + bool accumulate_; Brew mode_; Phase phase_; static shared_ptr singleton_; diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 1d06dc45533..1d70ea632d3 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -85,6 +85,9 @@ class Net { /// @brief Updates the network weights based on the diff values computed. 
void Update(); + // added for allowing large batch size + void AccumulateDiff(); + void UpdateDiff(); /** * @brief For an already initialized net, implicitly copies (i.e., using no * additional memory) the pre-trained layers from another Net. diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index cfffc379eb1..03742d077ab 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -21,6 +21,8 @@ void Blob::Reshape(const int num, const int channels, const int height, capacity_ = count_; data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); + if (Caffe::accumulate()) + acum_diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); } } @@ -61,12 +63,24 @@ const Dtype* Blob::cpu_diff() const { return (const Dtype*)diff_->cpu_data(); } +template +const Dtype* Blob::cpu_acum_diff() const{ + CHECK(acum_diff_); + return (const Dtype*)acum_diff_->cpu_data(); +} + template const Dtype* Blob::gpu_diff() const { CHECK(diff_); return (const Dtype*)diff_->gpu_data(); } +template +const Dtype* Blob::gpu_acum_diff() const{ + CHECK(acum_diff_); + return (const Dtype*)acum_diff_->gpu_data(); +} + template Dtype* Blob::mutable_cpu_data() { CHECK(data_); @@ -85,12 +99,24 @@ Dtype* Blob::mutable_cpu_diff() { return static_cast(diff_->mutable_cpu_data()); } +template +Dtype* Blob::mutable_cpu_acum_diff(){ + CHECK(acum_diff_); + return static_cast(acum_diff_->mutable_cpu_data()); +} + template Dtype* Blob::mutable_gpu_diff() { CHECK(diff_); return static_cast(diff_->mutable_gpu_data()); } +template +Dtype* Blob::mutable_gpu_acum_diff(){ + CHECK(acum_diff_); + return static_cast(acum_diff_->mutable_gpu_data()); +} + template void Blob::ShareData(const Blob& other) { CHECK_EQ(count_, other.count()); @@ -135,6 +161,81 @@ void Blob::Update() { } } +// added for allowing bigger batch_size +template <> void Blob::AccumulateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template <> void Blob::AccumulateDiff(){ + NOT_IMPLEMENTED; + 
return; +} + +template +void Blob::AccumulateDiff(){ + switch (data_->head()){ + case SyncedMemory::HEAD_AT_CPU: + // perform computation on CPU + caffe_axpy(count_, Dtype(1.0), + static_cast(diff_->cpu_data()), + static_cast(acum_diff_->mutable_cpu_data())); + break; + case SyncedMemory::HEAD_AT_GPU: + case SyncedMemory::SYNCED: +#ifndef CPU_ONLY + // perform computation on GPU + caffe_gpu_axpy(count_, Dtype(1.0), + static_cast(diff_->gpu_data()), + static_cast(acum_diff_->mutable_gpu_data())); +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Syncedmem not initialized."; + } +} + +template <> void Blob::UpdateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template <> void Blob::UpdateDiff(){ + NOT_IMPLEMENTED; + return; +} + +template +void Blob::UpdateDiff(){ + switch (data_->head()){ + case SyncedMemory::HEAD_AT_CPU: + // perform computation on CPU + caffe_axpy(count_, Dtype(1.0), + static_cast(acum_diff_->cpu_data()), + static_cast(diff_->mutable_cpu_data())); + caffe_memset(sizeof(Dtype)*count_, 0, + acum_diff_->mutable_cpu_data()); + break; + case SyncedMemory::HEAD_AT_GPU: + case SyncedMemory::SYNCED: +#ifndef CPU_ONLY + // perform computation on GPU + caffe_gpu_axpy(count_, Dtype(1.0), + static_cast(acum_diff_->gpu_data()), + static_cast(diff_->mutable_gpu_data())); + caffe_gpu_memset(sizeof(Dtype)*count_, 0, + acum_diff_->mutable_gpu_data()); +#else + NO_GPU; +#endif + break; + default: + LOG(FATAL) << "Syncedmem not initialized."; + } +} + template <> unsigned int Blob::asum_data() const { NOT_IMPLEMENTED; return 0; diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 6f4a651fb10..6adaa3c759b 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -783,6 +783,19 @@ void Net::Update() { } } +// added for allowing bigger batch size +template +void Net::AccumulateDiff(){ + for (int i = 0; i < params_.size(); ++i) + params_[i]->AccumulateDiff(); +} + +template +void Net::UpdateDiff(){ + for (int i = 0; i < params_.size(); ++i) + 
params_[i]->UpdateDiff(); +} + template bool Net::has_blob(const string& blob_name) { return blob_names_index_.find(blob_name) != blob_names_index_.end(); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 9395c38f3e9..188fd8404cd 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -140,6 +140,8 @@ message SolverParameter { // random number generator -- useful for reproducible results. Otherwise, // (and by default) initialize using a seed derived from the system clock. optional int64 random_seed = 20 [default = -1]; + // added to allow big batch_size + optional int32 update_interval = 33 [default = 1]; // Solver type enum SolverType { diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 0ea4edcf9b8..84fa08e95c3 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -35,6 +35,11 @@ void Solver::Init(const SolverParameter& param) { if (param_.random_seed() >= 0) { Caffe::set_random_seed(param_.random_seed()); } + // added for allowing bigger batch size + if (!param_.has_update_interval() || param_.update_interval() == 1) + Caffe::set_accumulate(false); + else + Caffe::set_accumulate(true); // Scaffolding code InitTrainNet(); InitTestNets(); @@ -186,7 +191,21 @@ void Solver::Solve(const char* resume_file) { const bool display = param_.display() && iter_ % param_.display() == 0; net_->set_debug_info(display && param_.debug_info()); - Dtype loss = net_->ForwardBackward(bottom_vec); + + // added for allowing bigger batch size + Dtype loss = 0; + if ( !Caffe::accumulate() ) + loss = net_->ForwardBackward(bottom_vec); + else{ + for (int acum_num = 0; acum_num < param_.update_interval() - 1; ++acum_num){ + loss += net_->ForwardBackward(bottom_vec); + net_->AccumulateDiff(); + } + loss += net_->ForwardBackward(bottom_vec); + net_->UpdateDiff(); + loss /= Dtype(param_.update_interval()); + } + if (display) { LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss; const vector*>& result = 
net_->output_blobs(); From 069c0aa4038973fa0cca00ee79bdbd995be5da0d Mon Sep 17 00:00:00 2001 From: zhirongw Date: Sun, 28 Sep 2014 11:07:10 -0400 Subject: [PATCH 6/6] 1. add debug diplay freq 2. add vgg and change googlenet init --- examples/imagenet/train_caffenet.sh | 4 +- examples/imagenet/train_googlenet.sh | 2 +- examples/imagenet/train_vgg.sh | 2 +- .../bvlc_reference_caffenet/solver.prototxt | 3 + models/googlenet/solver.prototxt | 13 +- models/googlenet/train_val.prototxt | 122 ++--- models/vgg/solver.prototxt | 18 + models/vgg/train_val.prototxt | 429 ++++++++++++++++++ src/caffe/proto/caffe.proto | 1 + src/caffe/solver.cpp | 3 +- 10 files changed, 526 insertions(+), 71 deletions(-) create mode 100644 models/vgg/solver.prototxt create mode 100644 models/vgg/train_val.prototxt diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index 1ad46600ddd..80c93fb90e6 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -1,6 +1,6 @@ #!/usr/bin/env sh -GOOGLE_LOG_DIR="models/bvlc_reference_caffenet" \ +GOOGLE_LOG_DIR=models/bvlc_reference_caffenet \ ./build/tools/caffe train \ --solver=models/bvlc_reference_caffenet/solver.prototxt \ - --gpu=1 + --gpu=2 diff --git a/examples/imagenet/train_googlenet.sh b/examples/imagenet/train_googlenet.sh index 64b5486bd8a..cb1148b8d29 100755 --- a/examples/imagenet/train_googlenet.sh +++ b/examples/imagenet/train_googlenet.sh @@ -3,4 +3,4 @@ GOOGLE_LOG_DIR=models/googlenet \ ./build/tools/caffe train \ --solver=models/googlenet/solver.prototxt \ - --gpu=2 + --gpu=3 diff --git a/examples/imagenet/train_vgg.sh b/examples/imagenet/train_vgg.sh index a9673581895..31dc19b1bf2 100755 --- a/examples/imagenet/train_vgg.sh +++ b/examples/imagenet/train_vgg.sh @@ -3,4 +3,4 @@ GOOGLE_LOG_DIR=models/vgg \ ./build/tools/caffe train \ --solver=models/vgg/solver.prototxt \ - --gpu=3 + --gpu=1 diff --git a/models/bvlc_reference_caffenet/solver.prototxt 
b/models/bvlc_reference_caffenet/solver.prototxt index add67f5a1d5..26269654beb 100644 --- a/models/bvlc_reference_caffenet/solver.prototxt +++ b/models/bvlc_reference_caffenet/solver.prototxt @@ -12,3 +12,6 @@ weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/bvlc_reference_caffenet/caffenet_train" solver_mode: GPU +test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/googlenet/solver.prototxt b/models/googlenet/solver.prototxt index c47e6110c55..19f0eaeca1b 100644 --- a/models/googlenet/solver.prototxt +++ b/models/googlenet/solver.prototxt @@ -1,15 +1,18 @@ net: "models/googlenet/train_val.prototxt" test_iter: 1000 test_interval: 1000 -base_lr: 0.001 +update_interval: 2 +base_lr: 0.0002 lr_policy: "step" -gamma: 0.1 -stepsize: 100000 +gamma: 0.98 +stepsize: 10 display: 20 max_iter: 450000 -momentum: 0.975 +momentum: 0.9 weight_decay: 0.0005 snapshot: 10000 snapshot_prefix: "models/googlenet/googlenet_train" solver_mode: GPU -test_initialization: true +test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/googlenet/train_val.prototxt b/models/googlenet/train_val.prototxt index 4ec1435e09a..c31b5f0fa9a 100644 --- a/models/googlenet/train_val.prototxt +++ b/models/googlenet/train_val.prototxt @@ -49,7 +49,7 @@ layers { pad: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.015 } bias_filler { type: "constant" @@ -102,7 +102,7 @@ layers { group: 2 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -132,7 +132,7 @@ layers { group: 2 weight_filler { type: "gaussian" - std: 0.01 + std: 0.02 } bias_filler { type: "constant" @@ -185,7 +185,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -214,7 +214,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -256,7 +256,7 @@ layers { kernel_size: 1 weight_filler { 
type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -285,7 +285,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -314,7 +314,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -343,7 +343,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -384,7 +384,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -413,7 +413,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -455,7 +455,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -484,7 +484,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -513,7 +513,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -542,7 +542,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -595,7 +595,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -624,7 +624,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -666,7 +666,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -695,7 +695,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -724,7 +724,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -753,7 +753,7 @@ layers { kernel_size: 1 weight_filler 
{ type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -807,7 +807,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -903,7 +903,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -932,7 +932,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -974,7 +974,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1003,7 +1003,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1032,7 +1032,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1061,7 +1061,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1102,7 +1102,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1131,7 +1131,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1173,7 +1173,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1202,7 +1202,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1231,7 +1231,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1260,7 +1260,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1301,7 +1301,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1330,7 +1330,7 @@ layers { 
kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1372,7 +1372,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1401,7 +1401,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1430,7 +1430,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1459,7 +1459,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1491,7 +1491,7 @@ layers { bottom: "icp6_out" top: "cls2_pool" pooling_param { - pool: MAX + pool: AVE kernel_size: 5 stride: 3 pad: 0 @@ -1513,7 +1513,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1609,7 +1609,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1638,7 +1638,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1680,7 +1680,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1709,7 +1709,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1738,7 +1738,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1767,7 +1767,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1819,7 +1819,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1848,7 +1848,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: 
"constant" @@ -1890,7 +1890,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -1919,7 +1919,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -1948,7 +1948,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -1977,7 +1977,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2018,7 +2018,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2047,7 +2047,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2089,7 +2089,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2118,7 +2118,7 @@ layers { kernel_size: 3 weight_filler { type: "gaussian" - std: 0.01 + std: 0.04 } bias_filler { type: "constant" @@ -2147,7 +2147,7 @@ layers { kernel_size: 5 weight_filler { type: "gaussian" - std: 0.01 + std: 0.08 } bias_filler { type: "constant" @@ -2176,7 +2176,7 @@ layers { kernel_size: 1 weight_filler { type: "gaussian" - std: 0.01 + std: 0.1 } bias_filler { type: "constant" @@ -2208,7 +2208,7 @@ layers { bottom: "icp9_out" top: "cls3_pool" pooling_param { - pool: MAX + pool: AVE kernel_size: 7 stride: 1 pad: 0 diff --git a/models/vgg/solver.prototxt b/models/vgg/solver.prototxt new file mode 100644 index 00000000000..b2be9e93844 --- /dev/null +++ b/models/vgg/solver.prototxt @@ -0,0 +1,18 @@ +net: "models/vgg/train_val.prototxt" +test_iter: 1000 +test_interval: 4000 +update_interval: 4 +base_lr: 0.002 +lr_policy: "step" +gamma: 0.1 +stepsize: 200000 +display: 20 +max_iter: 700000 +momentum: 0.9 +weight_decay: 0.0005 +snapshot: 10000 +snapshot_prefix: "models/vgg/vgg_train" +solver_mode: GPU 
+test_initialization: false +debug_info: true +debug_display: 1000 diff --git a/models/vgg/train_val.prototxt b/models/vgg/train_val.prototxt new file mode 100644 index 00000000000..abb0a483d16 --- /dev/null +++ b/models/vgg/train_val.prototxt @@ -0,0 +1,429 @@ +name: "VGGNet" +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "/home/common/imagenet/ilsvrc12_train_lmdb" + backend: LMDB + batch_size: 64 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: true + } + include: { phase: TRAIN } +} +layers { + name: "data" + type: DATA + top: "data" + top: "label" + data_param { + source: "/home/common/imagenet/ilsvrc12_val_lmdb" + backend: LMDB + batch_size: 50 + } + transform_param { + crop_size: 224 + mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mirror: false + } + include: { phase: TEST } +} +layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu1" + type: RELU + bottom: "conv1" + top: "conv1" +} +layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" +} +layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv3" 
+ type: CONVOLUTION + bottom: "pool2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" +} +layers { + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu4" + type: RELU + bottom: "conv4" + top: "conv4" +} +layers { + name: "pool4" + type: POOLING + bottom: "conv4" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv5" + type: CONVOLUTION + bottom: "pool4" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu5" + type: RELU + bottom: "conv5" + top: "conv5" +} +layers { + name: "conv6" + type: CONVOLUTION + bottom: "conv5" + top: "conv6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu6" + type: RELU + bottom: "conv6" + top: "conv6" +} +layers { + name: "pool6" + type: POOLING + bottom: "conv6" + top: "pool6" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "conv7" + type: CONVOLUTION + bottom: "pool6" + top: "conv7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + 
num_output: 512 + pad: 0 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu7" + type: RELU + bottom: "conv7" + top: "conv7" +} +layers { + name: "conv8" + type: CONVOLUTION + bottom: "conv7" + top: "conv8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "gaussian" + std: 0.02 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu8" + type: RELU + bottom: "conv8" + top: "conv8" +} +layers { + name: "pool8" + type: POOLING + bottom: "conv8" + top: "pool8" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layers { + name: "fc9" + type: INNER_PRODUCT + bottom: "pool8" + top: "fc9" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "relu9" + type: RELU + bottom: "fc9" + top: "fc9" +} +layers { + name: "drop9" + type: DROPOUT + bottom: "fc9" + top: "fc9" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc10" + type: INNER_PRODUCT + bottom: "fc9" + top: "fc10" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 4096 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0. 
+ } + } +} +layers { + name: "relu10" + type: RELU + bottom: "fc10" + top: "fc10" +} +layers { + name: "drop10" + type: DROPOUT + bottom: "fc10" + top: "fc10" + dropout_param { + dropout_ratio: 0.5 + } +} +layers { + name: "fc11" + type: INNER_PRODUCT + bottom: "fc10" + top: "fc11" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "fc11" + bottom: "label" +} +layers { + name: "accuracy" + type: ACCURACY + bottom: "fc11" + bottom: "label" + top: "accuracy" + include: { phase: TEST } +} diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 188fd8404cd..74b67edaa8e 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -156,6 +156,7 @@ message SolverParameter { // If true, print information about the state of the net that may help with // debugging learning problems. optional bool debug_info = 23 [default = false]; + optional int32 debug_display = 40 [default = 1000]; // If false, don't save a snapshot after training finishes. optional bool snapshot_after_train = 28 [default = true]; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 84fa08e95c3..d4c91d88d41 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -190,7 +190,8 @@ void Solver::Solve(const char* resume_file) { } const bool display = param_.display() && iter_ % param_.display() == 0; - net_->set_debug_info(display && param_.debug_info()); + const bool debug_display = param_.debug_info() && param_.debug_display() > 0 && iter_ % param_.debug_display() == 0; + net_->set_debug_info(debug_display); // added for allowing bigger batch size Dtype loss = 0;