From a95ccb6ebf5e22454e2a07e96272bbc0f2add6fd Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 8 Apr 2020 11:43:22 +0800 Subject: [PATCH] remove old nas examples (#2285) --- .../mnist-nas/classic_mode/config_hpo.yml | 16 - .../mnist-nas/classic_mode/config_nas.yml | 18 - .../trials/mnist-nas/classic_mode/mnist.py | 253 ----------- .../mnist-nas/classic_mode/operators.py | 109 ----- examples/trials/mnist-nas/config_ppo.yml | 19 - .../mnist-nas/darts_mode/config_darts.yml | 18 - .../mnist-nas/darts_mode/mnist-darts.py | 258 ----------- .../trials/mnist-nas/darts_mode/operators.py | 109 ----- .../mnist-nas/enas_mode/config_enas.yml | 19 - .../trials/mnist-nas/enas_mode/mnist-enas.py | 254 ----------- .../trials/mnist-nas/enas_mode/operators.py | 93 ---- .../mnist-nas/oneshot_mode/config_oneshot.yml | 18 - .../mnist-nas/oneshot_mode/mnist-oneshot.py | 253 ----------- .../mnist-nas/oneshot_mode/operators.py | 109 ----- examples/trials/nas_cifar10/README.md | 10 - examples/trials/nas_cifar10/README_zh_CN.md | 9 - .../trials/nas_cifar10/config_paiYarn_ppo.yml | 31 -- .../trials/nas_cifar10/config_pai_ppo.yml | 34 -- examples/trials/nas_cifar10/config_ppo.yml | 24 - examples/trials/nas_cifar10/data/download.sh | 2 - examples/trials/nas_cifar10/macro_cifar10.sh | 31 -- .../trials/nas_cifar10/macro_cifar10_pai.sh | 31 -- examples/trials/nas_cifar10/src/__init__.py | 0 .../nas_cifar10/src/cifar10/__init__.py | 0 .../nas_cifar10/src/cifar10/data_utils.py | 74 --- .../nas_cifar10/src/cifar10/general_child.py | 423 ------------------ .../trials/nas_cifar10/src/cifar10/models.py | 196 -------- .../src/cifar10/nni_child_cifar10.py | 162 ------- .../trials/nas_cifar10/src/cifar10_flags.py | 45 -- examples/trials/nas_cifar10/src/common_ops.py | 255 ----------- examples/trials/nas_cifar10/src/utils.py | 262 ----------- examples/tuners/enas_nni/README.md | 6 - examples/tuners/enas_nni/README_zh_CN.md | 5 - 33 files changed, 3146 deletions(-) delete mode 100644 examples/trials/mnist-nas/classic_mode/config_hpo.yml delete mode 100644 examples/trials/mnist-nas/classic_mode/config_nas.yml delete mode 100644 examples/trials/mnist-nas/classic_mode/mnist.py delete mode 100644 examples/trials/mnist-nas/classic_mode/operators.py delete mode 100644 examples/trials/mnist-nas/config_ppo.yml delete mode 100644 examples/trials/mnist-nas/darts_mode/config_darts.yml delete mode 100644 examples/trials/mnist-nas/darts_mode/mnist-darts.py delete mode 100644 examples/trials/mnist-nas/darts_mode/operators.py delete mode 100644 examples/trials/mnist-nas/enas_mode/config_enas.yml delete mode 100644 examples/trials/mnist-nas/enas_mode/mnist-enas.py delete mode 100644 examples/trials/mnist-nas/enas_mode/operators.py delete mode 100644 examples/trials/mnist-nas/oneshot_mode/config_oneshot.yml delete mode 100644 examples/trials/mnist-nas/oneshot_mode/mnist-oneshot.py delete mode 100644 examples/trials/mnist-nas/oneshot_mode/operators.py delete mode 100644 examples/trials/nas_cifar10/README.md delete mode 100644 examples/trials/nas_cifar10/README_zh_CN.md delete mode 100644 examples/trials/nas_cifar10/config_paiYarn_ppo.yml delete mode 100644 examples/trials/nas_cifar10/config_pai_ppo.yml delete mode 100644 examples/trials/nas_cifar10/config_ppo.yml delete mode 100755 examples/trials/nas_cifar10/data/download.sh delete mode 100644 examples/trials/nas_cifar10/macro_cifar10.sh delete mode 100644 examples/trials/nas_cifar10/macro_cifar10_pai.sh delete mode 100644 examples/trials/nas_cifar10/src/__init__.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10/__init__.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10/data_utils.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10/general_child.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10/models.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py delete mode 100644 examples/trials/nas_cifar10/src/cifar10_flags.py delete mode 100644 examples/trials/nas_cifar10/src/common_ops.py delete mode 100644 examples/trials/nas_cifar10/src/utils.py delete mode 100644 examples/tuners/enas_nni/README.md delete mode 100644 examples/tuners/enas_nni/README_zh_CN.md diff --git a/examples/trials/mnist-nas/classic_mode/config_hpo.yml b/examples/trials/mnist-nas/classic_mode/config_hpo.yml deleted file mode 100644 index 3c04a62f9f..0000000000 --- a/examples/trials/mnist-nas/classic_mode/config_hpo.yml +++ /dev/null @@ -1,16 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - builtinTunerName: TPE -trial: - command: python3 mnist.py --batch_num 200 - codeDir: . - gpuNum: 0 - nasMode: classic_mode diff --git a/examples/trials/mnist-nas/classic_mode/config_nas.yml b/examples/trials/mnist-nas/classic_mode/config_nas.yml deleted file mode 100644 index 75baa72405..0000000000 --- a/examples/trials/mnist-nas/classic_mode/config_nas.yml +++ /dev/null @@ -1,18 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - codeDir: ../../../tuners/random_nas_tuner - classFileName: random_nas_tuner.py - className: RandomNASTuner -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 - nasMode: classic_mode diff --git a/examples/trials/mnist-nas/classic_mode/mnist.py b/examples/trials/mnist-nas/classic_mode/mnist.py deleted file mode 100644 index 377e766e66..0000000000 --- a/examples/trials/mnist-nas/classic_mode/mnist.py +++ /dev/null @@ -1,253 +0,0 @@ -"""A deep MNIST classifier using convolutional layers.""" - -import argparse -import logging -import math -import tempfile -import time - -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -import operators as op - -FLAGS = None - -logger = logging.getLogger('mnist_AutoML') - - -class MnistNetwork(object): - ''' - MnistNetwork is for initializing and building basic network for mnist. - ''' - def __init__(self, - channel_1_num, - channel_2_num, - conv_size, - hidden_size, - pool_size, - learning_rate, - x_dim=784, - y_dim=10): - self.channel_1_num = channel_1_num - self.channel_2_num = channel_2_num - self.conv_size = conv_size - self.hidden_size = hidden_size - self.pool_size = pool_size - self.learning_rate = learning_rate - self.x_dim = x_dim - self.y_dim = y_dim - - self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x') - self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y') - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - - self.train_step = None - self.accuracy = None - - def build_network(self): - ''' - Building network for mnist, meanwhile specifying its neural architecture search space - ''' - - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images are - # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope('reshape'): - try: - input_dim = int(math.sqrt(self.x_dim)) - except: - print( - 'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim)) - logger.debug( - 'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim)) - raise - x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1]) - - """@nni.mutable_layers( - { - layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num), - op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num), - op.dilated_conv(in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)], - fixed_inputs: [x_image], - layer_output: conv1_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_1_num)], - fixed_inputs: [conv1_out], - layer_output: post1_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post1_out], - layer_output: pool1_out - }, - { - layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)], - fixed_inputs: [pool1_out], - optional_inputs: [post1_out], - optional_input_size: [0, 1], - layer_output: conv2_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_2_num)], - fixed_inputs: [conv2_out], - layer_output: post2_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post2_out], - optional_inputs: [post1_out, pool1_out], - optional_input_size: [0, 1], - layer_output: pool2_out - } - )""" - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - last_dim_list = pool2_out.get_shape().as_list() - assert(last_dim_list[1] == last_dim_list[2]) - last_dim = last_dim_list[1] - with tf.name_scope('fc1'): - w_fc1 = op.weight_variable( - [last_dim * last_dim * self.channel_2_num, self.hidden_size]) - b_fc1 = op.bias_variable([self.hidden_size]) - - h_pool2_flat = tf.reshape( - pool2_out, [-1, last_dim * last_dim * self.channel_2_num]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of features. - with tf.name_scope('dropout'): - h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope('fc2'): - w_fc2 = op.weight_variable([self.hidden_size, self.y_dim]) - b_fc2 = op.bias_variable([self.y_dim]) - y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2 - - with tf.name_scope('loss'): - cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv)) - with tf.name_scope('adam_optimizer'): - self.train_step = tf.train.AdamOptimizer( - self.learning_rate).minimize(cross_entropy) - - with tf.name_scope('accuracy'): - correct_prediction = tf.equal( - tf.argmax(y_conv, 1), tf.argmax(self.labels, 1)) - self.accuracy = tf.reduce_mean( - tf.cast(correct_prediction, tf.float32)) - - -def download_mnist_retry(data_dir, max_num_retries=20): - """Try to download mnist dataset and avoid errors""" - for _ in range(max_num_retries): - try: - return input_data.read_data_sets(data_dir, one_hot=True) - except tf.errors.AlreadyExistsError: - time.sleep(1) - raise Exception("Failed to download MNIST.") - -def main(params): - ''' - Main function, build mnist network, run and send result to NNI. - ''' - # Import data - mnist = download_mnist_retry(params['data_dir']) - print('Mnist download data done.') - logger.debug('Mnist download data done.') - - # Create the model - # Build the graph for the deep net - mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'], - channel_2_num=params['channel_2_num'], - conv_size=params['conv_size'], - hidden_size=params['hidden_size'], - pool_size=params['pool_size'], - learning_rate=params['learning_rate']) - mnist_network.build_network() - logger.debug('Mnist build network done.') - - # Write log - graph_location = tempfile.mkdtemp() - logger.debug('Saving graph to: %s', graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - test_acc = 0.0 - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(params['batch_num']): - batch = mnist.train.next_batch(params['batch_size']) - mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: 1 - params['dropout_rate']} - ) - - if i % 100 == 0: - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_intermediate_result(test_acc)""" - logger.debug('test accuracy %g', test_acc) - logger.debug('Pipe send intermediate result done.') - - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_final_result(test_acc)""" - logger.debug('Final result is %g', test_acc) - logger.debug('Send final result done.') - -def get_params(): - ''' Get parameters from command line ''' - parser = argparse.ArgumentParser() - parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") - parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") - parser.add_argument("--channel_1_num", type=int, default=32) - parser.add_argument("--channel_2_num", type=int, default=64) - parser.add_argument("--conv_size", type=int, default=5) - parser.add_argument("--pool_size", type=int, default=2) - parser.add_argument("--hidden_size", type=int, default=1024) - parser.add_argument("--learning_rate", type=float, default=1e-4) - parser.add_argument("--batch_num", type=int, default=2000) - parser.add_argument("--batch_size", type=int, default=32) - - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - try: - params = vars(get_params()) - main(params) - except Exception as exception: - logger.exception(exception) - raise diff --git a/examples/trials/mnist-nas/classic_mode/operators.py b/examples/trials/mnist-nas/classic_mode/operators.py deleted file mode 100644 index f870353188..0000000000 --- a/examples/trials/mnist-nas/classic_mode/operators.py +++ /dev/null @@ -1,109 +0,0 @@ -import tensorflow as tf -import math - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - -def sum_op(inputs): - """sum_op""" - fixed_input = inputs[0][0] - optional_input = inputs[1][0] - fixed_shape = fixed_input.get_shape().as_list() - optional_shape = optional_input.get_shape().as_list() - assert fixed_shape[1] == fixed_shape[2] - assert optional_shape[1] == optional_shape[2] - pool_size = math.ceil(optional_shape[1] / fixed_shape[1]) - pool_out = tf.nn.avg_pool(optional_input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME') - conv_matrix = weight_variable([1, 1, optional_shape[3], fixed_shape[3]]) - conv_out = tf.nn.conv2d(pool_out, conv_matrix, strides=[1, 1, 1, 1], padding='SAME') - return fixed_input + conv_out - - -def conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """conv2d returns a 2d convolution layer with full stride.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [1, 3]: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def twice_conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """twice_conv2d""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 7]: - w_matrix1 = weight_variable([1, size, in_ch, int(out_ch/2)]) - out = tf.nn.conv2d(x_input, w_matrix1, strides=[1, 1, 1, 1], padding='SAME') - w_matrix2 = weight_variable([size, 1, int(out_ch/2), out_ch]) - return tf.nn.conv2d(out, w_matrix2, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def dilated_conv(inputs, size=3, in_ch=-1, out_ch=-1): - """dilated_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size == 3: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.atrous_conv2d(x_input, w_matrix, rate=2, padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def separable_conv(inputs, size=-1, in_ch=-1, out_ch=-1): - """separable_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - depth_matrix = weight_variable([size, size, in_ch, 1]) - point_matrix = weight_variable([1, 1, 1*in_ch, out_ch]) - return tf.nn.separable_conv2d(x_input, depth_matrix, point_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def avg_pool(inputs, size=-1): - """avg_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.avg_pool(x_input, ksize=[1, size, size, 1], strides=[1, size, size, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def max_pool(inputs, size=-1): - """max_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.max_pool(x_input, ksize=[1, size, size, 1], strides=[1, size, size, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def post_process(inputs, ch_size=-1): - """post_process""" - x_input = inputs[0][0] - bias_matrix = bias_variable([ch_size]) - return tf.nn.relu(x_input + bias_matrix) diff --git a/examples/trials/mnist-nas/config_ppo.yml b/examples/trials/mnist-nas/config_ppo.yml deleted file mode 100644 index 9be8e78570..0000000000 --- a/examples/trials/mnist-nas/config_ppo.yml +++ /dev/null @@ -1,19 +0,0 @@ -authorName: NNI-example -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 100h -maxTrialNum: 10000 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC, PPO (SMAC and PPO should be installed through nnictl) - builtinTunerName: PPOTuner - classArgs: - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 diff --git a/examples/trials/mnist-nas/darts_mode/config_darts.yml b/examples/trials/mnist-nas/darts_mode/config_darts.yml deleted file mode 100644 index 34af310b80..0000000000 --- a/examples/trials/mnist-nas/darts_mode/config_darts.yml +++ /dev/null @@ -1,18 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - codeDir: ../../../tuners/random_nas_tuner - classFileName: random_nas_tuner.py - className: RandomNASTuner -trial: - command: python3 mnist-darts.py - codeDir: . - gpuNum: 0 - nasMode: oneshot_mode diff --git a/examples/trials/mnist-nas/darts_mode/mnist-darts.py b/examples/trials/mnist-nas/darts_mode/mnist-darts.py deleted file mode 100644 index c3ef35bf14..0000000000 --- a/examples/trials/mnist-nas/darts_mode/mnist-darts.py +++ /dev/null @@ -1,258 +0,0 @@ -"""A deep MNIST classifier using convolutional layers.""" - -import argparse -import logging -import math -import tempfile -import time - -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -import operators as op - -FLAGS = None - -logger = logging.getLogger('mnist_AutoML') - - -class MnistNetwork(object): - ''' - MnistNetwork is for initializing and building basic network for mnist. - ''' - def __init__(self, - channel_1_num, - channel_2_num, - conv_size, - hidden_size, - pool_size, - learning_rate, - x_dim=784, - y_dim=10): - self.channel_1_num = channel_1_num - self.channel_2_num = channel_2_num - self.conv_size = conv_size - self.hidden_size = hidden_size - self.pool_size = pool_size - self.learning_rate = learning_rate - self.x_dim = x_dim - self.y_dim = y_dim - - self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x') - self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y') - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - - self.train_step = None - self.accuracy = None - - def build_network(self): - ''' - Building network for mnist, meanwhile specifying its neural architecture search space - ''' - - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images are - # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope('reshape'): - try: - input_dim = int(math.sqrt(self.x_dim)) - except: - print( - 'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim)) - logger.debug( - 'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim)) - raise - x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1]) - - """@nni.mutable_layers( - { - layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num), - op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num), - op.dilated_conv(in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)], - fixed_inputs: [x_image], - layer_output: conv1_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_1_num)], - fixed_inputs: [conv1_out], - layer_output: post1_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post1_out], - layer_output: pool1_out - }, - { - layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)], - fixed_inputs: [pool1_out], - optional_inputs: [post1_out], - optional_input_size: [0, 1], - layer_output: conv2_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_2_num)], - fixed_inputs: [conv2_out], - layer_output: post2_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post2_out], - optional_inputs: [post1_out, pool1_out], - optional_input_size: [0, 1], - layer_output: pool2_out - } - )""" - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - last_dim_list = pool2_out.get_shape().as_list() - assert(last_dim_list[1] == last_dim_list[2]) - last_dim = last_dim_list[1] - with tf.name_scope('fc1'): - w_fc1 = op.weight_variable( - [last_dim * last_dim * self.channel_2_num, self.hidden_size]) - b_fc1 = op.bias_variable([self.hidden_size]) - - h_pool2_flat = tf.reshape( - pool2_out, [-1, last_dim * last_dim * self.channel_2_num]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of features. - with tf.name_scope('dropout'): - h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope('fc2'): - w_fc2 = op.weight_variable([self.hidden_size, self.y_dim]) - b_fc2 = op.bias_variable([self.y_dim]) - y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2 - - with tf.name_scope('loss'): - self.cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv)) - with tf.name_scope('adam_optimizer'): - self.train_step = tf.train.AdamOptimizer( - self.learning_rate).minimize(self.cross_entropy) - - with tf.name_scope('accuracy'): - correct_prediction = tf.equal( - tf.argmax(y_conv, 1), tf.argmax(self.labels, 1)) - self.accuracy = tf.reduce_mean( - tf.cast(correct_prediction, tf.float32)) - - -def download_mnist_retry(data_dir, max_num_retries=20): - """Try to download mnist dataset and avoid errors""" - for _ in range(max_num_retries): - try: - return input_data.read_data_sets(data_dir, one_hot=True) - except tf.errors.AlreadyExistsError: - time.sleep(1) - raise Exception("Failed to download MNIST.") - -def main(params): - ''' - Main function, build mnist network, run and send result to NNI. - ''' - # Import data - mnist = download_mnist_retry(params['data_dir']) - print('Mnist download data done.') - logger.debug('Mnist download data done.') - - # Create the model - # Build the graph for the deep net - mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'], - channel_2_num=params['channel_2_num'], - conv_size=params['conv_size'], - hidden_size=params['hidden_size'], - pool_size=params['pool_size'], - learning_rate=params['learning_rate']) - mnist_network.build_network() - logger.debug('Mnist build network done.') - - # Write log - graph_location = tempfile.mkdtemp() - logger.debug('Saving graph to: %s', graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - test_acc = 0.0 - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(params['batch_num']): - batch = mnist.train.next_batch(params['batch_size']) - feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: 1 - params['dropout_rate']} - """@nni.training_update(tf, sess, mnist_network.cross_entropy)""" - batch = mnist.train.next_batch(params['batch_size']) - feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: 1 - params['dropout_rate']} - mnist_network.train_step.run(feed_dict=feed_dict) - - if i % 100 == 0: - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_intermediate_result(test_acc)""" - logger.debug('test accuracy %g', test_acc) - logger.debug('Pipe send intermediate result done.') - - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_final_result(test_acc)""" - logger.debug('Final result is %g', test_acc) - logger.debug('Send final result done.') - -def get_params(): - ''' Get parameters from command line ''' - parser = argparse.ArgumentParser() - parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") - parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") - parser.add_argument("--channel_1_num", type=int, default=32) - parser.add_argument("--channel_2_num", type=int, default=64) - parser.add_argument("--conv_size", type=int, default=5) - parser.add_argument("--pool_size", type=int, default=2) - parser.add_argument("--hidden_size", type=int, default=1024) - parser.add_argument("--learning_rate", type=float, default=1e-4) - parser.add_argument("--batch_num", type=int, default=2000) - parser.add_argument("--batch_size", type=int, default=32) - - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - try: - params = vars(get_params()) - main(params) - except Exception as exception: - logger.exception(exception) - raise diff --git a/examples/trials/mnist-nas/darts_mode/operators.py b/examples/trials/mnist-nas/darts_mode/operators.py deleted file mode 100644 index ce7b8650ac..0000000000 --- a/examples/trials/mnist-nas/darts_mode/operators.py +++ /dev/null @@ -1,109 +0,0 @@ -import tensorflow as tf -import math - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - -def sum_op(inputs): - """sum_op""" - fixed_input = inputs[0][0] - optional_input = tf.concat(inputs[1], axis=3) - fixed_shape = fixed_input.get_shape().as_list() - optional_shape = optional_input.get_shape().as_list() - assert fixed_shape[1] == fixed_shape[2] - assert optional_shape[1] == optional_shape[2] - pool_size = math.ceil(optional_shape[1] / fixed_shape[1]) - pool_out = tf.nn.avg_pool(optional_input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME') - conv_matrix = weight_variable([1, 1, optional_shape[3], fixed_shape[3]]) - conv_out = tf.nn.conv2d(pool_out, conv_matrix, strides=[1, 1, 1, 1], padding='SAME') - return fixed_input + conv_out - - -def conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """conv2d returns a 2d convolution layer with full stride.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [1, 3]: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def twice_conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """twice_conv2d""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 7]: - w_matrix1 = weight_variable([1, size, in_ch, int(out_ch/2)]) - out = tf.nn.conv2d(x_input, w_matrix1, strides=[1, 1, 1, 1], padding='SAME') - w_matrix2 = weight_variable([size, 1, int(out_ch/2), out_ch]) - return tf.nn.conv2d(out, w_matrix2, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def dilated_conv(inputs, size=3, in_ch=-1, out_ch=-1): - """dilated_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size == 3: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.atrous_conv2d(x_input, w_matrix, rate=2, padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def separable_conv(inputs, size=-1, in_ch=-1, out_ch=-1): - """separable_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - depth_matrix = weight_variable([size, size, in_ch, 1]) - point_matrix = weight_variable([1, 1, 1*in_ch, out_ch]) - return tf.nn.separable_conv2d(x_input, depth_matrix, point_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def avg_pool(inputs, size=-1): - """avg_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.avg_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def max_pool(inputs, size=-1): - """max_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.max_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def post_process(inputs, ch_size=-1): - """post_process""" - x_input = inputs[0][0] - bias_matrix = bias_variable([ch_size]) - return tf.nn.relu(x_input + bias_matrix) diff --git a/examples/trials/mnist-nas/enas_mode/config_enas.yml b/examples/trials/mnist-nas/enas_mode/config_enas.yml deleted file mode 100644 index c1d659350e..0000000000 --- a/examples/trials/mnist-nas/enas_mode/config_enas.yml +++ /dev/null @@ -1,19 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -multiPhase: true -tuner: - codeDir: ../../../tuners/random_nas_tuner - classFileName: random_nas_tuner.py - className: RandomNASTuner -trial: - command: python3 mnist-enas.py - codeDir: . - gpuNum: 0 - nasMode: enas_mode diff --git a/examples/trials/mnist-nas/enas_mode/mnist-enas.py b/examples/trials/mnist-nas/enas_mode/mnist-enas.py deleted file mode 100644 index bb82b1e426..0000000000 --- a/examples/trials/mnist-nas/enas_mode/mnist-enas.py +++ /dev/null @@ -1,254 +0,0 @@ -"""A deep MNIST classifier using convolutional layers.""" - -import argparse -import logging -import math -import tempfile -import time - -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -import operators as op - -FLAGS = None - -logger = logging.getLogger('mnist_AutoML') - - -class MnistNetwork(object): - ''' - MnistNetwork is for initializing and building basic network for mnist. - ''' - def __init__(self, - channel_1_num, - channel_2_num, - conv_size, - hidden_size, - pool_size, - learning_rate, - x_dim=784, - y_dim=10): - self.channel_1_num = channel_1_num - self.channel_2_num = channel_2_num - self.conv_size = conv_size - self.hidden_size = hidden_size - self.pool_size = pool_size - self.learning_rate = learning_rate - self.x_dim = x_dim - self.y_dim = y_dim - - self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x') - self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y') - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - - self.train_step = None - self.accuracy = None - - def build_network(self): - ''' - Building network for mnist, meanwhile specifying its neural architecture search space - ''' - - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images are - # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope('reshape'): - try: - input_dim = int(math.sqrt(self.x_dim)) - except: - print( - 'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim)) - logger.debug( - 'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim)) - raise - x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1]) - - """@nni.mutable_layers( - { - layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num), - op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num), - op.dilated_conv(in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)], - fixed_inputs: [x_image], - layer_output: conv1_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_1_num)], - fixed_inputs: [conv1_out], - layer_output: post1_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post1_out], - layer_output: pool1_out - }, - { - layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)], - fixed_inputs: [pool1_out], - optional_inputs: [post1_out], - optional_input_size: [0, 1], - layer_output: conv2_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_2_num)], - fixed_inputs: [conv2_out], - layer_output: post2_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post2_out], - optional_inputs: [post1_out, pool1_out], - optional_input_size: [0, 1], - layer_output: pool2_out - } - )""" - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - last_dim_list = pool2_out.get_shape().as_list() - assert(last_dim_list[1] == last_dim_list[2]) - last_dim = last_dim_list[1] - with tf.name_scope('fc1'): - w_fc1 = op.weight_variable( - [last_dim * last_dim * self.channel_2_num, self.hidden_size]) - b_fc1 = op.bias_variable([self.hidden_size]) - - h_pool2_flat = tf.reshape( - pool2_out, [-1, last_dim * last_dim * self.channel_2_num]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of features. - with tf.name_scope('dropout'): - h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope('fc2'): - w_fc2 = op.weight_variable([self.hidden_size, self.y_dim]) - b_fc2 = op.bias_variable([self.y_dim]) - y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2 - - with tf.name_scope('loss'): - cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv)) - with tf.name_scope('adam_optimizer'): - self.train_step = tf.train.AdamOptimizer( - self.learning_rate).minimize(cross_entropy) - - with tf.name_scope('accuracy'): - correct_prediction = tf.equal( - tf.argmax(y_conv, 1), tf.argmax(self.labels, 1)) - self.accuracy = tf.reduce_mean( - tf.cast(correct_prediction, tf.float32)) - - -def download_mnist_retry(data_dir, max_num_retries=20): - """Try to download mnist dataset and avoid errors""" - for _ in range(max_num_retries): - try: - return input_data.read_data_sets(data_dir, one_hot=True) - except tf.errors.AlreadyExistsError: - time.sleep(1) - raise Exception("Failed to download MNIST.") - -def main(params): - ''' - Main function, build mnist network, run and send result to NNI. - ''' - # Import data - mnist = download_mnist_retry(params['data_dir']) - print('Mnist download data done.') - logger.debug('Mnist download data done.') - - # Create the model - # Build the graph for the deep net - mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'], - channel_2_num=params['channel_2_num'], - conv_size=params['conv_size'], - hidden_size=params['hidden_size'], - pool_size=params['pool_size'], - learning_rate=params['learning_rate']) - mnist_network.build_network() - logger.debug('Mnist build network done.') - - # Write log - graph_location = tempfile.mkdtemp() - logger.debug('Saving graph to: %s', graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - test_acc = 0.0 - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(params['batch_num']): - """@nni.training_update(tf, sess)""" - batch = mnist.train.next_batch(params['batch_size']) - mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: 1 - params['dropout_rate']} - ) - - if i % 100 == 0: - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_intermediate_result(test_acc)""" - logger.debug('test accuracy %g', test_acc) - logger.debug('Pipe send intermediate result done.') - - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_final_result(test_acc)""" - logger.debug('Final result is %g', test_acc) - logger.debug('Send final result done.') - -def get_params(): - ''' Get parameters from command line ''' - parser = argparse.ArgumentParser() - parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") - parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") - parser.add_argument("--channel_1_num", type=int, default=32) - parser.add_argument("--channel_2_num", type=int, default=64) - parser.add_argument("--conv_size", type=int, default=5) - parser.add_argument("--pool_size", type=int, default=2) - parser.add_argument("--hidden_size", type=int, default=1024) - parser.add_argument("--learning_rate", type=float, default=1e-4) - parser.add_argument("--batch_num", type=int, default=2000) - parser.add_argument("--batch_size", type=int, default=32) - - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - try: - params = vars(get_params()) - main(params) - except Exception as exception: - logger.exception(exception) - raise diff --git a/examples/trials/mnist-nas/enas_mode/operators.py b/examples/trials/mnist-nas/enas_mode/operators.py deleted file mode 100644 index 8eb3dc12c6..0000000000 --- a/examples/trials/mnist-nas/enas_mode/operators.py +++ /dev/null @@ -1,93 +0,0 @@ -import tensorflow as tf -import math - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - -def sum_op(inputs): - """sum_op""" - fixed_input = inputs[0][0] - optional_input = tf.reduce_sum(inputs[1], axis=0) - if len(optional_input.get_shape()) < 1: - return fixed_input - fixed_shape = fixed_input.get_shape().as_list() - optional_shape = optional_input.get_shape().as_list() - assert fixed_shape[1] == fixed_shape[2] - assert optional_shape[1] == optional_shape[2] - pool_size = math.ceil(optional_shape[1] / fixed_shape[1]) - pool_out = tf.nn.avg_pool(optional_input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME') - conv_matrix = weight_variable([1, 1, optional_shape[3], fixed_shape[3]]) - conv_out = tf.nn.conv2d(pool_out, conv_matrix, strides=[1, 1, 1, 1], padding='SAME') - return fixed_input + conv_out - - -def conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """conv2d returns a 2d convolution layer with full stride.""" - x_input = sum_op(inputs) - if size in [1, 3]: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def twice_conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """twice_conv2d""" - x_input = sum_op(inputs) - if size in [3, 7]: - w_matrix1 = weight_variable([1, size, in_ch, int(out_ch/2)]) - out = tf.nn.conv2d(x_input, w_matrix1, strides=[1, 1, 1, 1], padding='SAME') - w_matrix2 = weight_variable([size, 1, int(out_ch/2), out_ch]) - return tf.nn.conv2d(out, w_matrix2, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def dilated_conv(inputs, size=3, in_ch=-1, out_ch=-1): - """dilated_conv""" - x_input = sum_op(inputs) - if size == 3: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.atrous_conv2d(x_input, w_matrix, rate=2, padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def separable_conv(inputs, size=-1, in_ch=-1, out_ch=-1): - """separable_conv""" - x_input = sum_op(inputs) - if size in [3, 5, 7]: - depth_matrix = weight_variable([size, size, in_ch, 1]) - point_matrix = weight_variable([1, 1, 1*in_ch, out_ch]) - return tf.nn.separable_conv2d(x_input, depth_matrix, point_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def avg_pool(inputs, size=-1): - """avg_pool downsamples a feature map.""" - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.avg_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def max_pool(inputs, size=-1): - """max_pool downsamples a feature map.""" - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.max_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def post_process(inputs, ch_size=-1): - """post_process""" - x_input = inputs[0][0] - bias_matrix = bias_variable([ch_size]) - return tf.nn.relu(x_input + bias_matrix) diff --git a/examples/trials/mnist-nas/oneshot_mode/config_oneshot.yml b/examples/trials/mnist-nas/oneshot_mode/config_oneshot.yml deleted file mode 100644 index 966e2e556a..0000000000 --- a/examples/trials/mnist-nas/oneshot_mode/config_oneshot.yml +++ /dev/null @@ -1,18 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - codeDir: ../../../tuners/random_nas_tuner - classFileName: random_nas_tuner.py - className: RandomNASTuner -trial: - command: python3 mnist-oneshot.py - codeDir: . - gpuNum: 0 - nasMode: oneshot_mode diff --git a/examples/trials/mnist-nas/oneshot_mode/mnist-oneshot.py b/examples/trials/mnist-nas/oneshot_mode/mnist-oneshot.py deleted file mode 100644 index 377e766e66..0000000000 --- a/examples/trials/mnist-nas/oneshot_mode/mnist-oneshot.py +++ /dev/null @@ -1,253 +0,0 @@ -"""A deep MNIST classifier using convolutional layers.""" - -import argparse -import logging -import math -import tempfile -import time - -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -import operators as op - -FLAGS = None - -logger = logging.getLogger('mnist_AutoML') - - -class MnistNetwork(object): - ''' - MnistNetwork is for initializing and building basic network for mnist. - ''' - def __init__(self, - channel_1_num, - channel_2_num, - conv_size, - hidden_size, - pool_size, - learning_rate, - x_dim=784, - y_dim=10): - self.channel_1_num = channel_1_num - self.channel_2_num = channel_2_num - self.conv_size = conv_size - self.hidden_size = hidden_size - self.pool_size = pool_size - self.learning_rate = learning_rate - self.x_dim = x_dim - self.y_dim = y_dim - - self.images = tf.placeholder(tf.float32, [None, self.x_dim], name='input_x') - self.labels = tf.placeholder(tf.float32, [None, self.y_dim], name='input_y') - self.keep_prob = tf.placeholder(tf.float32, name='keep_prob') - - self.train_step = None - self.accuracy = None - - def build_network(self): - ''' - Building network for mnist, meanwhile specifying its neural architecture search space - ''' - - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images are - # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope('reshape'): - try: - input_dim = int(math.sqrt(self.x_dim)) - except: - print( - 'input dim cannot be sqrt and reshape. input dim: ' + str(self.x_dim)) - logger.debug( - 'input dim cannot be sqrt and reshape. input dim: %s', str(self.x_dim)) - raise - x_image = tf.reshape(self.images, [-1, input_dim, input_dim, 1]) - - """@nni.mutable_layers( - { - layer_choice: [op.conv2d(size=1, in_ch=1, out_ch=self.channel_1_num), - op.conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=3, in_ch=1, out_ch=self.channel_1_num), - op.twice_conv2d(size=7, in_ch=1, out_ch=self.channel_1_num), - op.dilated_conv(in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=3, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=5, in_ch=1, out_ch=self.channel_1_num), - op.separable_conv(size=7, in_ch=1, out_ch=self.channel_1_num)], - fixed_inputs: [x_image], - layer_output: conv1_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_1_num)], - fixed_inputs: [conv1_out], - layer_output: post1_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post1_out], - layer_output: pool1_out - }, - { - layer_choice: [op.conv2d(size=1, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.twice_conv2d(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.dilated_conv(in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=3, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=5, in_ch=self.channel_1_num, out_ch=self.channel_2_num), - op.separable_conv(size=7, in_ch=self.channel_1_num, out_ch=self.channel_2_num)], - fixed_inputs: [pool1_out], - optional_inputs: [post1_out], - optional_input_size: [0, 1], - layer_output: conv2_out - }, - { - layer_choice: [op.post_process(ch_size=self.channel_2_num)], - fixed_inputs: [conv2_out], - layer_output: post2_out - }, - { - layer_choice: [op.max_pool(size=3), - op.max_pool(size=5), - op.max_pool(size=7), - op.avg_pool(size=3), - op.avg_pool(size=5), - op.avg_pool(size=7)], - fixed_inputs: [post2_out], - optional_inputs: [post1_out, pool1_out], - optional_input_size: [0, 1], - layer_output: pool2_out - } - )""" - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - last_dim_list = pool2_out.get_shape().as_list() - assert(last_dim_list[1] == last_dim_list[2]) - last_dim = last_dim_list[1] - with tf.name_scope('fc1'): - w_fc1 = op.weight_variable( - [last_dim * last_dim * self.channel_2_num, self.hidden_size]) - b_fc1 = op.bias_variable([self.hidden_size]) - - h_pool2_flat = tf.reshape( - pool2_out, [-1, last_dim * last_dim * self.channel_2_num]) - h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of features. - with tf.name_scope('dropout'): - h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope('fc2'): - w_fc2 = op.weight_variable([self.hidden_size, self.y_dim]) - b_fc2 = op.bias_variable([self.y_dim]) - y_conv = tf.matmul(h_fc1_drop, w_fc2) + b_fc2 - - with tf.name_scope('loss'): - cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=y_conv)) - with tf.name_scope('adam_optimizer'): - self.train_step = tf.train.AdamOptimizer( - self.learning_rate).minimize(cross_entropy) - - with tf.name_scope('accuracy'): - correct_prediction = tf.equal( - tf.argmax(y_conv, 1), tf.argmax(self.labels, 1)) - self.accuracy = tf.reduce_mean( - tf.cast(correct_prediction, tf.float32)) - - -def download_mnist_retry(data_dir, max_num_retries=20): - """Try to download mnist dataset and avoid errors""" - for _ in range(max_num_retries): - try: - return input_data.read_data_sets(data_dir, one_hot=True) - except tf.errors.AlreadyExistsError: - time.sleep(1) - raise Exception("Failed to download MNIST.") - -def main(params): - ''' - Main function, build mnist network, run and send result to NNI. - ''' - # Import data - mnist = download_mnist_retry(params['data_dir']) - print('Mnist download data done.') - logger.debug('Mnist download data done.') - - # Create the model - # Build the graph for the deep net - mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'], - channel_2_num=params['channel_2_num'], - conv_size=params['conv_size'], - hidden_size=params['hidden_size'], - pool_size=params['pool_size'], - learning_rate=params['learning_rate']) - mnist_network.build_network() - logger.debug('Mnist build network done.') - - # Write log - graph_location = tempfile.mkdtemp() - logger.debug('Saving graph to: %s', graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - test_acc = 0.0 - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(params['batch_num']): - batch = mnist.train.next_batch(params['batch_size']) - mnist_network.train_step.run(feed_dict={mnist_network.images: batch[0], - mnist_network.labels: batch[1], - mnist_network.keep_prob: 1 - params['dropout_rate']} - ) - - if i % 100 == 0: - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_intermediate_result(test_acc)""" - logger.debug('test accuracy %g', test_acc) - logger.debug('Pipe send intermediate result done.') - - test_acc = mnist_network.accuracy.eval( - feed_dict={mnist_network.images: mnist.test.images, - mnist_network.labels: mnist.test.labels, - mnist_network.keep_prob: 1.0}) - - """@nni.report_final_result(test_acc)""" - logger.debug('Final result is %g', test_acc) - logger.debug('Send final result done.') - -def get_params(): - ''' Get parameters from command line ''' - parser = argparse.ArgumentParser() - parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") - parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") - parser.add_argument("--channel_1_num", type=int, default=32) - parser.add_argument("--channel_2_num", type=int, default=64) - parser.add_argument("--conv_size", type=int, default=5) - parser.add_argument("--pool_size", type=int, default=2) - parser.add_argument("--hidden_size", type=int, default=1024) - parser.add_argument("--learning_rate", type=float, default=1e-4) - parser.add_argument("--batch_num", type=int, default=2000) - parser.add_argument("--batch_size", type=int, default=32) - - args, _ = parser.parse_known_args() - return args - -if __name__ == '__main__': - try: - params = vars(get_params()) - main(params) - except Exception as exception: - logger.exception(exception) - raise diff --git a/examples/trials/mnist-nas/oneshot_mode/operators.py b/examples/trials/mnist-nas/oneshot_mode/operators.py deleted file mode 100644 index ce7b8650ac..0000000000 --- a/examples/trials/mnist-nas/oneshot_mode/operators.py +++ /dev/null @@ -1,109 +0,0 @@ -import tensorflow as tf -import math - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - -def sum_op(inputs): - """sum_op""" - fixed_input = inputs[0][0] - optional_input = tf.concat(inputs[1], axis=3) - fixed_shape = fixed_input.get_shape().as_list() - optional_shape = optional_input.get_shape().as_list() - assert fixed_shape[1] == fixed_shape[2] - assert optional_shape[1] == optional_shape[2] - pool_size = math.ceil(optional_shape[1] / fixed_shape[1]) - pool_out = tf.nn.avg_pool(optional_input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1], padding='SAME') - conv_matrix = weight_variable([1, 1, optional_shape[3], fixed_shape[3]]) - conv_out = tf.nn.conv2d(pool_out, conv_matrix, strides=[1, 1, 1, 1], padding='SAME') - return fixed_input + conv_out - - -def conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """conv2d returns a 2d convolution layer with full stride.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [1, 3]: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.conv2d(x_input, w_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def twice_conv2d(inputs, size=-1, in_ch=-1, out_ch=-1): - """twice_conv2d""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 7]: - w_matrix1 = weight_variable([1, size, in_ch, int(out_ch/2)]) - out = tf.nn.conv2d(x_input, w_matrix1, strides=[1, 1, 1, 1], padding='SAME') - w_matrix2 = weight_variable([size, 1, int(out_ch/2), out_ch]) - return tf.nn.conv2d(out, w_matrix2, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def dilated_conv(inputs, size=3, in_ch=-1, out_ch=-1): - """dilated_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size == 3: - w_matrix = weight_variable([size, size, in_ch, out_ch]) - return tf.nn.atrous_conv2d(x_input, w_matrix, rate=2, padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def separable_conv(inputs, size=-1, in_ch=-1, out_ch=-1): - """separable_conv""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - depth_matrix = weight_variable([size, size, in_ch, 1]) - point_matrix = weight_variable([1, 1, 1*in_ch, out_ch]) - return tf.nn.separable_conv2d(x_input, depth_matrix, point_matrix, strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def avg_pool(inputs, size=-1): - """avg_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.avg_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - -def max_pool(inputs, size=-1): - """max_pool downsamples a feature map.""" - if not inputs[1]: - x_input = inputs[0][0] - else: - x_input = sum_op(inputs) - if size in [3, 5, 7]: - return tf.nn.max_pool(x_input, ksize=[1, size, size, 1], strides=[1, 1, 1, 1], padding='SAME') - else: - raise Exception("Unknown filter size: %d." % size) - - -def post_process(inputs, ch_size=-1): - """post_process""" - x_input = inputs[0][0] - bias_matrix = bias_variable([ch_size]) - return tf.nn.relu(x_input + bias_matrix) diff --git a/examples/trials/nas_cifar10/README.md b/examples/trials/nas_cifar10/README.md deleted file mode 100644 index 3c002ab33b..0000000000 --- a/examples/trials/nas_cifar10/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Run Neural Architecture Search in NNI - -Now we have an NAS example [NNI-NAS-Example](https://github.com/Crysple/NNI-NAS-Example) run in NNI using NAS interface from our contributors. - -We have included its trial code in this folder, and provided example config files to show how to use PPO tuner to tune the trial code. - -To prepare for the dataset, please run `cd data && . download.sh`. - -Thanks our lovely contributors, and welcome more and more people to join us! - diff --git a/examples/trials/nas_cifar10/README_zh_CN.md b/examples/trials/nas_cifar10/README_zh_CN.md deleted file mode 100644 index 518367b9c3..0000000000 --- a/examples/trials/nas_cifar10/README_zh_CN.md +++ /dev/null @@ -1,9 +0,0 @@ -# 在 NNI 中运行神经网络架构搜索 - -参考 [NNI-NAS-Example](https://github.com/Crysple/NNI-NAS-Example),来使用贡献者提供的 NAS 接口。 - -此目录中包含了 Trial 代码,并提供了示例的配置文件来展示如何使用 PPO Tuner 来调优此 Trial 代码。 - -运行下列代码来准备数据集 `cd data && . download.sh`. - -感谢可爱的志愿者,欢迎更多的人加入我们! \ No newline at end of file diff --git a/examples/trials/nas_cifar10/config_paiYarn_ppo.yml b/examples/trials/nas_cifar10/config_paiYarn_ppo.yml deleted file mode 100644 index eb1fb8abc3..0000000000 --- a/examples/trials/nas_cifar10/config_paiYarn_ppo.yml +++ /dev/null @@ -1,31 +0,0 @@ -authorName: Unknown -experimentName: enas_macro -trialConcurrency: 20 -maxExecDuration: 2400h -maxTrialNum: 20000 -#choice: local, remote -trainingServicePlatform: paiYarn -#choice: true, false -useAnnotation: true -multiPhase: false -versionCheck: false -nniManagerIp: 0.0.0.0 -tuner: - builtinTunerName: PPOTuner - classArgs: - optimize_mode: maximize - trials_per_update: 60 - epochs_per_update: 20 - minibatch_size: 6 -trial: - command: sh ./macro_cifar10_pai.sh - codeDir: ./ - gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - image: msranni/nni:latest - virtualCluster: nni -paiYarnConfig: - userName: your_account - passWord: your_passwd - host: 0.0.0.0 diff --git a/examples/trials/nas_cifar10/config_pai_ppo.yml b/examples/trials/nas_cifar10/config_pai_ppo.yml deleted file mode 100644 index f5082d87d0..0000000000 --- a/examples/trials/nas_cifar10/config_pai_ppo.yml +++ /dev/null @@ -1,34 +0,0 @@ -authorName: Unknown -experimentName: enas_macro -trialConcurrency: 20 -maxExecDuration: 2400h -maxTrialNum: 20000 -#choice: local, remote -trainingServicePlatform: pai -#choice: true, false -useAnnotation: true -multiPhase: false -versionCheck: false -nniManagerIp: 0.0.0.0 -tuner: - builtinTunerName: PPOTuner - classArgs: - optimize_mode: maximize - trials_per_update: 60 - epochs_per_update: 20 - minibatch_size: 6 -trial: - command: sh ./macro_cifar10_pai.sh - codeDir: ./ - gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - image: msranni/nni:latest - virtualCluster: nni - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStoragePlugin: team_wise -paiConfig: - userName: your_account - token: your_token - host: 0.0.0.0 diff --git a/examples/trials/nas_cifar10/config_ppo.yml b/examples/trials/nas_cifar10/config_ppo.yml deleted file mode 100644 index 8de1c5123f..0000000000 --- a/examples/trials/nas_cifar10/config_ppo.yml +++ /dev/null @@ -1,24 +0,0 @@ -authorName: Unknown -experimentName: enas_macro -trialConcurrency: 4 -maxExecDuration: 2400h -maxTrialNum: 20000 -#choice: local, remote -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -multiPhase: false -tuner: - builtinTunerName: PPOTuner - classArgs: - optimize_mode: maximize - trials_per_update: 60 - epochs_per_update: 12 - minibatch_size: 10 - #could use the No. 0 gpu for this tuner - #if want to specify multiple gpus, here is an example of specifying three gpus: 0,1,2 - gpuIndices: 0 -trial: - command: sh ./macro_cifar10.sh - codeDir: ./ - gpuNum: 1 diff --git a/examples/trials/nas_cifar10/data/download.sh b/examples/trials/nas_cifar10/data/download.sh deleted file mode 100755 index 08c7256424..0000000000 --- a/examples/trials/nas_cifar10/data/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -tar xzf cifar-10-python.tar.gz && mv cifar-10-batches-py cifar10 \ No newline at end of file diff --git a/examples/trials/nas_cifar10/macro_cifar10.sh b/examples/trials/nas_cifar10/macro_cifar10.sh deleted file mode 100644 index 863256d802..0000000000 --- a/examples/trials/nas_cifar10/macro_cifar10.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -set -e -export PYTHONPATH="$(pwd)" - -python3 src/cifar10/nni_child_cifar10.py \ - --data_format="NCHW" \ - --search_for="macro" \ - --reset_output_dir \ - --data_path="data/cifar10" \ - --output_dir="outputs" \ - --train_data_size=45000 \ - --batch_size=100 \ - --num_epochs=8 \ - --log_every=50 \ - --eval_every_epochs=1 \ - --child_use_aux_heads \ - --child_num_layers=12 \ - --child_out_filters=36 \ - --child_l2_reg=0.0002 \ - --child_num_branches=6 \ - --child_num_cell_layers=5 \ - --child_keep_prob=0.50 \ - --child_drop_path_keep_prob=0.60 \ - --child_lr_cosine \ - --child_lr_max=0.05 \ - --child_lr_min=0.001 \ - --child_lr_T_0=10 \ - --child_lr_T_mul=2 \ - --child_mode="subgraph" \ - "$@" - diff --git a/examples/trials/nas_cifar10/macro_cifar10_pai.sh b/examples/trials/nas_cifar10/macro_cifar10_pai.sh deleted file mode 100644 index 226955edc7..0000000000 --- a/examples/trials/nas_cifar10/macro_cifar10_pai.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -set -e -export PYTHONPATH="$(pwd)" - -python3 src/cifar10/nni_child_cifar10.py \ - --data_format="NCHW" \ - --search_for="macro" \ - --reset_output_dir \ - --data_path="data/cifar10" \ - --output_dir="outputs" \ - --train_data_size=45000 \ - --batch_size=100 \ - --num_epochs=30 \ - --log_every=50 \ - --eval_every_epochs=1 \ - --child_use_aux_heads \ - --child_num_layers=12 \ - --child_out_filters=36 \ - --child_l2_reg=0.0002 \ - --child_num_branches=6 \ - --child_num_cell_layers=5 \ - --child_keep_prob=0.50 \ - --child_drop_path_keep_prob=0.60 \ - --child_lr_cosine \ - --child_lr_max=0.05 \ - --child_lr_min=0.001 \ - --child_lr_T_0=10 \ - --child_lr_T_mul=2 \ - --child_mode="subgraph" \ - "$@" - diff --git a/examples/trials/nas_cifar10/src/__init__.py b/examples/trials/nas_cifar10/src/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/trials/nas_cifar10/src/cifar10/__init__.py b/examples/trials/nas_cifar10/src/cifar10/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/trials/nas_cifar10/src/cifar10/data_utils.py b/examples/trials/nas_cifar10/src/cifar10/data_utils.py deleted file mode 100644 index b8a8c36339..0000000000 --- a/examples/trials/nas_cifar10/src/cifar10/data_utils.py +++ /dev/null @@ -1,74 +0,0 @@ -import os -import sys -import pickle -import numpy as np -import tensorflow as tf - - -def _read_data(data_path, train_files): - """Reads CIFAR-10 format data. Always returns NHWC format. - - Returns: - images: np tensor of size [N, H, W, C] - labels: np tensor of size [N] - """ - images, labels = [], [] - for file_name in train_files: - print(file_name) - full_name = os.path.join(data_path, file_name) - with open(full_name, "rb") as finp: - data = pickle.load(finp, encoding='latin1') - batch_images = data["data"].astype(np.float32) / 255.0 - batch_labels = np.array(data["labels"], dtype=np.int32) - images.append(batch_images) - labels.append(batch_labels) - images = np.concatenate(images, axis=0) - labels = np.concatenate(labels, axis=0) - images = np.reshape(images, [-1, 3, 32, 32]) - images = np.transpose(images, [0, 2, 3, 1]) - - return images, labels - - -def read_data(data_path, num_valids=5000): - print("-" * 80) - print("Reading data") - - images, labels = {}, {} - - train_files = [ - "data_batch_1", - "data_batch_2", - "data_batch_3", - "data_batch_4", - "data_batch_5", - ] - test_file = [ - "test_batch", - ] - images["train"], labels["train"] = _read_data(data_path, train_files) - - if num_valids: - images["valid"] = images["train"][-num_valids:] - labels["valid"] = labels["train"][-num_valids:] - - images["train"] = images["train"][:-num_valids] - labels["train"] = labels["train"][:-num_valids] - else: - images["valid"], labels["valid"] = None, None - - images["test"], labels["test"] = _read_data(data_path, test_file) - - print("Prepropcess: [subtract mean], [divide std]") - mean = np.mean(images["train"], axis=(0, 1, 2), keepdims=True) - std = np.std(images["train"], axis=(0, 1, 2), keepdims=True) - - print("mean: {}".format(np.reshape(mean * 255.0, [-1]))) - print("std: {}".format(np.reshape(std * 255.0, [-1]))) - - images["train"] = (images["train"] - mean) / std - if num_valids: - images["valid"] = (images["valid"] - mean) / std - images["test"] = (images["test"] - mean) / std - - return images, labels diff --git a/examples/trials/nas_cifar10/src/cifar10/general_child.py b/examples/trials/nas_cifar10/src/cifar10/general_child.py deleted file mode 100644 index 4e80dc340e..0000000000 --- a/examples/trials/nas_cifar10/src/cifar10/general_child.py +++ /dev/null @@ -1,423 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np -import tensorflow as tf -from src.common_ops import create_weight, batch_norm, batch_norm_with_mask, global_avg_pool, conv_op, pool_op -from src.utils import count_model_params, get_train_ops, get_C, get_strides -from src.cifar10.models import Model - - -class GeneralChild(Model): - def __init__(self, - images, - labels, - cutout_size=None, - fixed_arc=None, - out_filters_scale=1, - num_layers=2, - num_branches=6, - out_filters=24, - keep_prob=1.0, - batch_size=32, - clip_mode=None, - grad_bound=None, - l2_reg=1e-4, - lr_init=0.1, - lr_dec_start=0, - lr_dec_every=10000, - lr_dec_rate=0.1, - lr_cosine=False, - lr_max=None, - lr_min=None, - lr_T_0=None, - lr_T_mul=None, - optim_algo=None, - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - data_format="NHWC", - name="child", - mode="subgraph", - *args, - **kwargs - ): - - super(self.__class__, self).__init__( - images, - labels, - cutout_size=cutout_size, - batch_size=batch_size, - clip_mode=clip_mode, - grad_bound=grad_bound, - l2_reg=l2_reg, - lr_init=lr_init, - lr_dec_start=lr_dec_start, - lr_dec_every=lr_dec_every, - lr_dec_rate=lr_dec_rate, - keep_prob=keep_prob, - optim_algo=optim_algo, - sync_replicas=sync_replicas, - num_aggregate=num_aggregate, - num_replicas=num_replicas, - data_format=data_format, - name=name) - - self.lr_cosine = lr_cosine - self.lr_max = lr_max - self.lr_min = lr_min - self.lr_T_0 = lr_T_0 - self.lr_T_mul = lr_T_mul - self.out_filters = out_filters * out_filters_scale - self.num_layers = num_layers - self.mode = mode - - self.num_branches = num_branches - self.fixed_arc = fixed_arc - self.out_filters_scale = out_filters_scale - - pool_distance = self.num_layers // 3 - self.pool_layers = [pool_distance - 1, 2 * pool_distance - 1] - - - - def _factorized_reduction(self, x, out_filters, stride, is_training): - """Reduces the shape of x without information loss due to striding.""" - assert out_filters % 2 == 0, ( - "Need even number of filters when using this factorized reduction.") - if stride == 1: - with tf.variable_scope("path_conv"): - inp_c = get_C(x, self.data_format) - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - return x - - stride_spec = get_strides(stride, self.data_format) - # Skip path 1 - path1 = tf.nn.avg_pool( - x, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format) - with tf.variable_scope("path1_conv"): - inp_c = get_C(path1, self.data_format) - w = create_weight("w", [1, 1, inp_c, out_filters // 2]) - path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - - # Skip path 2 - # First pad with 0"s on the right and bottom, then shift the filter to - # include those 0"s that were added. - if self.data_format == "NHWC": - pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]] - path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :] - concat_axis = 3 - else: - pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]] - path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:] - concat_axis = 1 - - path2 = tf.nn.avg_pool( - path2, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format) - with tf.variable_scope("path2_conv"): - inp_c = get_C(path2, self.data_format) - w = create_weight("w", [1, 1, inp_c, out_filters // 2]) - path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "SAME", - data_format=self.data_format) - - # Concat and apply BN - final_path = tf.concat(values=[path1, path2], axis=concat_axis) - final_path = batch_norm(final_path, is_training, - data_format=self.data_format) - - return final_path - - def _model(self, images, is_training, reuse=False): - '''Build model''' - with tf.variable_scope(self.name, reuse=reuse): - layers = [] - - out_filters = self.out_filters - with tf.variable_scope("stem_conv"): - w = create_weight("w", [3, 3, 3, out_filters]) - x = tf.nn.conv2d( - images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format) - x = batch_norm(x, is_training, data_format=self.data_format) - layers.append(x) - - def add_fixed_pooling_layer(layer_id, layers, out_filters, is_training): - '''Add a fixed pooling layer every four layers''' - out_filters *= 2 - with tf.variable_scope("pool_at_{0}".format(layer_id)): - pooled_layers = [] - for i, layer in enumerate(layers): - with tf.variable_scope("from_{0}".format(i)): - x = self._factorized_reduction( - layer, out_filters, 2, is_training) - pooled_layers.append(x) - return pooled_layers, out_filters - - def post_process_out(out, optional_inputs): - '''Form skip connection and perform batch norm''' - with tf.variable_scope("skip"): - inputs = layers[-1] - if self.data_format == "NHWC": - inp_h = inputs.get_shape()[1].value - inp_w = inputs.get_shape()[2].value - inp_c = inputs.get_shape()[3].value - out.set_shape([None, inp_h, inp_w, out_filters]) - elif self.data_format == "NCHW": - inp_c = inputs.get_shape()[1].value - inp_h = inputs.get_shape()[2].value - inp_w = inputs.get_shape()[3].value - out.set_shape([None, out_filters, inp_h, inp_w]) - optional_inputs.append(out) - pout = tf.add_n(optional_inputs) - out = batch_norm(pout, is_training, - data_format=self.data_format) - layers.append(out) - return out - - global layer_id - layer_id = -1 - - def get_layer_id(): - global layer_id - layer_id += 1 - return 'layer_' + str(layer_id) - - def conv3(inputs): - # res_layers is pre_layers that are chosen to form skip connection - # layers[-1] is always the latest input - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_0'): - out = conv_op( - inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None) - out = post_process_out(out, inputs[1]) - return out - - def conv3_sep(inputs): - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_1'): - out = conv_op( - inputs[0][0], 3, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True) - out = post_process_out(out, inputs[1]) - return out - - def conv5(inputs): - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_2'): - out = conv_op( - inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None) - out = post_process_out(out, inputs[1]) - return out - - def conv5_sep(inputs): - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_3'): - out = conv_op( - inputs[0][0], 5, is_training, out_filters, out_filters, self.data_format, start_idx=None, separable=True) - out = post_process_out(out, inputs[1]) - return out - - def avg_pool(inputs): - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_4'): - out = pool_op( - inputs[0][0], is_training, out_filters, out_filters, "avg", self.data_format, start_idx=None) - out = post_process_out(out, inputs[1]) - return out - - def max_pool(inputs): - with tf.variable_scope(get_layer_id()): - with tf.variable_scope('branch_5'): - out = pool_op( - inputs[0][0], is_training, out_filters, out_filters, "max", self.data_format, start_idx=None) - out = post_process_out(out, inputs[1]) - return out - - """@nni.mutable_layers( - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs:[x], - layer_output: layer_0_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs:[layer_0_out], - optional_inputs: [layer_0_out], - optional_input_size: [0, 1], - layer_output: layer_1_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs:[layer_1_out], - optional_inputs: [layer_0_out, layer_1_out], - optional_input_size: [0, 1], - layer_output: layer_2_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs:[layer_2_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out], - optional_input_size: [0, 1], - layer_output: layer_3_out - } - )""" - layers, out_filters = add_fixed_pooling_layer( - 3, layers, out_filters, is_training) - layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-4:] - """@nni.mutable_layers( - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_3_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out], - optional_input_size: [0, 1], - layer_output: layer_4_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_4_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out], - optional_input_size: [0, 1], - layer_output: layer_5_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_5_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out], - optional_input_size: [0, 1], - layer_output: layer_6_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_6_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out], - optional_input_size: [0, 1], - layer_output: layer_7_out - } - )""" - layers, out_filters = add_fixed_pooling_layer( - 7, layers, out_filters, is_training) - layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out = layers[ - -8:] - """@nni.mutable_layers( - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_7_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out], - optional_input_size: [0, 1], - layer_output: layer_8_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_8_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out], - optional_input_size: [0, 1], - layer_output: layer_9_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs: [layer_9_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out], - optional_input_size: [0, 1], - layer_output: layer_10_out - }, - { - layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()], - fixed_inputs:[layer_10_out], - optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out], - optional_input_size: [0, 1], - layer_output: layer_11_out - } - )""" - - x = global_avg_pool(layer_11_out, data_format=self.data_format) - if is_training: - x = tf.nn.dropout(x, self.keep_prob) - with tf.variable_scope("fc"): - if self.data_format == "NHWC": - inp_c = x.get_shape()[3].value - elif self.data_format == "NCHW": - inp_c = x.get_shape()[1].value - else: - raise ValueError( - "Unknown data_format {0}".format(self.data_format)) - w = create_weight("w", [inp_c, 10]) - x = tf.matmul(x, w) - return x - - - # override - def _build_train(self): - print("-" * 80) - print("Build train graph") - logits = self._model(self.x_train, is_training=True) - log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=self.y_train) - self.loss = tf.reduce_mean(log_probs) - - self.train_preds = tf.argmax(logits, axis=1) - self.train_preds = tf.to_int32(self.train_preds) - self.train_acc = tf.equal(self.train_preds, self.y_train) - self.train_acc = tf.to_int32(self.train_acc) - self.train_acc = tf.reduce_sum(self.train_acc) - - tf_variables = [var - for var in tf.trainable_variables() if var.name.startswith(self.name)] - self.num_vars = count_model_params(tf_variables) - print("Model has {} params".format(self.num_vars)) - - self.global_step = tf.Variable( - 0, dtype=tf.int32, trainable=False, name="global_step") - - self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops( - self.loss, - tf_variables, - self.global_step, - clip_mode=self.clip_mode, - grad_bound=self.grad_bound, - l2_reg=self.l2_reg, - lr_init=self.lr_init, - lr_dec_start=self.lr_dec_start, - lr_dec_every=self.lr_dec_every, - lr_dec_rate=self.lr_dec_rate, - lr_cosine=self.lr_cosine, - lr_max=self.lr_max, - lr_min=self.lr_min, - lr_T_0=self.lr_T_0, - lr_T_mul=self.lr_T_mul, - num_train_batches=self.num_train_batches, - optim_algo=self.optim_algo, - sync_replicas=False, - num_aggregate=self.num_aggregate, - num_replicas=self.num_replicas) - - # override - def _build_valid(self): - if self.x_valid is not None: - print("-" * 80) - print("Build valid graph") - logits = self._model(self.x_valid, False, reuse=True) - self.valid_preds = tf.argmax(logits, axis=1) - self.valid_preds = tf.to_int32(self.valid_preds) - self.valid_acc = tf.equal(self.valid_preds, self.y_valid) - self.valid_acc = tf.to_int32(self.valid_acc) - self.valid_acc = tf.reduce_sum(self.valid_acc) - - # override - def _build_test(self): - print("-" * 80) - print("Build test graph") - logits = self._model(self.x_test, False, reuse=True) - self.test_preds = tf.argmax(logits, axis=1) - self.test_preds = tf.to_int32(self.test_preds) - self.test_acc = tf.equal(self.test_preds, self.y_test) - self.test_acc = tf.to_int32(self.test_acc) - self.test_acc = tf.reduce_sum(self.test_acc) - - - def build_model(self): - - self._build_train() - self._build_valid() - self._build_test() diff --git a/examples/trials/nas_cifar10/src/cifar10/models.py b/examples/trials/nas_cifar10/src/cifar10/models.py deleted file mode 100644 index 089fe846a6..0000000000 --- a/examples/trials/nas_cifar10/src/cifar10/models.py +++ /dev/null @@ -1,196 +0,0 @@ -import os -import sys - -import numpy as np -import tensorflow as tf - - -class Model(object): - def __init__(self, - images, - labels, - cutout_size=None, - batch_size=32, - eval_batch_size=100, - clip_mode=None, - grad_bound=None, - l2_reg=1e-4, - lr_init=0.1, - lr_dec_start=0, - lr_dec_every=100, - lr_dec_rate=0.1, - keep_prob=1.0, - optim_algo=None, - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - data_format="NHWC", - name="generic_model", - seed=None, - ): - """ - Args: - lr_dec_every: number of epochs to decay - """ - print("-" * 80) - print("Build model {}".format(name)) - - self.cutout_size = cutout_size - self.batch_size = batch_size - self.eval_batch_size = eval_batch_size - self.clip_mode = clip_mode - self.grad_bound = grad_bound - self.l2_reg = l2_reg - self.lr_init = lr_init - self.lr_dec_start = lr_dec_start - self.lr_dec_rate = lr_dec_rate - self.keep_prob = keep_prob - self.optim_algo = optim_algo - self.sync_replicas = sync_replicas - self.num_aggregate = num_aggregate - self.num_replicas = num_replicas - self.data_format = data_format - self.name = name - self.seed = seed - - self.global_step = None - self.valid_acc = None - self.test_acc = None - print("Build data ops") - with tf.device("/cpu:0"): - # training data - self.num_train_examples = np.shape(images["train"])[0] - - self.num_train_batches = ( - self.num_train_examples + self.batch_size - 1) // self.batch_size - x_train, y_train = tf.train.shuffle_batch( - [images["train"], labels["train"]], - batch_size=self.batch_size, - capacity=50000, - enqueue_many=True, - min_after_dequeue=0, - num_threads=16, - seed=self.seed, - allow_smaller_final_batch=True, - ) - self.lr_dec_every = lr_dec_every * self.num_train_batches - - def _pre_process(x): - x = tf.pad(x, [[4, 4], [4, 4], [0, 0]]) - x = tf.random_crop(x, [32, 32, 3], seed=self.seed) - x = tf.image.random_flip_left_right(x, seed=self.seed) - if self.cutout_size is not None: - mask = tf.ones( - [self.cutout_size, self.cutout_size], dtype=tf.int32) - start = tf.random_uniform( - [2], minval=0, maxval=32, dtype=tf.int32) - mask = tf.pad(mask, [[self.cutout_size + start[0], 32 - start[0]], - [self.cutout_size + start[1], 32 - start[1]]]) - mask = mask[self.cutout_size: self.cutout_size + 32, - self.cutout_size: self.cutout_size + 32] - mask = tf.reshape(mask, [32, 32, 1]) - mask = tf.tile(mask, [1, 1, 3]) - x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x)) - if self.data_format == "NCHW": - x = tf.transpose(x, [2, 0, 1]) - - return x - self.x_train = tf.map_fn(_pre_process, x_train, back_prop=False) - self.y_train = y_train - - # valid data - self.x_valid, self.y_valid = None, None - if images["valid"] is not None: - images["valid_original"] = np.copy(images["valid"]) - labels["valid_original"] = np.copy(labels["valid"]) - if self.data_format == "NCHW": - images["valid"] = tf.transpose( - images["valid"], [0, 3, 1, 2]) - self.num_valid_examples = np.shape(images["valid"])[0] - self.num_valid_batches = ( - (self.num_valid_examples + self.eval_batch_size - 1) - // self.eval_batch_size) - self.x_valid, self.y_valid = tf.train.batch( - [images["valid"], labels["valid"]], - batch_size=self.eval_batch_size, - capacity=5000, - enqueue_many=True, - num_threads=1, - allow_smaller_final_batch=True, - ) - - # test data - if self.data_format == "NCHW": - images["test"] = tf.transpose(images["test"], [0, 3, 1, 2]) - self.num_test_examples = np.shape(images["test"])[0] - self.num_test_batches = ( - (self.num_test_examples + self.eval_batch_size - 1) - // self.eval_batch_size) - self.x_test, self.y_test = tf.train.batch( - [images["test"], labels["test"]], - batch_size=self.eval_batch_size, - capacity=10000, - enqueue_many=True, - num_threads=1, - allow_smaller_final_batch=True, - ) - - # cache images and labels - self.images = images - self.labels = labels - - def eval_once(self, sess, eval_set, child_model, verbose=False): - """Expects self.acc and self.global_step to be defined. - - Args: - sess: tf.Session() or one of its wrap arounds. - feed_dict: can be used to give more information to sess.run(). - eval_set: "valid" or "test" - """ - - assert self.global_step is not None - global_step = sess.run(self.global_step) - print("Eval at {}".format(global_step)) - - if eval_set == "valid": - assert self.x_valid is not None - assert self.valid_acc is not None - num_examples = self.num_valid_examples - num_batches = self.num_valid_batches - acc_op = self.valid_acc - elif eval_set == "test": - assert self.test_acc is not None - num_examples = self.num_test_examples - num_batches = self.num_test_batches - acc_op = self.test_acc - else: - raise NotImplementedError("Unknown eval_set '{}'".format(eval_set)) - - total_acc = 0 - total_exp = 0 - - for batch_id in range(num_batches): - acc = sess.run(acc_op) - - total_acc += acc - total_exp += self.eval_batch_size - if verbose: - sys.stdout.write( - "\r{:<5d}/{:>5d}".format(total_acc, total_exp)) - if verbose: - print("") - print("{}_accuracy: {:<6.4f}".format( - eval_set, float(total_acc) / total_exp)) - return float(total_acc) / total_exp - - def _model(self, images, is_training, reuse=None): - raise NotImplementedError("Abstract method") - - def _build_train(self): - raise NotImplementedError("Abstract method") - - def _build_valid(self): - raise NotImplementedError("Abstract method") - - def _build_test(self): - raise NotImplementedError("Abstract method") diff --git a/examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py b/examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py deleted file mode 100644 index 5481ba7b07..0000000000 --- a/examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py +++ /dev/null @@ -1,162 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import shutil -import logging -import tensorflow as tf -from src.cifar10.data_utils import read_data -from src.cifar10.general_child import GeneralChild -import src.cifar10_flags -from src.cifar10_flags import FLAGS - - -def build_logger(log_name): - logger = logging.getLogger(log_name) - logger.setLevel(logging.DEBUG) - fh = logging.FileHandler(log_name+'.log') - fh.setLevel(logging.DEBUG) - logger.addHandler(fh) - return logger - - -logger = build_logger("nni_child_cifar10") - - -def build_trial(images, labels, ChildClass): - '''Build child class''' - child_model = ChildClass( - images, - labels, - use_aux_heads=FLAGS.child_use_aux_heads, - cutout_size=FLAGS.child_cutout_size, - num_layers=FLAGS.child_num_layers, - num_cells=FLAGS.child_num_cells, - num_branches=FLAGS.child_num_branches, - fixed_arc=FLAGS.child_fixed_arc, - out_filters_scale=FLAGS.child_out_filters_scale, - out_filters=FLAGS.child_out_filters, - keep_prob=FLAGS.child_keep_prob, - drop_path_keep_prob=FLAGS.child_drop_path_keep_prob, - num_epochs=FLAGS.num_epochs, - l2_reg=FLAGS.child_l2_reg, - data_format=FLAGS.data_format, - batch_size=FLAGS.batch_size, - clip_mode="norm", - grad_bound=FLAGS.child_grad_bound, - lr_init=FLAGS.child_lr, - lr_dec_every=FLAGS.child_lr_dec_every, - lr_dec_rate=FLAGS.child_lr_dec_rate, - lr_cosine=FLAGS.child_lr_cosine, - lr_max=FLAGS.child_lr_max, - lr_min=FLAGS.child_lr_min, - lr_T_0=FLAGS.child_lr_T_0, - lr_T_mul=FLAGS.child_lr_T_mul, - optim_algo="momentum", - sync_replicas=FLAGS.child_sync_replicas, - num_aggregate=FLAGS.child_num_aggregate, - num_replicas=FLAGS.child_num_replicas - ) - - return child_model - - -def get_child_ops(child_model): - '''Assemble child op to a dict''' - child_ops = { - "global_step": child_model.global_step, - "loss": child_model.loss, - "train_op": child_model.train_op, - "lr": child_model.lr, - "grad_norm": child_model.grad_norm, - "train_acc": child_model.train_acc, - "optimizer": child_model.optimizer, - "num_train_batches": child_model.num_train_batches, - "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs, - "eval_func": child_model.eval_once, - } - return child_ops - - -class NASTrial(): - - def __init__(self): - images, labels = read_data(FLAGS.data_path, num_valids=0) - - self.output_dir = os.path.join(os.getenv('NNI_OUTPUT_DIR'), '../..') - self.file_path = os.path.join( - self.output_dir, 'trainable_variable.txt') - - self.graph = tf.Graph() - with self.graph.as_default(): - self.child_model = build_trial(images, labels, GeneralChild) - - self.total_data = {} - - self.child_model.build_model() - self.child_ops = get_child_ops(self.child_model) - config = tf.ConfigProto( - intra_op_parallelism_threads=0, - inter_op_parallelism_threads=0, - allow_soft_placement=True) - - self.sess = tf.train.SingularMonitoredSession(config=config) - - logger.debug('initlize NASTrial done.') - - def run_one_step(self): - '''Run this model on a batch of data''' - run_ops = [ - self.child_ops["loss"], - self.child_ops["lr"], - self.child_ops["grad_norm"], - self.child_ops["train_acc"], - self.child_ops["train_op"], - ] - loss, lr, gn, tr_acc, _ = self.sess.run(run_ops) - global_step = self.sess.run(self.child_ops["global_step"]) - log_string = "" - log_string += "ch_step={:<6d}".format(global_step) - log_string += " loss={:<8.6f}".format(loss) - log_string += " lr={:<8.4f}".format(lr) - log_string += " |g|={:<8.4f}".format(gn) - log_string += " tr_acc={:<3d}/{:>3d}".format(tr_acc, FLAGS.batch_size) - if int(global_step) % FLAGS.log_every == 0: - logger.debug(log_string) - return loss, global_step - - def run(self): - '''Run this model according to the `epoch` set in FALGS''' - max_acc = 0 - while True: - _, global_step = self.run_one_step() - if global_step % self.child_ops['num_train_batches'] == 0: - acc = self.child_ops["eval_func"]( - self.sess, "test", self.child_model) - max_acc = max(max_acc, acc) - '''@nni.report_intermediate_result(acc)''' - if global_step / self.child_ops['num_train_batches'] >= FLAGS.num_epochs: - '''@nni.report_final_result(max_acc)''' - break - - -def main(_): - logger.debug("-" * 80) - - if not os.path.isdir(FLAGS.output_dir): - logger.debug( - "Path {} does not exist. Creating.".format(FLAGS.output_dir)) - os.makedirs(FLAGS.output_dir) - elif FLAGS.reset_output_dir: - logger.debug( - "Path {} exists. Remove and remake.".format(FLAGS.output_dir)) - shutil.rmtree(FLAGS.output_dir) - os.makedirs(FLAGS.output_dir) - logger.debug("-" * 80) - trial = NASTrial() - - trial.run() - - -if __name__ == "__main__": - tf.app.run() diff --git a/examples/trials/nas_cifar10/src/cifar10_flags.py b/examples/trials/nas_cifar10/src/cifar10_flags.py deleted file mode 100644 index 2374f76b90..0000000000 --- a/examples/trials/nas_cifar10/src/cifar10_flags.py +++ /dev/null @@ -1,45 +0,0 @@ -import tensorflow as tf -from src.utils import DEFINE_boolean -from src.utils import DEFINE_float -from src.utils import DEFINE_integer -from src.utils import DEFINE_string -flags = tf.app.flags -FLAGS = flags.FLAGS - -DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.") -DEFINE_string("data_path", "", "") -DEFINE_string("output_dir", "", "") -DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCWH'") -DEFINE_string("search_for", None, "Must be [macro|micro]") -DEFINE_integer("train_data_size", 45000, "") -DEFINE_integer("batch_size", 32, "") - -DEFINE_integer("num_epochs", 300, "") -DEFINE_integer("child_lr_dec_every", 100, "") -DEFINE_integer("child_num_layers", 5, "") -DEFINE_integer("child_num_cells", 5, "") -DEFINE_integer("child_filter_size", 5, "") -DEFINE_integer("child_out_filters", 48, "") -DEFINE_integer("child_out_filters_scale", 1, "") -DEFINE_integer("child_num_branches", 4, "") -DEFINE_integer("child_num_aggregate", None, "") -DEFINE_integer("child_num_replicas", 1, "") -DEFINE_integer("child_block_size", 3, "") -DEFINE_integer("child_lr_T_0", None, "for lr schedule") -DEFINE_integer("child_lr_T_mul", None, "for lr schedule") -DEFINE_integer("child_cutout_size", None, "CutOut size") -DEFINE_float("child_grad_bound", 5.0, "Gradient clipping") -DEFINE_float("child_lr", 0.1, "") -DEFINE_float("child_lr_dec_rate", 0.1, "") -DEFINE_float("child_keep_prob", 0.5, "") -DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob") -DEFINE_float("child_l2_reg", 1e-4, "") -DEFINE_float("child_lr_max", None, "for lr schedule") -DEFINE_float("child_lr_min", None, "for lr schedule") -DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]") -DEFINE_string("child_fixed_arc", None, "") -DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head") -DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.") -DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule") -DEFINE_integer("log_every", 50, "How many steps to log") -DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval") diff --git a/examples/trials/nas_cifar10/src/common_ops.py b/examples/trials/nas_cifar10/src/common_ops.py deleted file mode 100644 index e0933f6e53..0000000000 --- a/examples/trials/nas_cifar10/src/common_ops.py +++ /dev/null @@ -1,255 +0,0 @@ -import numpy as np -import tensorflow as tf -from tensorflow.python.training import moving_averages - - -def lstm(x, prev_c, prev_h, w): - ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w) - i, f, o, g = tf.split(ifog, 4, axis=1) - i = tf.sigmoid(i) - f = tf.sigmoid(f) - o = tf.sigmoid(o) - g = tf.tanh(g) - next_c = i * g + f * prev_c - next_h = o * tf.tanh(next_c) - return next_c, next_h - - -def stack_lstm(x, prev_c, prev_h, w): - next_c, next_h = [], [] - for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)): - inputs = x if layer_id == 0 else next_h[-1] - curr_c, curr_h = lstm(inputs, _c, _h, _w) - next_c.append(curr_c) - next_h.append(curr_h) - return next_c, next_h - - -def create_weight(name, shape, initializer=None, trainable=True, seed=None): - if initializer is None: - initializer = tf.contrib.keras.initializers.he_normal(seed=seed) - return tf.get_variable(name, shape, initializer=initializer, trainable=trainable) - - -def create_bias(name, shape, initializer=None): - if initializer is None: - initializer = tf.constant_initializer(0.0, dtype=tf.float32) - return tf.get_variable(name, shape, initializer=initializer) - - -def conv_op(inputs, filter_size, is_training, count, out_filters, - data_format, ch_mul=1, start_idx=None, separable=False): - """ - Args: - start_idx: where to start taking the output channels. if None, assuming - fixed_arc mode - count: how many output_channels to take. - """ - - if data_format == "NHWC": - inp_c = inputs.get_shape()[3].value - elif data_format == "NCHW": - inp_c = inputs.get_shape()[1].value - - with tf.variable_scope("inp_conv_1"): - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], - "SAME", data_format=data_format) - x = batch_norm(x, is_training, data_format=data_format) - x = tf.nn.relu(x) - - with tf.variable_scope("out_conv_{}".format(filter_size)): - if start_idx is None: - if separable: - w_depth = create_weight( - "w_depth", [filter_size, filter_size, out_filters, ch_mul]) - w_point = create_weight( - "w_point", [1, 1, out_filters * ch_mul, count]) - x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1], - padding="SAME", data_format=data_format) - x = batch_norm( - x, is_training, data_format=data_format) - else: - w = create_weight( - "w", [filter_size, filter_size, inp_c, count]) - x = tf.nn.conv2d( - x, w, [1, 1, 1, 1], "SAME", data_format=data_format) - x = batch_norm( - x, is_training, data_format=data_format) - else: - if separable: - w_depth = create_weight( - "w_depth", [filter_size, filter_size, out_filters, ch_mul]) - #test_depth = w_depth - w_point = create_weight( - "w_point", [out_filters, out_filters * ch_mul]) - w_point = w_point[start_idx:start_idx+count, :] - w_point = tf.transpose(w_point, [1, 0]) - w_point = tf.reshape( - w_point, [1, 1, out_filters * ch_mul, count]) - - x = tf.nn.separable_conv2d(x, w_depth, w_point, strides=[1, 1, 1, 1], - padding="SAME", data_format=data_format) - mask = tf.range(0, out_filters, dtype=tf.int32) - mask = tf.logical_and( - start_idx <= mask, mask < start_idx + count) - x = batch_norm_with_mask( - x, is_training, mask, out_filters, data_format=data_format) - else: - w = create_weight( - "w", [filter_size, filter_size, out_filters, out_filters]) - w = tf.transpose(w, [3, 0, 1, 2]) - w = w[start_idx:start_idx+count, :, :, :] - w = tf.transpose(w, [1, 2, 3, 0]) - x = tf.nn.conv2d( - x, w, [1, 1, 1, 1], "SAME", data_format=data_format) - mask = tf.range(0, out_filters, dtype=tf.int32) - mask = tf.logical_and( - start_idx <= mask, mask < start_idx + count) - x = batch_norm_with_mask( - x, is_training, mask, out_filters, data_format=data_format) - x = tf.nn.relu(x) - return x - -def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format, start_idx=None): - """ - Args: - start_idx: where to start taking the output channels. if None, assuming - fixed_arc mode - count: how many output_channels to take. - """ - - if data_format == "NHWC": - inp_c = inputs.get_shape()[3].value - elif data_format == "NCHW": - inp_c = inputs.get_shape()[1].value - - with tf.variable_scope("conv_1"): - w = create_weight("w", [1, 1, inp_c, out_filters]) - x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], - "SAME", data_format=data_format) - x = batch_norm(x, is_training, data_format=data_format) - x = tf.nn.relu(x) - - with tf.variable_scope("pool"): - if data_format == "NHWC": - actual_data_format = "channels_last" - elif data_format == "NCHW": - actual_data_format = "channels_first" - - if avg_or_max == "avg": - x = tf.layers.average_pooling2d( - x, [3, 3], [1, 1], "SAME", data_format=actual_data_format) - elif avg_or_max == "max": - x = tf.layers.max_pooling2d( - x, [3, 3], [1, 1], "SAME", data_format=actual_data_format) - else: - raise ValueError("Unknown pool {}".format(avg_or_max)) - - if start_idx is not None: - if data_format == "NHWC": - x = x[:, :, :, start_idx: start_idx+count] - elif data_format == "NCHW": - x = x[:, start_idx: start_idx+count, :, :] - - return x - - -def global_avg_pool(x, data_format="NHWC"): - if data_format == "NHWC": - x = tf.reduce_mean(x, [1, 2]) - elif data_format == "NCHW": - x = tf.reduce_mean(x, [2, 3]) - else: - raise NotImplementedError("Unknown data_format {}".format(data_format)) - return x - - -def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5, - data_format="NHWC"): - if data_format == "NHWC": - shape = [x.get_shape()[3]] - elif data_format == "NCHW": - shape = [x.get_shape()[1]] - else: - raise NotImplementedError("Unknown data_format {}".format(data_format)) - - with tf.variable_scope(name, reuse=None if is_training else True): - offset = tf.get_variable( - "offset", shape, - initializer=tf.constant_initializer(0.0, dtype=tf.float32)) - scale = tf.get_variable( - "scale", shape, - initializer=tf.constant_initializer(1.0, dtype=tf.float32)) - moving_mean = tf.get_variable( - "moving_mean", shape, trainable=False, - initializer=tf.constant_initializer(0.0, dtype=tf.float32)) - moving_variance = tf.get_variable( - "moving_variance", shape, trainable=False, - initializer=tf.constant_initializer(1.0, dtype=tf.float32)) - - if is_training: - x, mean, variance = tf.nn.fused_batch_norm( - x, scale, offset, epsilon=epsilon, data_format=data_format, - is_training=True) - update_mean = moving_averages.assign_moving_average( - moving_mean, mean, decay) - update_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay) - with tf.control_dependencies([update_mean, update_variance]): - x = tf.identity(x) - else: - x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, mean=moving_mean, - variance=moving_variance, - epsilon=epsilon, data_format=data_format, - is_training=False) - return x - - -def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn", - decay=0.9, epsilon=1e-3, data_format="NHWC"): - - shape = [num_channels] - indices = tf.where(mask) - indices = tf.to_int32(indices) - indices = tf.reshape(indices, [-1]) - - with tf.variable_scope(name, reuse=None if is_training else True): - offset = tf.get_variable( - "offset", shape, - initializer=tf.constant_initializer(0.0, dtype=tf.float32)) - scale = tf.get_variable( - "scale", shape, - initializer=tf.constant_initializer(1.0, dtype=tf.float32)) - offset = tf.boolean_mask(offset, mask) - scale = tf.boolean_mask(scale, mask) - - moving_mean = tf.get_variable( - "moving_mean", shape, trainable=False, - initializer=tf.constant_initializer(0.0, dtype=tf.float32)) - moving_variance = tf.get_variable( - "moving_variance", shape, trainable=False, - initializer=tf.constant_initializer(1.0, dtype=tf.float32)) - - if is_training: - x, mean, variance = tf.nn.fused_batch_norm( - x, scale, offset, epsilon=epsilon, data_format=data_format, - is_training=True) - mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean) - variance = (1.0 - decay) * \ - (tf.boolean_mask(moving_variance, mask) - variance) - update_mean = tf.scatter_sub( - moving_mean, indices, mean, use_locking=True) - update_variance = tf.scatter_sub( - moving_variance, indices, variance, use_locking=True) - with tf.control_dependencies([update_mean, update_variance]): - x = tf.identity(x) - else: - masked_moving_mean = tf.boolean_mask(moving_mean, mask) - masked_moving_variance = tf.boolean_mask(moving_variance, mask) - x, _, _ = tf.nn.fused_batch_norm(x, scale, offset, - mean=masked_moving_mean, - variance=masked_moving_variance, - epsilon=epsilon, data_format=data_format, - is_training=False) - return x diff --git a/examples/trials/nas_cifar10/src/utils.py b/examples/trials/nas_cifar10/src/utils.py deleted file mode 100644 index 65d57af7f1..0000000000 --- a/examples/trials/nas_cifar10/src/utils.py +++ /dev/null @@ -1,262 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -import numpy as np -import tensorflow as tf - - -user_flags = [] - - -def DEFINE_string(name, default_value, doc_string): - tf.app.flags.DEFINE_string(name, default_value, doc_string) - global user_flags - user_flags.append(name) - - -def DEFINE_integer(name, default_value, doc_string): - tf.app.flags.DEFINE_integer(name, default_value, doc_string) - global user_flags - user_flags.append(name) - - -def DEFINE_float(name, default_value, doc_string): - tf.app.flags.DEFINE_float(name, default_value, doc_string) - global user_flags - user_flags.append(name) - - -def DEFINE_boolean(name, default_value, doc_string): - tf.app.flags.DEFINE_boolean(name, default_value, doc_string) - global user_flags - user_flags.append(name) - - -def print_user_flags(line_limit=80): - print("-" * 80) - - global user_flags - FLAGS = tf.app.flags.FLAGS - - for flag_name in sorted(user_flags): - value = "{}".format(getattr(FLAGS, flag_name)) - log_string = flag_name - log_string += "." * (line_limit - len(flag_name) - len(value)) - log_string += value - print(log_string) - - -def get_C(x, data_format): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - if data_format == "NHWC": - return x.get_shape()[3].value - elif data_format == "NCHW": - return x.get_shape()[1].value - else: - raise ValueError( - "Unknown data_format '{0}'".format(data_format)) - -def get_HW(x, data_format): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - return x.get_shape()[2].value - -def get_strides(stride, data_format): - """ - Args: - x: tensor of shape [N, H, W, C] or [N, C, H, W] - """ - if data_format == "NHWC": - return [1, stride, stride, 1] - elif data_format == "NCHW": - return [1, 1, stride, stride] - else: - raise ValueError( - "Unknown data_format '{0}'".format(data_format)) - - -class TextColors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - - -class Logger(object): - def __init__(self, output_file): - self.terminal = sys.stdout - self.log = open(output_file, "a") - - def write(self, message): - self.terminal.write(message) - self.terminal.flush() - self.log.write(message) - self.log.flush() - - -def count_model_params(tf_variables): - """ - Args: - tf_variables: list of all model variables - """ - - num_vars = 0 - for var in tf_variables: - num_vars += np.prod([dim.value for dim in var.get_shape()]) - return num_vars - - -def get_train_ops( - loss, - tf_variables, - train_step, - clip_mode=None, - grad_bound=None, - l2_reg=1e-4, - lr_warmup_val=None, - lr_warmup_steps=100, - lr_init=0.1, - lr_dec_start=0, - lr_dec_every=10000, - lr_dec_rate=0.1, - lr_dec_min=None, - lr_cosine=False, - lr_max=None, - lr_min=None, - lr_T_0=None, - lr_T_mul=None, - num_train_batches=None, - optim_algo=None, - sync_replicas=False, - num_aggregate=None, - num_replicas=None, - get_grad_norms=False, - moving_average=None): - """ - Args: - clip_mode: "global", "norm", or None. - moving_average: store the moving average of parameters - """ - - if l2_reg > 0: - l2_losses = [] - for var in tf_variables: - l2_losses.append(tf.reduce_sum(var ** 2)) - l2_loss = tf.add_n(l2_losses) - loss += l2_reg * l2_loss - - grads = tf.gradients(loss, tf_variables) - grad_norm = tf.global_norm(grads) - - grad_norms = {} - for v, g in zip(tf_variables, grads): - if v is None or g is None: - continue - if isinstance(g, tf.IndexedSlices): - grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2)) - else: - grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2)) - - if clip_mode is not None: - assert grad_bound is not None, "Need grad_bound to clip gradients." - if clip_mode == "global": - grads, _ = tf.clip_by_global_norm(grads, grad_bound) - elif clip_mode == "norm": - clipped = [] - for g in grads: - if isinstance(g, tf.IndexedSlices): - c_g = tf.clip_by_norm(g.values, grad_bound) - c_g = tf.IndexedSlices(g.indices, c_g) - else: - c_g = tf.clip_by_norm(g, grad_bound) - clipped.append(g) - grads = clipped - else: - raise NotImplementedError("Unknown clip_mode {}".format(clip_mode)) - - if lr_cosine: - assert lr_max is not None, "Need lr_max to use lr_cosine" - assert lr_min is not None, "Need lr_min to use lr_cosine" - assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine" - assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine" - assert num_train_batches is not None, ("Need num_train_batches to use" - " lr_cosine") - - curr_epoch = train_step // num_train_batches - - last_reset = tf.Variable(0, dtype=tf.int32, trainable=False, - name="last_reset") - T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i") - T_curr = curr_epoch - last_reset - - def _update(): - update_last_reset = tf.assign( - last_reset, curr_epoch, use_locking=True) - update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True) - with tf.control_dependencies([update_last_reset, update_T_i]): - rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926 - lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate)) - return lr - - def _no_update(): - rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926 - lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate)) - return lr - - learning_rate = tf.cond( - tf.greater_equal(T_curr, T_i), _update, _no_update) - else: - learning_rate = tf.train.exponential_decay( - lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every, - lr_dec_rate, staircase=True) - if lr_dec_min is not None: - learning_rate = tf.maximum(learning_rate, lr_dec_min) - - if lr_warmup_val is not None: - learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps), - lambda: lr_warmup_val, lambda: learning_rate) - - if optim_algo == "momentum": - opt = tf.train.MomentumOptimizer( - learning_rate, 0.9, use_locking=True, use_nesterov=True) - elif optim_algo == "sgd": - opt = tf.train.GradientDescentOptimizer( - learning_rate, use_locking=True) - elif optim_algo == "adam": - opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3, - use_locking=True) - else: - raise ValueError("Unknown optim_algo {}".format(optim_algo)) - - if sync_replicas: - assert num_aggregate is not None, "Need num_aggregate to sync." - assert num_replicas is not None, "Need num_replicas to sync." - - opt = tf.train.SyncReplicasOptimizer( - opt, - replicas_to_aggregate=num_aggregate, - total_num_replicas=num_replicas, - use_locking=True) - - if moving_average is not None: - opt = tf.contrib.opt.MovingAverageOptimizer( - opt, average_decay=moving_average) - - train_op = opt.apply_gradients( - zip(grads, tf_variables), global_step=train_step) - - if get_grad_norms: - return train_op, learning_rate, grad_norm, opt, grad_norms - else: - return train_op, learning_rate, grad_norm, opt diff --git a/examples/tuners/enas_nni/README.md b/examples/tuners/enas_nni/README.md deleted file mode 100644 index ed6c4f4401..0000000000 --- a/examples/tuners/enas_nni/README.md +++ /dev/null @@ -1,6 +0,0 @@ - **Run ENAS in NNI** - === - - Now we have an enas example [enas-nni](https://github.com/countif/enas_nni) run in NNI from our contributors. - Thanks our lovely contributors. - And welcome more and more people to join us! diff --git a/examples/tuners/enas_nni/README_zh_CN.md b/examples/tuners/enas_nni/README_zh_CN.md deleted file mode 100644 index bbea28dd5d..0000000000 --- a/examples/tuners/enas_nni/README_zh_CN.md +++ /dev/null @@ -1,5 +0,0 @@ -**在 NNI 中运行 ENAS** -=== - -来自贡献者的 [enas-nni](https://github.com/countif/enas_nni) 可运行在 NNI 中。 非常感谢! -欢迎更多志愿者加入我们! \ No newline at end of file