diff --git a/.gitignore b/.gitignore
index a0f8ba7486..bc67f86f1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,6 +81,7 @@ typings/
__pycache__
build
*.egg-info
+.eggs/
setup.pye
**/__init__.pye
**/.ipynb_checkpoints
diff --git a/README.md b/README.md
index e03b339b80..42e6aa3552 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
**NNI (Neural Network Intelligence)** is a lightweight but powerful toolkit to help users **automate** Feature Engineering, Neural Architecture Search, Hyperparameter Tuning and Model Compression.
-The tool manages automated machine learning (AutoML) experiments, **dispatches and runs** experiments' trial jobs generated by tuning algorithms to search the best neural architecture and/or hyper-parameters in **different training environments** like Local Machine, Remote Servers, OpenPAI, Kubeflow, FrameworkController on K8S (AKS etc.), DLWorkspace (aka. DLTS), AML (Azure Machine Learning) and other cloud options.
+The tool manages automated machine learning (AutoML) experiments, **dispatches and runs** experiments' trial jobs generated by tuning algorithms to search the best neural architecture and/or hyper-parameters in **different training environments** like Local Machine, Remote Servers, OpenPAI, Kubeflow, FrameworkController on K8S (AKS etc.), DLWorkspace (aka. DLTS), AML (Azure Machine Learning), AdaptDL (aka. ADL) and other cloud options.
## **Who should consider using NNI**
@@ -173,11 +173,13 @@ Within the following table, we summarized the current NNI capabilities, we are g
Remote Servers
AML(Azure Machine Learning)
Kubernetes based services
-
-
diff --git a/docs/en_US/Assessor/BuiltinAssessor.md b/docs/archive_en_US/Assessor/BuiltinAssessor.md
similarity index 100%
rename from docs/en_US/Assessor/BuiltinAssessor.md
rename to docs/archive_en_US/Assessor/BuiltinAssessor.md
diff --git a/docs/en_US/Assessor/CurvefittingAssessor.md b/docs/archive_en_US/Assessor/CurvefittingAssessor.md
similarity index 100%
rename from docs/en_US/Assessor/CurvefittingAssessor.md
rename to docs/archive_en_US/Assessor/CurvefittingAssessor.md
diff --git a/docs/en_US/Assessor/CustomizeAssessor.md b/docs/archive_en_US/Assessor/CustomizeAssessor.md
similarity index 100%
rename from docs/en_US/Assessor/CustomizeAssessor.md
rename to docs/archive_en_US/Assessor/CustomizeAssessor.md
diff --git a/docs/en_US/Assessor/MedianstopAssessor.md b/docs/archive_en_US/Assessor/MedianstopAssessor.md
similarity index 100%
rename from docs/en_US/Assessor/MedianstopAssessor.md
rename to docs/archive_en_US/Assessor/MedianstopAssessor.md
diff --git a/docs/en_US/CommunitySharings/AutoCompletion.md b/docs/archive_en_US/CommunitySharings/AutoCompletion.md
similarity index 100%
rename from docs/en_US/CommunitySharings/AutoCompletion.md
rename to docs/archive_en_US/CommunitySharings/AutoCompletion.md
diff --git a/docs/en_US/CommunitySharings/HpoComparison.md b/docs/archive_en_US/CommunitySharings/HpoComparison.md
similarity index 100%
rename from docs/en_US/CommunitySharings/HpoComparison.md
rename to docs/archive_en_US/CommunitySharings/HpoComparison.md
diff --git a/docs/en_US/CommunitySharings/ModelCompressionComparison.md b/docs/archive_en_US/CommunitySharings/ModelCompressionComparison.md
similarity index 100%
rename from docs/en_US/CommunitySharings/ModelCompressionComparison.md
rename to docs/archive_en_US/CommunitySharings/ModelCompressionComparison.md
diff --git a/docs/en_US/CommunitySharings/NNI_AutoFeatureEng.md b/docs/archive_en_US/CommunitySharings/NNI_AutoFeatureEng.md
similarity index 100%
rename from docs/en_US/CommunitySharings/NNI_AutoFeatureEng.md
rename to docs/archive_en_US/CommunitySharings/NNI_AutoFeatureEng.md
diff --git a/docs/en_US/CommunitySharings/NNI_colab_support.md b/docs/archive_en_US/CommunitySharings/NNI_colab_support.md
similarity index 100%
rename from docs/en_US/CommunitySharings/NNI_colab_support.md
rename to docs/archive_en_US/CommunitySharings/NNI_colab_support.md
diff --git a/docs/en_US/CommunitySharings/NasComparison.md b/docs/archive_en_US/CommunitySharings/NasComparison.md
similarity index 100%
rename from docs/en_US/CommunitySharings/NasComparison.md
rename to docs/archive_en_US/CommunitySharings/NasComparison.md
diff --git a/docs/en_US/CommunitySharings/ParallelizingTpeSearch.md b/docs/archive_en_US/CommunitySharings/ParallelizingTpeSearch.md
similarity index 100%
rename from docs/en_US/CommunitySharings/ParallelizingTpeSearch.md
rename to docs/archive_en_US/CommunitySharings/ParallelizingTpeSearch.md
diff --git a/docs/en_US/CommunitySharings/RecommendersSvd.md b/docs/archive_en_US/CommunitySharings/RecommendersSvd.md
similarity index 100%
rename from docs/en_US/CommunitySharings/RecommendersSvd.md
rename to docs/archive_en_US/CommunitySharings/RecommendersSvd.md
diff --git a/docs/en_US/CommunitySharings/SptagAutoTune.md b/docs/archive_en_US/CommunitySharings/SptagAutoTune.md
similarity index 100%
rename from docs/en_US/CommunitySharings/SptagAutoTune.md
rename to docs/archive_en_US/CommunitySharings/SptagAutoTune.md
diff --git a/docs/en_US/Compression/AutoPruningUsingTuners.md b/docs/archive_en_US/Compression/AutoPruningUsingTuners.md
similarity index 100%
rename from docs/en_US/Compression/AutoPruningUsingTuners.md
rename to docs/archive_en_US/Compression/AutoPruningUsingTuners.md
diff --git a/docs/en_US/Compression/CompressionReference.md b/docs/archive_en_US/Compression/CompressionReference.md
similarity index 100%
rename from docs/en_US/Compression/CompressionReference.md
rename to docs/archive_en_US/Compression/CompressionReference.md
diff --git a/docs/en_US/Compression/CompressionUtils.md b/docs/archive_en_US/Compression/CompressionUtils.md
similarity index 100%
rename from docs/en_US/Compression/CompressionUtils.md
rename to docs/archive_en_US/Compression/CompressionUtils.md
diff --git a/docs/en_US/Compression/CustomizeCompressor.md b/docs/archive_en_US/Compression/CustomizeCompressor.md
similarity index 100%
rename from docs/en_US/Compression/CustomizeCompressor.md
rename to docs/archive_en_US/Compression/CustomizeCompressor.md
diff --git a/docs/en_US/Compression/DependencyAware.md b/docs/archive_en_US/Compression/DependencyAware.md
similarity index 100%
rename from docs/en_US/Compression/DependencyAware.md
rename to docs/archive_en_US/Compression/DependencyAware.md
diff --git a/docs/en_US/Compression/Framework.md b/docs/archive_en_US/Compression/Framework.md
similarity index 100%
rename from docs/en_US/Compression/Framework.md
rename to docs/archive_en_US/Compression/Framework.md
diff --git a/docs/en_US/Compression/ModelSpeedup.md b/docs/archive_en_US/Compression/ModelSpeedup.md
similarity index 100%
rename from docs/en_US/Compression/ModelSpeedup.md
rename to docs/archive_en_US/Compression/ModelSpeedup.md
diff --git a/docs/en_US/Compression/Overview.md b/docs/archive_en_US/Compression/Overview.md
similarity index 100%
rename from docs/en_US/Compression/Overview.md
rename to docs/archive_en_US/Compression/Overview.md
diff --git a/docs/en_US/Compression/Pruner.md b/docs/archive_en_US/Compression/Pruner.md
similarity index 100%
rename from docs/en_US/Compression/Pruner.md
rename to docs/archive_en_US/Compression/Pruner.md
diff --git a/docs/en_US/Compression/Quantizer.md b/docs/archive_en_US/Compression/Quantizer.md
similarity index 100%
rename from docs/en_US/Compression/Quantizer.md
rename to docs/archive_en_US/Compression/Quantizer.md
diff --git a/docs/en_US/Compression/QuickStart.md b/docs/archive_en_US/Compression/QuickStart.md
similarity index 100%
rename from docs/en_US/Compression/QuickStart.md
rename to docs/archive_en_US/Compression/QuickStart.md
diff --git a/docs/en_US/FeatureEngineering/GBDTSelector.md b/docs/archive_en_US/FeatureEngineering/GBDTSelector.md
similarity index 100%
rename from docs/en_US/FeatureEngineering/GBDTSelector.md
rename to docs/archive_en_US/FeatureEngineering/GBDTSelector.md
diff --git a/docs/en_US/FeatureEngineering/GradientFeatureSelector.md b/docs/archive_en_US/FeatureEngineering/GradientFeatureSelector.md
similarity index 100%
rename from docs/en_US/FeatureEngineering/GradientFeatureSelector.md
rename to docs/archive_en_US/FeatureEngineering/GradientFeatureSelector.md
diff --git a/docs/en_US/FeatureEngineering/Overview.md b/docs/archive_en_US/FeatureEngineering/Overview.md
similarity index 100%
rename from docs/en_US/FeatureEngineering/Overview.md
rename to docs/archive_en_US/FeatureEngineering/Overview.md
diff --git a/docs/en_US/NAS/Advanced.md b/docs/archive_en_US/NAS/Advanced.md
similarity index 100%
rename from docs/en_US/NAS/Advanced.md
rename to docs/archive_en_US/NAS/Advanced.md
diff --git a/docs/en_US/NAS/Benchmarks.md b/docs/archive_en_US/NAS/Benchmarks.md
similarity index 100%
rename from docs/en_US/NAS/Benchmarks.md
rename to docs/archive_en_US/NAS/Benchmarks.md
diff --git a/docs/en_US/NAS/CDARTS.md b/docs/archive_en_US/NAS/CDARTS.md
similarity index 100%
rename from docs/en_US/NAS/CDARTS.md
rename to docs/archive_en_US/NAS/CDARTS.md
diff --git a/docs/en_US/NAS/ClassicNas.md b/docs/archive_en_US/NAS/ClassicNas.md
similarity index 100%
rename from docs/en_US/NAS/ClassicNas.md
rename to docs/archive_en_US/NAS/ClassicNas.md
diff --git a/docs/en_US/NAS/Cream.md b/docs/archive_en_US/NAS/Cream.md
similarity index 100%
rename from docs/en_US/NAS/Cream.md
rename to docs/archive_en_US/NAS/Cream.md
diff --git a/docs/en_US/NAS/DARTS.md b/docs/archive_en_US/NAS/DARTS.md
similarity index 100%
rename from docs/en_US/NAS/DARTS.md
rename to docs/archive_en_US/NAS/DARTS.md
diff --git a/docs/en_US/NAS/ENAS.md b/docs/archive_en_US/NAS/ENAS.md
similarity index 100%
rename from docs/en_US/NAS/ENAS.md
rename to docs/archive_en_US/NAS/ENAS.md
diff --git a/docs/en_US/NAS/NasGuide.md b/docs/archive_en_US/NAS/NasGuide.md
similarity index 100%
rename from docs/en_US/NAS/NasGuide.md
rename to docs/archive_en_US/NAS/NasGuide.md
diff --git a/docs/en_US/NAS/NasReference.md b/docs/archive_en_US/NAS/NasReference.md
similarity index 100%
rename from docs/en_US/NAS/NasReference.md
rename to docs/archive_en_US/NAS/NasReference.md
diff --git a/docs/en_US/NAS/Overview.md b/docs/archive_en_US/NAS/Overview.md
similarity index 100%
rename from docs/en_US/NAS/Overview.md
rename to docs/archive_en_US/NAS/Overview.md
diff --git a/docs/en_US/NAS/PDARTS.md b/docs/archive_en_US/NAS/PDARTS.md
similarity index 100%
rename from docs/en_US/NAS/PDARTS.md
rename to docs/archive_en_US/NAS/PDARTS.md
diff --git a/docs/en_US/NAS/Proxylessnas.md b/docs/archive_en_US/NAS/Proxylessnas.md
similarity index 100%
rename from docs/en_US/NAS/Proxylessnas.md
rename to docs/archive_en_US/NAS/Proxylessnas.md
diff --git a/docs/en_US/NAS/SPOS.md b/docs/archive_en_US/NAS/SPOS.md
similarity index 100%
rename from docs/en_US/NAS/SPOS.md
rename to docs/archive_en_US/NAS/SPOS.md
diff --git a/docs/en_US/NAS/SearchSpaceZoo.md b/docs/archive_en_US/NAS/SearchSpaceZoo.md
similarity index 100%
rename from docs/en_US/NAS/SearchSpaceZoo.md
rename to docs/archive_en_US/NAS/SearchSpaceZoo.md
diff --git a/docs/en_US/NAS/TextNAS.md b/docs/archive_en_US/NAS/TextNAS.md
similarity index 100%
rename from docs/en_US/NAS/TextNAS.md
rename to docs/archive_en_US/NAS/TextNAS.md
diff --git a/docs/en_US/NAS/Visualization.md b/docs/archive_en_US/NAS/Visualization.md
similarity index 100%
rename from docs/en_US/NAS/Visualization.md
rename to docs/archive_en_US/NAS/Visualization.md
diff --git a/docs/en_US/NAS/WriteSearchSpace.md b/docs/archive_en_US/NAS/WriteSearchSpace.md
similarity index 100%
rename from docs/en_US/NAS/WriteSearchSpace.md
rename to docs/archive_en_US/NAS/WriteSearchSpace.md
diff --git a/docs/en_US/Overview.md b/docs/archive_en_US/Overview.md
similarity index 100%
rename from docs/en_US/Overview.md
rename to docs/archive_en_US/Overview.md
diff --git a/docs/en_US/Release.md b/docs/archive_en_US/Release.md
similarity index 100%
rename from docs/en_US/Release.md
rename to docs/archive_en_US/Release.md
diff --git a/docs/en_US/ResearchPublications.md b/docs/archive_en_US/ResearchPublications.md
similarity index 100%
rename from docs/en_US/ResearchPublications.md
rename to docs/archive_en_US/ResearchPublications.md
diff --git a/docs/en_US/SupportedFramework_Library.md b/docs/archive_en_US/SupportedFramework_Library.md
similarity index 100%
rename from docs/en_US/SupportedFramework_Library.md
rename to docs/archive_en_US/SupportedFramework_Library.md
diff --git a/docs/en_US/TrainingService/AMLMode.md b/docs/archive_en_US/TrainingService/AMLMode.md
similarity index 100%
rename from docs/en_US/TrainingService/AMLMode.md
rename to docs/archive_en_US/TrainingService/AMLMode.md
diff --git a/docs/en_US/TrainingService/AdaptDLMode.md b/docs/archive_en_US/TrainingService/AdaptDLMode.md
similarity index 100%
rename from docs/en_US/TrainingService/AdaptDLMode.md
rename to docs/archive_en_US/TrainingService/AdaptDLMode.md
diff --git a/docs/en_US/TrainingService/DLTSMode.md b/docs/archive_en_US/TrainingService/DLTSMode.md
similarity index 100%
rename from docs/en_US/TrainingService/DLTSMode.md
rename to docs/archive_en_US/TrainingService/DLTSMode.md
diff --git a/docs/en_US/TrainingService/FrameworkControllerMode.md b/docs/archive_en_US/TrainingService/FrameworkControllerMode.md
similarity index 100%
rename from docs/en_US/TrainingService/FrameworkControllerMode.md
rename to docs/archive_en_US/TrainingService/FrameworkControllerMode.md
diff --git a/docs/en_US/TrainingService/HowToImplementTrainingService.md b/docs/archive_en_US/TrainingService/HowToImplementTrainingService.md
similarity index 100%
rename from docs/en_US/TrainingService/HowToImplementTrainingService.md
rename to docs/archive_en_US/TrainingService/HowToImplementTrainingService.md
diff --git a/docs/en_US/TrainingService/KubeflowMode.md b/docs/archive_en_US/TrainingService/KubeflowMode.md
similarity index 100%
rename from docs/en_US/TrainingService/KubeflowMode.md
rename to docs/archive_en_US/TrainingService/KubeflowMode.md
diff --git a/docs/en_US/TrainingService/LocalMode.md b/docs/archive_en_US/TrainingService/LocalMode.md
similarity index 100%
rename from docs/en_US/TrainingService/LocalMode.md
rename to docs/archive_en_US/TrainingService/LocalMode.md
diff --git a/docs/en_US/TrainingService/Overview.md b/docs/archive_en_US/TrainingService/Overview.md
similarity index 100%
rename from docs/en_US/TrainingService/Overview.md
rename to docs/archive_en_US/TrainingService/Overview.md
diff --git a/docs/en_US/TrainingService/PaiMode.md b/docs/archive_en_US/TrainingService/PaiMode.md
similarity index 100%
rename from docs/en_US/TrainingService/PaiMode.md
rename to docs/archive_en_US/TrainingService/PaiMode.md
diff --git a/docs/en_US/TrainingService/PaiYarnMode.md b/docs/archive_en_US/TrainingService/PaiYarnMode.md
similarity index 100%
rename from docs/en_US/TrainingService/PaiYarnMode.md
rename to docs/archive_en_US/TrainingService/PaiYarnMode.md
diff --git a/docs/en_US/TrainingService/RemoteMachineMode.md b/docs/archive_en_US/TrainingService/RemoteMachineMode.md
similarity index 100%
rename from docs/en_US/TrainingService/RemoteMachineMode.md
rename to docs/archive_en_US/TrainingService/RemoteMachineMode.md
diff --git a/docs/en_US/TrialExample/Cifar10Examples.md b/docs/archive_en_US/TrialExample/Cifar10Examples.md
similarity index 100%
rename from docs/en_US/TrialExample/Cifar10Examples.md
rename to docs/archive_en_US/TrialExample/Cifar10Examples.md
diff --git a/docs/en_US/TrialExample/EfficientNet.md b/docs/archive_en_US/TrialExample/EfficientNet.md
similarity index 67%
rename from docs/en_US/TrialExample/EfficientNet.md
rename to docs/archive_en_US/TrialExample/EfficientNet.md
index e22da7e42e..f71a0f7f08 100644
--- a/docs/en_US/TrialExample/EfficientNet.md
+++ b/docs/archive_en_US/TrialExample/EfficientNet.md
@@ -9,7 +9,7 @@ Use Grid search to find the best combination of alpha, beta and gamma for Effici
[Example code](https://github.com/microsoft/nni/tree/v1.9/examples/trials/efficientnet)
1. Set your working directory here in the example code directory.
-2. Run `git clone https://github.com/ultmaster/EfficientNet-PyTorch` to clone this modified version of [EfficientNet-PyTorch](https://github.com/lukemelas/EfficientNet-PyTorch). The modifications were done to adhere to the original [Tensorflow version](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) as close as possible (including EMA, label smoothing and etc.); also added are the part which gets parameters from tuner and reports intermediate/final results. Clone it into `EfficientNet-PyTorch`; the files like `main.py`, `train_imagenet.sh` will appear inside, as specified in the configuration files.
+2. Run `git clone https://github.com/ultmaster/EfficientNet-PyTorch` to clone the [ultmaster modified version](https://github.com/ultmaster/EfficientNet-PyTorch) of the original [EfficientNet-PyTorch](https://github.com/lukemelas/EfficientNet-PyTorch). The modifications adhere to the original [Tensorflow version](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) as closely as possible (including EMA, label smoothing, etc.); they also add the part that gets parameters from the tuner and reports intermediate/final results. Clone it into `EfficientNet-PyTorch`; files such as `main.py` and `train_imagenet.sh` will appear inside, as specified in the configuration files.
3. Run `nnictl create --config config_local.yml` (use `config_pai.yml` for OpenPAI) to find the best EfficientNet-B1. Adjust the training service (PAI/local/remote), batch size in the config files according to the environment.
For training on ImageNet, read `EfficientNet-PyTorch/train_imagenet.sh`. Download ImageNet beforehand and extract it adhering to [PyTorch format](https://pytorch.org/docs/stable/torchvision/datasets.html#imagenet) and then replace `/mnt/data/imagenet` in with the location of the ImageNet storage. This file should also be a good example to follow for mounting ImageNet into the container on OpenPAI.
diff --git a/docs/en_US/TrialExample/GbdtExample.md b/docs/archive_en_US/TrialExample/GbdtExample.md
similarity index 100%
rename from docs/en_US/TrialExample/GbdtExample.md
rename to docs/archive_en_US/TrialExample/GbdtExample.md
diff --git a/docs/en_US/TrialExample/KDExample.md b/docs/archive_en_US/TrialExample/KDExample.md
similarity index 100%
rename from docs/en_US/TrialExample/KDExample.md
rename to docs/archive_en_US/TrialExample/KDExample.md
diff --git a/docs/en_US/TrialExample/MnistExamples.md b/docs/archive_en_US/TrialExample/MnistExamples.md
similarity index 100%
rename from docs/en_US/TrialExample/MnistExamples.md
rename to docs/archive_en_US/TrialExample/MnistExamples.md
diff --git a/docs/en_US/TrialExample/OpEvoExamples.md b/docs/archive_en_US/TrialExample/OpEvoExamples.md
similarity index 100%
rename from docs/en_US/TrialExample/OpEvoExamples.md
rename to docs/archive_en_US/TrialExample/OpEvoExamples.md
diff --git a/docs/en_US/TrialExample/RocksdbExamples.md b/docs/archive_en_US/TrialExample/RocksdbExamples.md
similarity index 100%
rename from docs/en_US/TrialExample/RocksdbExamples.md
rename to docs/archive_en_US/TrialExample/RocksdbExamples.md
diff --git a/docs/en_US/TrialExample/SklearnExamples.md b/docs/archive_en_US/TrialExample/SklearnExamples.md
similarity index 100%
rename from docs/en_US/TrialExample/SklearnExamples.md
rename to docs/archive_en_US/TrialExample/SklearnExamples.md
diff --git a/docs/en_US/TrialExample/SquadEvolutionExamples.md b/docs/archive_en_US/TrialExample/SquadEvolutionExamples.md
similarity index 100%
rename from docs/en_US/TrialExample/SquadEvolutionExamples.md
rename to docs/archive_en_US/TrialExample/SquadEvolutionExamples.md
diff --git a/docs/en_US/TrialExample/Trials.md b/docs/archive_en_US/TrialExample/Trials.md
similarity index 100%
rename from docs/en_US/TrialExample/Trials.md
rename to docs/archive_en_US/TrialExample/Trials.md
diff --git a/docs/en_US/Tuner/BatchTuner.md b/docs/archive_en_US/Tuner/BatchTuner.md
similarity index 100%
rename from docs/en_US/Tuner/BatchTuner.md
rename to docs/archive_en_US/Tuner/BatchTuner.md
diff --git a/docs/en_US/Tuner/BohbAdvisor.md b/docs/archive_en_US/Tuner/BohbAdvisor.md
similarity index 100%
rename from docs/en_US/Tuner/BohbAdvisor.md
rename to docs/archive_en_US/Tuner/BohbAdvisor.md
diff --git a/docs/en_US/Tuner/BuiltinTuner.md b/docs/archive_en_US/Tuner/BuiltinTuner.md
similarity index 100%
rename from docs/en_US/Tuner/BuiltinTuner.md
rename to docs/archive_en_US/Tuner/BuiltinTuner.md
diff --git a/docs/en_US/Tuner/CustomizeAdvisor.md b/docs/archive_en_US/Tuner/CustomizeAdvisor.md
similarity index 100%
rename from docs/en_US/Tuner/CustomizeAdvisor.md
rename to docs/archive_en_US/Tuner/CustomizeAdvisor.md
diff --git a/docs/en_US/Tuner/CustomizeTuner.md b/docs/archive_en_US/Tuner/CustomizeTuner.md
similarity index 100%
rename from docs/en_US/Tuner/CustomizeTuner.md
rename to docs/archive_en_US/Tuner/CustomizeTuner.md
diff --git a/docs/en_US/Tuner/EvolutionTuner.md b/docs/archive_en_US/Tuner/EvolutionTuner.md
similarity index 100%
rename from docs/en_US/Tuner/EvolutionTuner.md
rename to docs/archive_en_US/Tuner/EvolutionTuner.md
diff --git a/docs/en_US/Tuner/GPTuner.md b/docs/archive_en_US/Tuner/GPTuner.md
similarity index 100%
rename from docs/en_US/Tuner/GPTuner.md
rename to docs/archive_en_US/Tuner/GPTuner.md
diff --git a/docs/en_US/Tuner/GridsearchTuner.md b/docs/archive_en_US/Tuner/GridsearchTuner.md
similarity index 100%
rename from docs/en_US/Tuner/GridsearchTuner.md
rename to docs/archive_en_US/Tuner/GridsearchTuner.md
diff --git a/docs/en_US/Tuner/HyperbandAdvisor.md b/docs/archive_en_US/Tuner/HyperbandAdvisor.md
similarity index 100%
rename from docs/en_US/Tuner/HyperbandAdvisor.md
rename to docs/archive_en_US/Tuner/HyperbandAdvisor.md
diff --git a/docs/en_US/Tuner/HyperoptTuner.md b/docs/archive_en_US/Tuner/HyperoptTuner.md
similarity index 100%
rename from docs/en_US/Tuner/HyperoptTuner.md
rename to docs/archive_en_US/Tuner/HyperoptTuner.md
diff --git a/docs/en_US/Tuner/InstallCustomizedTuner.md b/docs/archive_en_US/Tuner/InstallCustomizedTuner.md
similarity index 100%
rename from docs/en_US/Tuner/InstallCustomizedTuner.md
rename to docs/archive_en_US/Tuner/InstallCustomizedTuner.md
diff --git a/docs/en_US/Tuner/MetisTuner.md b/docs/archive_en_US/Tuner/MetisTuner.md
similarity index 100%
rename from docs/en_US/Tuner/MetisTuner.md
rename to docs/archive_en_US/Tuner/MetisTuner.md
diff --git a/docs/en_US/Tuner/NetworkmorphismTuner.md b/docs/archive_en_US/Tuner/NetworkmorphismTuner.md
similarity index 100%
rename from docs/en_US/Tuner/NetworkmorphismTuner.md
rename to docs/archive_en_US/Tuner/NetworkmorphismTuner.md
diff --git a/docs/en_US/Tuner/PBTTuner.md b/docs/archive_en_US/Tuner/PBTTuner.md
similarity index 100%
rename from docs/en_US/Tuner/PBTTuner.md
rename to docs/archive_en_US/Tuner/PBTTuner.md
diff --git a/docs/en_US/Tuner/PPOTuner.md b/docs/archive_en_US/Tuner/PPOTuner.md
similarity index 100%
rename from docs/en_US/Tuner/PPOTuner.md
rename to docs/archive_en_US/Tuner/PPOTuner.md
diff --git a/docs/en_US/Tuner/SmacTuner.md b/docs/archive_en_US/Tuner/SmacTuner.md
similarity index 100%
rename from docs/en_US/Tuner/SmacTuner.md
rename to docs/archive_en_US/Tuner/SmacTuner.md
diff --git a/docs/en_US/Tutorial/AnnotationSpec.md b/docs/archive_en_US/Tutorial/AnnotationSpec.md
similarity index 100%
rename from docs/en_US/Tutorial/AnnotationSpec.md
rename to docs/archive_en_US/Tutorial/AnnotationSpec.md
diff --git a/docs/en_US/Tutorial/Contributing.md b/docs/archive_en_US/Tutorial/Contributing.md
similarity index 100%
rename from docs/en_US/Tutorial/Contributing.md
rename to docs/archive_en_US/Tutorial/Contributing.md
diff --git a/docs/en_US/Tutorial/ExperimentConfig.md b/docs/archive_en_US/Tutorial/ExperimentConfig.md
similarity index 100%
rename from docs/en_US/Tutorial/ExperimentConfig.md
rename to docs/archive_en_US/Tutorial/ExperimentConfig.md
diff --git a/docs/en_US/Tutorial/FAQ.md b/docs/archive_en_US/Tutorial/FAQ.md
similarity index 100%
rename from docs/en_US/Tutorial/FAQ.md
rename to docs/archive_en_US/Tutorial/FAQ.md
diff --git a/docs/en_US/Tutorial/HowToDebug.md b/docs/archive_en_US/Tutorial/HowToDebug.md
similarity index 100%
rename from docs/en_US/Tutorial/HowToDebug.md
rename to docs/archive_en_US/Tutorial/HowToDebug.md
diff --git a/docs/en_US/Tutorial/HowToUseDocker.md b/docs/archive_en_US/Tutorial/HowToUseDocker.md
similarity index 100%
rename from docs/en_US/Tutorial/HowToUseDocker.md
rename to docs/archive_en_US/Tutorial/HowToUseDocker.md
diff --git a/docs/en_US/Tutorial/InstallCustomizedAlgos.md b/docs/archive_en_US/Tutorial/InstallCustomizedAlgos.md
similarity index 100%
rename from docs/en_US/Tutorial/InstallCustomizedAlgos.md
rename to docs/archive_en_US/Tutorial/InstallCustomizedAlgos.md
diff --git a/docs/en_US/Tutorial/InstallationLinux.md b/docs/archive_en_US/Tutorial/InstallationLinux.md
similarity index 100%
rename from docs/en_US/Tutorial/InstallationLinux.md
rename to docs/archive_en_US/Tutorial/InstallationLinux.md
diff --git a/docs/en_US/Tutorial/InstallationWin.md b/docs/archive_en_US/Tutorial/InstallationWin.md
similarity index 100%
rename from docs/en_US/Tutorial/InstallationWin.md
rename to docs/archive_en_US/Tutorial/InstallationWin.md
diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/archive_en_US/Tutorial/Nnictl.md
similarity index 100%
rename from docs/en_US/Tutorial/Nnictl.md
rename to docs/archive_en_US/Tutorial/Nnictl.md
diff --git a/docs/en_US/Tutorial/QuickStart.md b/docs/archive_en_US/Tutorial/QuickStart.md
similarity index 100%
rename from docs/en_US/Tutorial/QuickStart.md
rename to docs/archive_en_US/Tutorial/QuickStart.md
diff --git a/docs/en_US/Tutorial/SearchSpaceSpec.md b/docs/archive_en_US/Tutorial/SearchSpaceSpec.md
similarity index 100%
rename from docs/en_US/Tutorial/SearchSpaceSpec.md
rename to docs/archive_en_US/Tutorial/SearchSpaceSpec.md
diff --git a/docs/en_US/Tutorial/SetupNniDeveloperEnvironment.md b/docs/archive_en_US/Tutorial/SetupNniDeveloperEnvironment.md
similarity index 100%
rename from docs/en_US/Tutorial/SetupNniDeveloperEnvironment.md
rename to docs/archive_en_US/Tutorial/SetupNniDeveloperEnvironment.md
diff --git a/docs/en_US/Tutorial/WebUI.md b/docs/archive_en_US/Tutorial/WebUI.md
similarity index 100%
rename from docs/en_US/Tutorial/WebUI.md
rename to docs/archive_en_US/Tutorial/WebUI.md
diff --git a/docs/en_US/autotune_ref.md b/docs/archive_en_US/autotune_ref.md
similarity index 100%
rename from docs/en_US/autotune_ref.md
rename to docs/archive_en_US/autotune_ref.md
diff --git a/docs/en_US/nnicli_ref.md b/docs/archive_en_US/nnicli_ref.md
similarity index 100%
rename from docs/en_US/nnicli_ref.md
rename to docs/archive_en_US/nnicli_ref.md
diff --git a/docs/en_US/Assessor/BuiltinAssessor.rst b/docs/en_US/Assessor/BuiltinAssessor.rst
new file mode 100644
index 0000000000..6b85253a73
--- /dev/null
+++ b/docs/en_US/Assessor/BuiltinAssessor.rst
@@ -0,0 +1,101 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+Built-in Assessors
+==================
+
+NNI provides state-of-the-art early stopping algorithms in its builtin assessors and makes them easy to use. Below is a brief overview of NNI's current builtin Assessors.
+
+Note: Click the **Assessor's name** to get each Assessor's installation requirements, suggested usage scenario, and a config example. A link to a detailed description of each algorithm is provided at the end of the suggested scenario for each Assessor.
+
+Currently, we support the following Assessors:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Assessor
+ - Brief Introduction of Algorithm
+ * - `Medianstop <#MedianStop>`__
+ - Medianstop is a simple early stopping rule. It stops a pending trial X at step S if the trial’s best objective value by step S is strictly worse than the median value of the running averages of all completed trials’ objectives reported up to step S. `Reference Paper `__
+ * - `Curvefitting <#Curvefitting>`__
+     - Curve Fitting Assessor is an LPA (learning, predicting, assessing) algorithm. It stops a pending trial X at step S if the prediction of the final epoch's performance is worse than the best final performance in the trial history. In this algorithm, we use 12 curves to fit the accuracy curve. `Reference Paper `__
+
+
+Usage of Builtin Assessors
+--------------------------
+
+Usage of builtin assessors provided by the NNI SDK requires one to declare the **builtinAssessorName** and **classArgs** in the ``config.yml`` file. In this part, we will introduce the details of usage and the suggested scenarios, classArg requirements, and an example for each assessor.
+
+Note: Please follow the provided format when writing your ``config.yml`` file.
+
+:raw-html:`<a name="MedianStop"></a>`
+
+Median Stop Assessor
+^^^^^^^^^^^^^^^^^^^^
+
+..
+
+ Builtin Assessor Name: **Medianstop**
+
+
+**Suggested scenario**
+
+It is applicable to a wide range of performance curves; thus, it can be used in various scenarios to speed up the tuning process. `Detailed Description <./MedianstopAssessor.rst>`__
+
+**classArgs requirements:**
+
+
+* **optimize_mode** (*maximize or minimize, optional, default = maximize*\ ) - If 'maximize', the assessor will **stop** trials with a smaller expected final result. If 'minimize', the assessor will **stop** trials with a larger expected final result.
+* **start_step** (*int, optional, default = 0*\ ) - A trial is judged for early stopping only after it has reported start_step intermediate results.
+
+**Usage example:**
+
+.. code-block:: yaml
+
+ # config.yml
+ assessor:
+ builtinAssessorName: Medianstop
+ classArgs:
+ optimize_mode: maximize
+ start_step: 5
+
+:raw-html:`<br>`
+
+:raw-html:`<a name="Curvefitting"></a>`
+
+Curve Fitting Assessor
+^^^^^^^^^^^^^^^^^^^^^^
+
+..
+
+ Builtin Assessor Name: **Curvefitting**
+
+
+**Suggested scenario**
+
+It is applicable to a wide range of performance curves; thus, it can be used in various scenarios to speed up the tuning process. Even better, it is able to handle and assess curves with similar performance. `Detailed Description <./CurvefittingAssessor.rst>`__
+
+**Note**\ , according to the original paper, only incremental functions are supported. Therefore this assessor can only be used to maximize optimization metrics. For example, it can be used for accuracy, but not for loss.
+
+**classArgs requirements:**
+
+
+* **epoch_num** (*int, required*\ ) - The total number of epochs. We need to know the number of epochs to determine which point to predict.
+* **start_step** (*int, optional, default = 6*\ ) - A trial is judged for early stopping only after it has reported start_step intermediate results.
+* **threshold** (*float, optional, default = 0.95*\ ) - The threshold used to decide whether to early stop the worst-performing curves. For example: if threshold = 0.95 and the best performance in the history is 0.9, then we stop any trial whose predicted value is lower than 0.95 * 0.9 = 0.855.
+* **gap** (*int, optional, default = 1*\ ) - The interval between Assessor judgements. For example: if gap = 2 and start_step = 6, then we assess the result after receiving 6, 8, 10, 12... intermediate results.
+
+**Usage example:**
+
+.. code-block:: yaml
+
+ # config.yml
+ assessor:
+ builtinAssessorName: Curvefitting
+ classArgs:
+ epoch_num: 20
+ start_step: 6
+ threshold: 0.95
+ gap: 1
diff --git a/docs/en_US/Assessor/CurvefittingAssessor.rst b/docs/en_US/Assessor/CurvefittingAssessor.rst
new file mode 100644
index 0000000000..41c6d2c147
--- /dev/null
+++ b/docs/en_US/Assessor/CurvefittingAssessor.rst
@@ -0,0 +1,101 @@
+Curve Fitting Assessor on NNI
+=============================
+
+Introduction
+------------
+
+The Curve Fitting Assessor is an LPA (learning, predicting, assessing) algorithm. It stops a pending trial X at step S if the prediction of the final epoch's performance is worse than the best final performance in the trial history.
+
+In this algorithm, we use 12 curves to fit the learning curve. The set of parametric curve models is chosen from this `reference paper `__. The learning curves' shapes coincide with our prior knowledge about the form of learning curves: they are typically increasing, saturating functions.
+
+
+.. image:: ../../img/curvefitting_learning_curve.PNG
+ :target: ../../img/curvefitting_learning_curve.PNG
+ :alt: learning_curve
+
+
+We combine all learning curve models into a single, more powerful model. This combined model is given by a weighted linear combination:
+
+
+.. image:: ../../img/curvefitting_f_comb.gif
+ :target: ../../img/curvefitting_f_comb.gif
+ :alt: f_comb
+
+
+with the new combined parameter vector
+
+
+.. image:: ../../img/curvefitting_expression_xi.gif
+ :target: ../../img/curvefitting_expression_xi.gif
+ :alt: expression_xi
+
+
+We assume additive Gaussian noise, with the noise parameter initialized to its maximum likelihood estimate.
+
+We determine the maximum-probability value of the combined parameter vector from the historical data. We use this value to predict future trial performance and stop inadequate trials early to save computing resources.
+
+Concretely, this algorithm goes through three stages of learning, predicting, and assessing.
+
+
+*
+  Step 1: Learning. We learn from the trial history of the current trial and determine \xi from a Bayesian perspective. First, we fit each curve using the least-squares method, implemented in ``fit_theta``. After obtaining the parameters, we filter the curves and remove outliers, implemented in ``filter_curve``. Finally, we use MCMC sampling, implemented in ``mcmc_sampling``\ , to adjust the weight of each curve. At this point, all the parameters in \xi have been determined.
+
+*
+  Step 2: Predicting. We calculate the expected final accuracy, implemented in ``f_comb``\ , at the target position (i.e., the total number of epochs), using \xi and the formula of the combined model.
+
+*
+  Step 3: Assessing. If the fitting result does not converge, the predicted value is ``None``\ ; in that case we return ``AssessResult.Good`` to wait for more accuracy information and predict again later. Otherwise, ``predict()`` returns a positive value: if it is strictly greater than the best final performance in history multiplied by ``THRESHOLD`` (default value = 0.95), we return ``AssessResult.Good``\ ; otherwise, we return ``AssessResult.Bad``. A minimal sketch of this logic follows the list.
+
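+The following sketch illustrates the assessing step described above; ``predict`` is a hypothetical placeholder for the assessor's internal curve-fitting prediction, not NNI's actual API:
+
+.. code-block:: python
+
+   from nni.assessor import AssessResult
+
+   THRESHOLD = 0.95
+
+   def assess(trial_history, target_epoch, best_history_performance, predict):
+       # `predict` fits the combined curve model to `trial_history` and returns the
+       # expected accuracy at `target_epoch`, or None if the fit did not converge.
+       predicted = predict(trial_history, target_epoch)
+       if predicted is None:
+           # Not enough information yet: keep the trial running and assess again later.
+           return AssessResult.Good
+       if predicted > best_history_performance * THRESHOLD:
+           return AssessResult.Good
+       return AssessResult.Bad
+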
+The figure below shows the result of our algorithm on MNIST trial history data, where the green points represent the data obtained by the Assessor, the blue points represent the future but unknown data, and the red line is the curve predicted by the Curve Fitting Assessor.
+
+
+.. image:: ../../img/curvefitting_example.PNG
+ :target: ../../img/curvefitting_example.PNG
+ :alt: examples
+
+
+Usage
+-----
+
+To use the Curve Fitting Assessor, add the following spec to your experiment's YAML config file:
+
+.. code-block:: yaml
+
+ assessor:
+ builtinAssessorName: Curvefitting
+ classArgs:
+       # (required) The total number of epochs.
+       # We need to know the number of epochs to determine which point to predict.
+       epoch_num: 20
+       # (optional) A trial is judged only after it has reported start_step intermediate results,
+       # in order to save computing resources.
+       # The default value of start_step is 6.
+       start_step: 6
+       # (optional) The threshold used to decide whether to early stop the worst-performing curves.
+       # For example: if threshold = 0.95 and the best performance in the history is 0.9, then we stop any trial whose predicted value is lower than 0.95 * 0.9 = 0.855.
+       # The default value of threshold is 0.95.
+       threshold: 0.95
+       # (optional) The interval between Assessor judgements.
+       # For example: if gap = 2 and start_step = 6, then we assess the result after receiving 6, 8, 10, 12... intermediate results.
+       # The default value of gap is 1.
+       gap: 1
+
+Limitation
+----------
+
+According to the original paper, only incremental functions are supported. Therefore this assessor can only be used to maximize optimization metrics. For example, it can be used for accuracy, but not for loss.
+
+File Structure
+--------------
+
+The assessor has a lot of different files, functions, and classes. Here we briefly describe a few of them.
+
+
+* ``curvefunctions.py`` includes all the function expressions and default parameters.
+* ``modelfactory.py`` includes learning and predicting; the corresponding calculation part is also implemented here.
+* ``curvefitting_assessor.py`` is the assessor which receives the trial history and assesses whether to early stop the trial.
+
+TODO
+----
+
+
+* Further improve the accuracy of the prediction and test it on more models.
diff --git a/docs/en_US/Assessor/CustomizeAssessor.rst b/docs/en_US/Assessor/CustomizeAssessor.rst
new file mode 100644
index 0000000000..3926d7a306
--- /dev/null
+++ b/docs/en_US/Assessor/CustomizeAssessor.rst
@@ -0,0 +1,67 @@
+Customize Assessor
+==================
+
+NNI supports building your own assessor to meet your tuning needs.
+
+If you want to implement a customized Assessor, there are three things to do:
+
+
+#. Inherit the base Assessor class
+#. Implement assess_trial function
+#. Configure your customized Assessor in experiment YAML config file
+
+**1. Inherit the base Assessor class**
+
+.. code-block:: python
+
+ from nni.assessor import Assessor
+
+ class CustomizedAssessor(Assessor):
+ def __init__(self, ...):
+ ...
+
+**2. Implement the assess_trial function**
+
+.. code-block:: python
+
+   from nni.assessor import Assessor, AssessResult
+
+   class CustomizedAssessor(Assessor):
+       def __init__(self, ...):
+           ...
+
+       def assess_trial(self, trial_history):
+           """
+           Determines whether a trial should be killed. Must override.
+           trial_history: a list of intermediate result objects.
+           Returns AssessResult.Good or AssessResult.Bad.
+           """
+           # Implement your code here.
+           ...
+
+**3. Configure your customized Assessor in experiment YAML config file**
+
+NNI needs to locate your customized Assessor class and instantiate the class, so you need to specify the location of the customized Assessor class and pass literal values as parameters to the __init__ constructor.
+
+.. code-block:: yaml
+
+ assessor:
+ codeDir: /home/abc/myassessor
+ classFileName: my_customized_assessor.py
+ className: CustomizedAssessor
+     # Any parameter you need to pass to your Assessor class's __init__ constructor
+ # can be specified in this optional classArgs field, for example
+ classArgs:
+ arg1: value1
+
+Please note that in **2**, the object ``trial_history`` is exactly the object that the Trial sends to the Assessor via the SDK function ``report_intermediate_result``.
+
+The working directory of your assessor is ``/nni-experiments//log``\ , which can be retrieved from the environment variable ``NNI_LOG_DIRECTORY``.
+
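+As a concrete illustration, here is a minimal, hypothetical assessor that follows the skeleton above: it stops a trial once its latest intermediate result falls below a fixed threshold passed through ``classArgs`` (e.g. ``threshold: 0.5`` in place of the placeholder ``arg1`` above), assuming higher metrics are better. It would be registered with the ``codeDir``/``classFileName``/``className`` fields shown earlier.
+
+.. code-block:: python
+
+   # my_customized_assessor.py (illustrative example, not part of NNI)
+   from nni.assessor import Assessor, AssessResult
+
+   class CustomizedAssessor(Assessor):
+       def __init__(self, threshold=0.5):
+           # `threshold` comes from the classArgs section of config.yml.
+           self.threshold = threshold
+
+       def assess_trial(self, trial_history):
+           # trial_history holds the values reported via nni.report_intermediate_result().
+           if trial_history and trial_history[-1] < self.threshold:
+               return AssessResult.Bad
+           return AssessResult.Good
+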
+For more detailed examples, see:
+
+..
+
+ * :githublink:`medianstop-assessor `
+ * :githublink:`curvefitting-assessor `
+
diff --git a/docs/en_US/Assessor/MedianstopAssessor.rst b/docs/en_US/Assessor/MedianstopAssessor.rst
new file mode 100644
index 0000000000..5a307bf0d3
--- /dev/null
+++ b/docs/en_US/Assessor/MedianstopAssessor.rst
@@ -0,0 +1,7 @@
+Medianstop Assessor on NNI
+==========================
+
+Median Stop
+-----------
+
+Medianstop is a simple early stopping rule mentioned in this `paper `__. It stops a pending trial X after step S if the trial’s best objective value by step S is strictly worse than the median value of the running averages of all completed trials’ objectives reported up to step S.
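+
+The rule is simple enough to sketch in a few lines of Python. The helper below is only an illustration (a hypothetical function, not NNI's actual implementation), assuming higher objective values are better:
+
+.. code-block:: python
+
+   from statistics import median
+
+   def should_median_stop(trial_history, completed_histories, step):
+       """Return True if the pending trial should be stopped at step S = `step`."""
+       # Best objective value the pending trial has reported by step S.
+       best_so_far = max(trial_history[:step])
+       # Running averages (up to step S) of all completed trials that reached step S.
+       running_avgs = [
+           sum(history[:step]) / step
+           for history in completed_histories
+           if len(history) >= step
+       ]
+       if not running_avgs:
+           return False
+       # Stop if strictly worse than the median of those running averages.
+       return best_so_far < median(running_avgs)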
diff --git a/docs/en_US/CommunitySharings/AutoCompletion.rst b/docs/en_US/CommunitySharings/AutoCompletion.rst
new file mode 100644
index 0000000000..cb0c76c12f
--- /dev/null
+++ b/docs/en_US/CommunitySharings/AutoCompletion.rst
@@ -0,0 +1,55 @@
+Auto Completion for nnictl Commands
+===================================
+
+NNI's command line tool **nnictl** supports auto-completion, i.e., you can complete an nnictl command by pressing the ``tab`` key.
+
+For example, if the current command is
+
+.. code-block:: bash
+
+ nnictl cre
+
+By pressing the ``tab`` key, it will be completed to
+
+.. code-block:: bash
+
+ nnictl create
+
+For now, auto-completion is not enabled by default when you install NNI through ``pip``\ , and it only works on Linux with the bash shell. To enable this feature on your computer, follow the steps below:
+
+Step 1. Download ``bash-completion``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+ cd ~
+ wget https://raw.githubusercontent.com/microsoft/nni/{nni-version}/tools/bash-completion
+
+Here, {nni-version} should be replaced by the version of NNI, e.g., ``master``\ , ``v1.9``. You can also check the latest ``bash-completion`` script :githublink:`here `.
+
+Step 2. Install the script
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you are running as root and want to install this script for all users
+
+.. code-block:: bash
+
+ install -m644 ~/bash-completion /usr/share/bash-completion/completions/nnictl
+
+If you just want to install this script for yourself
+
+.. code-block:: bash
+
+ mkdir -p ~/.bash_completion.d
+ install -m644 ~/bash-completion ~/.bash_completion.d/nnictl
+ echo '[[ -f ~/.bash_completion.d/nnictl ]] && source ~/.bash_completion.d/nnictl' >> ~/.bash_completion
+
+Step 3. Reopen your terminal
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reopen your terminal and you should be able to use the auto-completion feature. Enjoy!
+
+Step 4. Uninstall
+^^^^^^^^^^^^^^^^^
+
+If you want to uninstall this feature, just revert the changes in the steps above.
diff --git a/docs/en_US/CommunitySharings/HpoComparison.rst b/docs/en_US/CommunitySharings/HpoComparison.rst
new file mode 100644
index 0000000000..75925ab2e9
--- /dev/null
+++ b/docs/en_US/CommunitySharings/HpoComparison.rst
@@ -0,0 +1,385 @@
+Hyper Parameter Optimization Comparison
+=======================================
+
+*Posted by Anonymous Author*
+
+Comparison of Hyperparameter Optimization (HPO) algorithms on several problems.
+
+The hyperparameter optimization algorithms compared are listed below:
+
+
+* `Random Search <../Tuner/BuiltinTuner.rst>`__
+* `Grid Search <../Tuner/BuiltinTuner.rst>`__
+* `Evolution <../Tuner/BuiltinTuner.rst>`__
+* `Anneal <../Tuner/BuiltinTuner.rst>`__
+* `Metis <../Tuner/BuiltinTuner.rst>`__
+* `TPE <../Tuner/BuiltinTuner.rst>`__
+* `SMAC <../Tuner/BuiltinTuner.rst>`__
+* `HyperBand <../Tuner/BuiltinTuner.rst>`__
+* `BOHB <../Tuner/BuiltinTuner.rst>`__
+
+All algorithms run in NNI local environment.
+
+Machine Environment:
+
+.. code-block:: bash
+
+ OS: Linux Ubuntu 16.04 LTS
+ CPU: Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz 2600 MHz
+ Memory: 112 GB
+ NNI Version: v0.7
+ NNI Mode(local|pai|remote): local
+ Python version: 3.6
+ Is conda or virtualenv used?: Conda
+ is running in docker?: no
+
+AutoGBDT Example
+----------------
+
+Problem Description
+^^^^^^^^^^^^^^^^^^^
+
+A nonconvex problem: the hyper-parameter search of the `AutoGBDT <../TrialExample/GbdtExample.rst>`__ example.
+
+Search Space
+^^^^^^^^^^^^
+
+.. code-block:: json
+
+ {
+ "num_leaves": {
+ "_type": "choice",
+ "_value": [10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 48, 64, 96, 128]
+ },
+ "learning_rate": {
+ "_type": "choice",
+ "_value": [0.00001, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5]
+ },
+ "max_depth": {
+ "_type": "choice",
+ "_value": [-1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 48, 64, 96, 128]
+ },
+ "feature_fraction": {
+ "_type": "choice",
+ "_value": [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]
+ },
+ "bagging_fraction": {
+ "_type": "choice",
+ "_value": [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]
+ },
+ "bagging_freq": {
+ "_type": "choice",
+ "_value": [1, 2, 4, 8, 10, 12, 14, 16]
+ }
+ }
+
+The total search space size is 1,204,224; we set the maximum number of trials to 1,000. The time limit is 48 hours.
+
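+The size of this search space is simply the product of the number of choices for each parameter, which a few lines of Python confirm:
+
+.. code-block:: python
+
+   # Number of candidate values for each parameter in the search space above.
+   choices = {
+       'num_leaves': 14,
+       'learning_rate': 8,
+       'max_depth': 21,
+       'feature_fraction': 8,
+       'bagging_fraction': 8,
+       'bagging_freq': 8,
+   }
+
+   size = 1
+   for n in choices.values():
+       size *= n
+   print(size)  # 1204224
+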
+Results
+^^^^^^^
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Algorithm
+ - Best loss
+ - Average of Best 5 Losses
+ - Average of Best 10 Losses
+ * - Random Search
+ - 0.418854
+ - 0.420352
+ - 0.421553
+ * - Random Search
+ - 0.417364
+ - 0.420024
+ - 0.420997
+ * - Random Search
+ - 0.417861
+ - 0.419744
+ - 0.420642
+ * - Grid Search
+ - 0.498166
+ - 0.498166
+ - 0.498166
+ * - Evolution
+ - 0.409887
+ - 0.409887
+ - 0.409887
+ * - Evolution
+ - 0.413620
+ - 0.413875
+ - 0.414067
+ * - Evolution
+ - 0.409887
+ - 0.409887
+ - 0.409887
+ * - Anneal
+ - 0.414877
+ - 0.417289
+ - 0.418281
+ * - Anneal
+ - 0.409887
+ - 0.409887
+ - 0.410118
+ * - Anneal
+ - 0.413683
+ - 0.416949
+ - 0.417537
+ * - Metis
+ - 0.416273
+ - 0.420411
+ - 0.422380
+ * - Metis
+ - 0.420262
+ - 0.423175
+ - 0.424816
+ * - Metis
+ - 0.421027
+ - 0.424172
+ - 0.425714
+ * - TPE
+ - 0.414478
+ - 0.414478
+ - 0.414478
+ * - TPE
+ - 0.415077
+ - 0.417986
+ - 0.418797
+ * - TPE
+ - 0.415077
+ - 0.417009
+ - 0.418053
+ * - SMAC
+ - **0.408386**
+ - **0.408386**
+ - **0.408386**
+ * - SMAC
+ - 0.414012
+ - 0.414012
+ - 0.414012
+ * - SMAC
+ - **0.408386**
+ - **0.408386**
+ - **0.408386**
+ * - BOHB
+ - 0.410464
+ - 0.415319
+ - 0.417755
+ * - BOHB
+ - 0.418995
+ - 0.420268
+ - 0.422604
+ * - BOHB
+ - 0.415149
+ - 0.418072
+ - 0.418932
+ * - HyperBand
+ - 0.414065
+ - 0.415222
+ - 0.417628
+ * - HyperBand
+ - 0.416807
+ - 0.417549
+ - 0.418828
+ * - HyperBand
+ - 0.415550
+ - 0.415977
+ - 0.417186
+ * - GP
+ - 0.414353
+ - 0.418563
+ - 0.420263
+ * - GP
+ - 0.414395
+ - 0.418006
+ - 0.420431
+ * - GP
+ - 0.412943
+ - 0.416566
+ - 0.418443
+
+
+In this example, all the algorithms are used with their default parameters. For Metis, there were only about 300 trials because it runs slowly due to the O(n^3) time complexity of its Gaussian Process.
+
+RocksDB Benchmark 'fillrandom' and 'readrandom'
+-----------------------------------------------
+
+Problem Description
+^^^^^^^^^^^^^^^^^^^
+
+`DB_Bench `__ is the main tool used to benchmark `RocksDB `__\ 's performance. It has many hyperparameters to tune.
+
+The performance of ``DB_Bench`` depends on the machine configuration and installation method. We ran ``DB_Bench`` on a Linux machine and installed RocksDB as a shared library.
+
+Machine configuration
+^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+ RocksDB: version 6.1
+ CPU: 6 * Intel(R) Xeon(R) CPU E5-2690 v4 @ 2.60GHz
+ CPUCache: 35840 KB
+ Keys: 16 bytes each
+ Values: 100 bytes each (50 bytes after compression)
+ Entries: 1000000
+
+Storage performance
+^^^^^^^^^^^^^^^^^^^
+
+**Latency**\ : each IO request takes some time to complete; this is called the average latency. Several factors affect this time, including network connection quality and hard-disk IO performance.
+
+**IOPS**\ : **IO operations per second**\ , which means the number of *read or write operations* that can be done in one second.
+
+**IO size**\ : **the size of each IO request**. Depending on the operating system and the application/service that needs disk access, a request is issued to read or write a certain amount of data at a time.
+
+**Throughput (in MB/s) = Average IO size x IOPS**
+
+IOPS is related to online processing ability, so we use IOPS as the metric in our experiments.
+
+Search Space
+^^^^^^^^^^^^
+
+.. code-block:: json
+
+ {
+ "max_background_compactions": {
+ "_type": "quniform",
+ "_value": [1, 256, 1]
+ },
+ "block_size": {
+ "_type": "quniform",
+ "_value": [1, 500000, 1]
+ },
+ "write_buffer_size": {
+ "_type": "quniform",
+ "_value": [1, 130000000, 1]
+ },
+ "max_write_buffer_number": {
+ "_type": "quniform",
+ "_value": [1, 128, 1]
+ },
+ "min_write_buffer_number_to_merge": {
+ "_type": "quniform",
+ "_value": [1, 32, 1]
+ },
+ "level0_file_num_compaction_trigger": {
+ "_type": "quniform",
+ "_value": [1, 256, 1]
+ },
+ "level0_slowdown_writes_trigger": {
+ "_type": "quniform",
+ "_value": [1, 1024, 1]
+ },
+ "level0_stop_writes_trigger": {
+ "_type": "quniform",
+ "_value": [1, 1024, 1]
+ },
+ "cache_size": {
+ "_type": "quniform",
+ "_value": [1, 30000000, 1]
+ },
+ "compaction_readahead_size": {
+ "_type": "quniform",
+ "_value": [1, 30000000, 1]
+ },
+ "new_table_reader_for_compaction_inputs": {
+ "_type": "randint",
+ "_value": [1]
+ }
+ }
+
+The search space is enormous (about 10^40), so we set the maximum number of trials to 100 to limit the computation resources.
+
+Results
+^^^^^^^
+
+'fillrandom' Benchmark
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Best IOPS (Repeat 1)
+ - Best IOPS (Repeat 2)
+ - Best IOPS (Repeat 3)
+ * - Random
+ - 449901
+ - 427620
+ - 477174
+ * - Anneal
+ - 461896
+ - 467150
+ - 437528
+ * - Evolution
+ - 436755
+ - 389956
+ - 389790
+ * - TPE
+ - 378346
+ - 482316
+ - 468989
+ * - SMAC
+ - 491067
+ - 490472
+ - **491136**
+ * - Metis
+ - 444920
+ - 457060
+ - 454438
+
+
+Figure:
+
+
+.. image:: ../../img/hpo_rocksdb_fillrandom.png
+ :target: ../../img/hpo_rocksdb_fillrandom.png
+ :alt:
+
+
+'readrandom' Benchmark
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Best IOPS (Repeat 1)
+ - Best IOPS (Repeat 2)
+ - Best IOPS (Repeat 3)
+ * - Random
+ - 2276157
+ - 2285301
+ - 2275142
+ * - Anneal
+ - 2286330
+ - 2282229
+ - 2284012
+ * - Evolution
+ - 2286524
+ - 2283673
+ - 2283558
+ * - TPE
+ - 2287366
+ - 2282865
+ - 2281891
+ * - SMAC
+ - 2270874
+ - 2284904
+ - 2282266
+ * - Metis
+ - **2287696**
+ - 2283496
+ - 2277701
+
+
+Figure:
+
+
+.. image:: ../../img/hpo_rocksdb_readrandom.png
+ :target: ../../img/hpo_rocksdb_readrandom.png
+ :alt:
+
diff --git a/docs/en_US/CommunitySharings/ModelCompressionComparison.rst b/docs/en_US/CommunitySharings/ModelCompressionComparison.rst
new file mode 100644
index 0000000000..12cc009e25
--- /dev/null
+++ b/docs/en_US/CommunitySharings/ModelCompressionComparison.rst
@@ -0,0 +1,133 @@
+Comparison of Filter Pruning Algorithms
+=======================================
+
+To provide an initial insight into the performance of various filter pruning algorithms,
+we conduct extensive experiments with various pruning algorithms on some benchmark models and datasets.
+We present the experiment results in this document.
+In addition, we provide friendly instructions on the re-implementation of these experiments to facilitate further contributions to this effort.
+
+Experiment Setting
+------------------
+
+The experiments are performed with the following pruners/datasets/models:
+
+
+*
+ Models: :githublink:`VGG16, ResNet18, ResNet50 `
+
+*
+ Datasets: CIFAR-10
+
+*
+ Pruners:
+
+
+ * These pruners are included:
+
+    * Pruners with scheduling: ``SimulatedAnnealing Pruner``\ , ``NetAdapt Pruner``\ , ``AutoCompress Pruner``.
+      Given the overall sparsity requirement, these pruners can automatically generate a sparsity distribution among different layers.
+ * One-shot pruners: ``L1Filter Pruner``\ , ``L2Filter Pruner``\ , ``FPGM Pruner``.
+ The sparsity of each layer is set the same as the overall sparsity in this experiment.
+
+ *
+ Only **filter pruning** performances are compared here.
+
+    For the pruners with scheduling, ``L1Filter Pruner`` is used as the base algorithm. That is to say, after the sparsity distribution is decided by the scheduling algorithm, ``L1Filter Pruner`` is used to perform the actual pruning.
+
+ *
+ All the pruners listed above are implemented in :githublink:`nni `.
+
+Experiment Result
+-----------------
+
+For each dataset/model/pruner combination, we prune the model to different levels by setting a series of target sparsities for the pruner.
+
+Here we plot both the **Number of Weights - Performance** curve and the **FLOPs - Performance** curve.
+As a reference, we also plot the result declared in the paper `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates `__ for models VGG16 and ResNet18 on CIFAR-10.
+
+The experiment results are shown in the following figures:
+
+CIFAR-10, VGG16:
+
+
+.. image:: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_vgg16.png
+ :target: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_vgg16.png
+ :alt:
+
+
+CIFAR-10, ResNet18:
+
+
+.. image:: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet18.png
+ :target: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet18.png
+ :alt:
+
+
+CIFAR-10, ResNet50:
+
+
+.. image:: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet50.png
+ :target: ../../../examples/model_compress/comparison_of_pruners/img/performance_comparison_resnet50.png
+ :alt:
+
+
+Analysis
+--------
+
+From the experiment result, we get the following conclusions:
+
+
+* Given the constraint on the number of parameters, the pruners with scheduling (``AutoCompress Pruner``\ , ``SimulatedAnnealing Pruner``\ ) perform better than the others when the constraint is strict. However, they have no such advantage in the FLOPs/Performance comparison, since only the number-of-parameters constraint is considered in the optimization process;
+* The basic algorithms ``L1Filter Pruner``\ , ``L2Filter Pruner``\ , and ``FPGM Pruner`` perform very similarly in these experiments;
+* ``NetAdapt Pruner`` cannot achieve a very high compression rate, because it prunes only one layer in each pruning iteration; this leads to unacceptable complexity when the sparsity per iteration is much lower than the overall sparsity constraint.
+
+Experiments Reproduction
+------------------------
+
+Implementation Details
+^^^^^^^^^^^^^^^^^^^^^^
+
+
+*
+ The experiment results are all collected with the default configuration of the pruners in nni, which means that when we call a pruner class in nni, we don't change any default class arguments.
+
+*
+ Both FLOPs and the number of parameters are counted with :githublink:`Model FLOPs/Parameters Counter ` after :githublink:`model speed up `.
+  This avoids the potential issue of counting them on masked models, where pruned weights are masked but not actually removed.
+
+*
+ The experiment code can be found :githublink:`here `.
+
+Experiment Result Rendering
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+*
+ If you follow the practice in the :githublink:`example `\ , for every single pruning experiment, the experiment result will be saved in JSON format as follows:
+
+ .. code-block:: json
+
+ {
+ "performance": {"original": 0.9298, "pruned": 0.1, "speedup": 0.1, "finetuned": 0.7746},
+ "params": {"original": 14987722.0, "speedup": 167089.0},
+ "flops": {"original": 314018314.0, "speedup": 38589922.0}
+ }
+
+*
+ The experiment results are saved :githublink:`here `.
+ You can refer to :githublink:`analyze ` to plot new performance comparison figures.
+
+Contribution
+------------
+
+TODO Items
+^^^^^^^^^^
+
+
+* Pruners constrained by FLOPS/latency
+* More pruning algorithms/datasets/models
+
+Issues
+^^^^^^
+
+For algorithm implementation & experiment issues, please `create an issue `__.
diff --git a/docs/en_US/CommunitySharings/NNI_AutoFeatureEng.rst b/docs/en_US/CommunitySharings/NNI_AutoFeatureEng.rst
new file mode 100644
index 0000000000..d01a824517
--- /dev/null
+++ b/docs/en_US/CommunitySharings/NNI_AutoFeatureEng.rst
@@ -0,0 +1,141 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+NNI review article from Zhihu - By Garvin Li
+========================================================================================================================
+
+This article is by an NNI user on the Zhihu forum. In it, Garvin shares his experience of using NNI for automatic feature engineering. We think this article is very useful for users who are interested in using NNI for feature engineering. With the author's permission, we translated the original article into English.
+
+**Source (原文)**\ : `如何看待微软最新发布的AutoML平台NNI? By Garvin Li `__ (How to view Microsoft's newly released AutoML platform NNI?)
+
+01 Overview of AutoML
+---------------------
+
+In the author's opinion, AutoML is not only about hyperparameter optimization, but
+also a process that can target various stages of the machine learning process,
+including feature engineering, NAS, HPO, etc.
+
+02 Overview of NNI
+------------------
+
+NNI (Neural Network Intelligence) is an open source AutoML toolkit from
+Microsoft, to help users design and tune machine learning models, neural network
+architectures, or a complex system’s parameters in an efficient and automatic
+way.
+
+Link: https://github.com/Microsoft/nni
+
+In general, most Microsoft tools share one prominent characteristic: the
+design is highly reasonable (regardless of the degree of technical innovation).
+NNI's AutoFeatureENG basically meets all user requirements for automatic
+feature engineering with a very reasonably designed underlying framework.
+
+03 Details of NNI-AutoFeatureENG
+--------------------------------
+
+..
+
+   The article follows the GitHub project: `https://github.com/SpongebBob/tabular_automl_NNI `__.
+
+
+New users can do AutoFeatureENG with NNI easily and efficiently. To explore the AutoFeatureENG capability, download the required files shown below, and then install NNI through pip.
+
+
+.. image:: https://pic3.zhimg.com/v2-8886eea730cad25f5ac06ef1897cd7e4_r.jpg
+ :target: https://pic3.zhimg.com/v2-8886eea730cad25f5ac06ef1897cd7e4_r.jpg
+ :alt:
+
+NNI treats AutoFeatureENG as a two-step task: feature generation exploration and feature selection. Feature generation exploration is mainly about feature derivation and high-order feature combination.
+
+04 Feature Exploration
+----------------------
+
+For feature derivation, NNI offers many operations which can automatically generate new features, listed \ `as follows `__\ :
+
+**count**\ : Count encoding is based on replacing categories with their counts computed on the train set, also named frequency encoding.
+
+**target**\ : Target encoding is based on encoding categorical variable values with the mean of target variable per value.
+
+**embedding**\ : Regard features as sentences, generate vectors using *Word2Vec.*
+
+**crosscout**\ : Count encoding on more than one dimension, similar to CTR (Click Through Rate).
+
+**aggregete**\ : Decide the aggregation functions of the features, including min/max/mean/var.
+
+**nunique**\ : Statistics of the number of unique features.
+
+**histsta**\ : Statistics of feature buckets, like histogram statistics.
+
+The search space can be defined in a **JSON file**\ : it specifies how features intersect, which two columns are crossed, and how new features are generated from the corresponding columns.
+
+
+.. image:: https://pic1.zhimg.com/v2-3c3eeec6eea9821e067412725e5d2317_r.jpg
+ :target: https://pic1.zhimg.com/v2-3c3eeec6eea9821e067412725e5d2317_r.jpg
+ :alt:
+
+
+The picture shows the procedure of defining a search space. NNI provides count encoding as a 1-order op, as well as cross count encoding and aggregate statistics (min, max, var, mean, median, nunique) as 2-order ops.
+
+For example, if we want to search for features that are frequency-encoded (value count) on the columns named {"C1", ..., "C26"}, we can define it in the following way:
+
+
+.. image:: https://github.com/JSong-Jia/Pic/blob/master/images/pic%203.jpg
+ :target: https://github.com/JSong-Jia/Pic/blob/master/images/pic%203.jpg
+ :alt:
+
+
+We can define a cross frequency encoding (value count on crossed dimensions) method on columns {"C1",...,"C26"} x {"C1",...,"C26"} in the following way:
+
+
+.. image:: https://github.com/JSong-Jia/Pic/blob/master/images/pic%204.jpg
+ :target: https://github.com/JSong-Jia/Pic/blob/master/images/pic%204.jpg
+ :alt:
+
+
+The purpose of exploration is to generate new features. You can use the **get_next_parameter** function to get the feature candidates received by one trial:
+
+.. code-block:: python
+
+   RECEIVED_PARAMS = nni.get_next_parameter()
+
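+As an illustration only, a trial script built around this call might look like the sketch below. The toy dataset, the LightGBM settings and the omitted feature-generation step are placeholders and are not part of the original project; only ``nni.get_next_parameter`` and ``nni.report_final_result`` are the actual NNI APIs involved.
+
+.. code-block:: python
+
+   import nni
+   import lightgbm as lgb
+   import pandas as pd
+   from sklearn.datasets import make_classification
+   from sklearn.metrics import roc_auc_score
+   from sklearn.model_selection import train_test_split
+
+   # Toy data standing in for the real tabular dataset.
+   X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
+   X = pd.DataFrame(X, columns=[f"I{i}" for i in range(10)])
+   X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)
+
+   # The received parameters describe which derived features to generate;
+   # a real trial would expand the dataframe accordingly (omitted here).
+   RECEIVED_PARAMS = nni.get_next_parameter()
+
+   model = lgb.LGBMClassifier(n_estimators=100)
+   model.fit(X_train, y_train)
+   auc = roc_auc_score(y_valid, model.predict_proba(X_valid)[:, 1])
+
+   nni.report_final_result(auc)  # the tuner uses this metric to propose better feature combinations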
+
+05 Feature selection
+--------------------
+
+To avoid feature explosion and overfitting, feature selection is necessary. For feature selection, NNI-AutoFeatureENG mainly promotes LightGBM (Light Gradient Boosting Machine), a gradient boosting framework developed by Microsoft.
+
+
+.. image:: https://pic2.zhimg.com/v2-7bf9c6ae1303692101a911def478a172_r.jpg
+ :target: https://pic2.zhimg.com/v2-7bf9c6ae1303692101a911def478a172_r.jpg
+ :alt:
+
+
+If you have used **XGBoost** or **GBDT**\ , you would know that tree-based algorithms can easily calculate the importance of each feature for the results. LightGBM is thus able to do feature selection naturally.
+
+The issue is that the selected features might be applicable to *GBDT* (Gradient Boosting Decision Tree), but not to linear algorithms like *LR* (Logistic Regression).
+
+
+.. image:: https://pic4.zhimg.com/v2-d2f919497b0ed937acad0577f7a8df83_r.jpg
+ :target: https://pic4.zhimg.com/v2-d2f919497b0ed937acad0577f7a8df83_r.jpg
+ :alt:
+
+
+06 Summary
+----------
+
+NNI's AutoFeatureEng sets a well-established standard, showing us the operation procedure and the available modules, and it is highly convenient to use. However, a simple model is probably not enough for good results.
+
+Suggestions to NNI
+------------------
+
+About exploration: it would be better to consider using a DNN (like xDeepFM) to extract high-order features.
+
+About selection: there could be more intelligent options, such as an automatic selection system based on downstream models.
+
+Conclusion: NNI can offer users some design inspiration, and it is a good open source project. I suggest researchers leverage it to accelerate their AI research.
+
+Tip: because the scripts of the open source project are compiled with gcc 7, macOS users may encounter gcc (GNU Compiler Collection) problems. The solution is as follows:
+
+.. code-block:: bash
+
+   brew install libomp
diff --git a/docs/en_US/CommunitySharings/NNI_colab_support.rst b/docs/en_US/CommunitySharings/NNI_colab_support.rst
new file mode 100644
index 0000000000..438f66bb26
--- /dev/null
+++ b/docs/en_US/CommunitySharings/NNI_colab_support.rst
@@ -0,0 +1,47 @@
+Use NNI on Google Colab
+=======================
+
+NNI can easily run on the Google Colab platform. However, Colab doesn't expose its public IP and ports, so by default you cannot access NNI's Web UI on Colab. To solve this, you need reverse proxy software like ``ngrok`` or ``frp``. This tutorial will show you how to use ngrok to access NNI's Web UI on Colab.
+
+How to Open NNI's Web UI on Google Colab
+----------------------------------------
+
+
+#. Install the required packages and software.
+
+.. code-block:: bash
+
+ ! pip install nni # install nni
+ ! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip # download ngrok and unzip it
+ ! unzip ngrok-stable-linux-amd64.zip
+ ! mkdir -p nni_repo
+    ! git clone https://github.com/microsoft/nni.git nni_repo/nni # clone NNI's official repo to get examples
+
+
+#. Register an ngrok account `here `__\ , then connect to your account using your authtoken.
+
+.. code-block:: bash
+
+ ! ./ngrok authtoken
+
+
+#. Start an NNI example on a port greater than 1024, then start ngrok with the same port. If you want to use a GPU, make sure ``gpuNum >= 1`` in ``config.yml``. Use ``get_ipython()`` to start ngrok, since the cell will get stuck if you use ``! ngrok http 5000 &``.
+
+.. code-block:: bash
+
+ ! nnictl create --config nni_repo/nni/examples/trials/mnist-pytorch/config.yml --port 5000 &
+ get_ipython().system_raw('./ngrok http 5000 &')
+
+
+#. Check the public URL.
+
+.. code-block:: bash
+
+ ! curl -s http://localhost:4040/api/tunnels # don't change the port number 4040
+
+You will see a URL like http://xxxx.ngrok.io after step 4; open this URL and you will find NNI's Web UI. Have fun :)
+
+Access Web UI with frp
+----------------------
+
+frp is another reverse proxy with similar functions. However, frp doesn't provide free public URLs, so you may need a server with a public IP as the frp server. See `here `__ to learn more about how to deploy frp.
diff --git a/docs/en_US/CommunitySharings/NasComparison.rst b/docs/en_US/CommunitySharings/NasComparison.rst
new file mode 100644
index 0000000000..d2a9ac1131
--- /dev/null
+++ b/docs/en_US/CommunitySharings/NasComparison.rst
@@ -0,0 +1,165 @@
+Neural Architecture Search Comparison
+=====================================
+
+*Posted by Anonymous Author*
+
+Train and Compare NAS (Neural Architecture Search) models including Autokeras, DARTS, ENAS and NAO.
+
+Their source code links are as follows:
+
+
+*
+ Autokeras: `https://github.com/jhfjhfj1/autokeras `__
+
+*
+ DARTS: `https://github.com/quark0/darts `__
+
+*
+ ENAS: `https://github.com/melodyguan/enas `__
+
+*
+ NAO: `https://github.com/renqianluo/NAO `__
+
+Experiment Description
+----------------------
+
+To avoid over-fitting on **CIFAR-10**\ , we also compare the models on five other datasets: Fashion-MNIST, CIFAR-100, OUI-Adience-Age, ImageNet-10-1 (a subset of ImageNet), and ImageNet-10-2 (another subset of ImageNet). Each ImageNet-10 subset is sampled from ImageNet with 10 different labels.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Dataset
+ - Training Size
+     - Number of Classes
+ - Descriptions
+ * - `Fashion-MNIST `__
+ - 60,000
+ - 10
+ - T-shirt/top, trouser, pullover, dress, coat, sandal, shirt, sneaker, bag and ankle boot.
+ * - `CIFAR-10 `__
+ - 50,000
+ - 10
+ - Airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships and trucks.
+ * - `CIFAR-100 `__
+ - 50,000
+ - 100
+ - Similar to CIFAR-10 but with 100 classes and 600 images each.
+ * - `OUI-Adience-Age `__
+ - 26,580
+ - 8
+ - 8 age groups/labels (0-2, 4-6, 8-13, 15-20, 25-32, 38-43, 48-53, 60-).
+ * - `ImageNet-10-1 `__
+ - 9,750
+ - 10
+ - Coffee mug, computer keyboard, dining table, wardrobe, lawn mower, microphone, swing, sewing machine, odometer and gas pump.
+ * - `ImageNet-10-2 `__
+ - 9,750
+ - 10
+     - Drum, banjo, whistle, grand piano, violin, organ, acoustic guitar, trombone, flute and sax.
+
+
+We do not change the default fine-tuning technique in their source code. To match each task, only the code for the input image shape and the number of output classes is changed.
+
+The search phase for all NAS methods is limited to **two days**\ , and so is the retraining time. Average results are reported based on **three repeated runs**. Our evaluation machines have one Nvidia Tesla P100 GPU, 112GB of RAM and one 2.60GHz CPU (Intel E5-2690).
+
+NAO requires too many computing resources, so we only use NAO-WS, which provides the pipeline script.
+
+For AutoKeras, we used version 0.2.18 because it was the latest version when we started the experiment.
+
+NAS Performance
+---------------
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - NAS
+ - AutoKeras (%)
+ - ENAS (macro) (%)
+ - ENAS (micro) (%)
+ - DARTS (%)
+ - NAO-WS (%)
+ * - Fashion-MNIST
+ - 91.84
+ - 95.44
+ - 95.53
+ - **95.74**
+ - 95.20
+ * - CIFAR-10
+ - 75.78
+ - 95.68
+ - **96.16**
+ - 94.23
+ - 95.64
+ * - CIFAR-100
+ - 43.61
+ - 78.13
+ - 78.84
+ - **79.74**
+ - 75.75
+ * - OUI-Adience-Age
+ - 63.20
+ - **80.34**
+ - 78.55
+ - 76.83
+ - 72.96
+ * - ImageNet-10-1
+ - 61.80
+ - 77.07
+ - 79.80
+ - **80.48**
+ - 77.20
+ * - ImageNet-10-2
+ - 37.20
+ - 58.13
+ - 56.47
+ - 60.53
+ - **61.20**
+
+
+Unfortunately, we could not reproduce all the results reported in the papers.
+
+The best or average results reported in the papers are:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - NAS
+ - AutoKeras(%)
+ - ENAS (macro) (%)
+ - ENAS (micro) (%)
+ - DARTS (%)
+ - NAO-WS (%)
+   * - CIFAR-10
+ - 88.56(best)
+ - 96.13(best)
+ - 97.11(best)
+ - 97.17(average)
+ - 96.47(best)
+
+
+AutoKeras has relatively poor performance across all datasets due to the random factor in its network morphism.
+
+For ENAS, ENAS (macro) shows good results on OUI-Adience-Age and ENAS (micro) shows good results on CIFAR-10.
+
+DARTS performs well on some datasets, but we found high variance on others. The difference among three runs can be up to 5.37% on OUI-Adience-Age and 4.36% on ImageNet-10-1.
+
+NAO-WS shows good results on ImageNet-10-2, but it can perform very poorly on OUI-Adience-Age.
+
+Reference
+---------
+
+
+#.
+ Jin, Haifeng, Qingquan Song, and Xia Hu. "Efficient neural architecture search with network morphism." *arXiv preprint arXiv:1806.10282* (2018).
+
+#.
+ Liu, Hanxiao, Karen Simonyan, and Yiming Yang. "Darts: Differentiable architecture search." arXiv preprint arXiv:1806.09055 (2018).
+
+#.
+ Pham, Hieu, et al. "Efficient Neural Architecture Search via Parameters Sharing." international conference on machine learning (2018): 4092-4101.
+
+#.
+ Luo, Renqian, et al. "Neural Architecture Optimization." neural information processing systems (2018): 7827-7838.
diff --git a/docs/en_US/CommunitySharings/ParallelizingTpeSearch.rst b/docs/en_US/CommunitySharings/ParallelizingTpeSearch.rst
new file mode 100644
index 0000000000..3d75962f6c
--- /dev/null
+++ b/docs/en_US/CommunitySharings/ParallelizingTpeSearch.rst
@@ -0,0 +1,183 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+Parallelizing a Sequential Algorithm TPE
+========================================
+
+TPE approaches were actually run asynchronously in order to make use of multiple compute nodes and to avoid wasting time waiting for trial evaluations to complete. For the TPE approach, the so-called constant liar approach was used: each time a candidate point x∗ was proposed, a fake fitness evaluation y was assigned temporarily, until the evaluation completed and reported the actual loss f(x∗).
+
+Introduction and Problems
+-------------------------
+
+Sequential Model-based Global Optimization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sequential Model-Based Global Optimization (SMBO) algorithms have been used in many applications where evaluation of the fitness function is expensive. In an application where the true fitness function f: X → R is costly to evaluate, model-based algorithms approximate f with a surrogate that is cheaper to evaluate. Typically the inner loop in an SMBO algorithm is the numerical optimization of this surrogate, or some transformation of the surrogate. The point x∗ that maximizes the surrogate (or its transformation) becomes the proposal for where the true function f should be evaluated. This active-learning-like algorithm template is summarized in the figure below. SMBO algorithms differ in the criterion they optimize to obtain x∗ given a model (or surrogate) of f, and in how they model f via the observation history H.
+
+
+.. image:: ../../img/parallel_tpe_search4.PNG
+ :target: ../../img/parallel_tpe_search4.PNG
+ :alt:
+
+
+The algorithms in this work optimize the criterion of Expected Improvement (EI). Other criteria have been suggested, such as Probability of Improvement, minimizing the Conditional Entropy of the Minimizer, and bandit-based criteria. We chose to use the EI criterion in TPE because it is intuitive and has been shown to work well in a variety of settings. Expected improvement is the expectation under some model M of f : X → R^N that f(x) will exceed (negatively) some threshold y∗:
+
+
+.. image:: ../../img/parallel_tpe_search_ei.PNG
+ :target: ../../img/parallel_tpe_search_ei.PNG
+ :alt:
+
+
+Since the calculation of p(y|x) is expensive, the TPE approach models p(y|x) via p(x|y) and p(y). TPE defines p(x|y) using two densities:
+
+
+.. image:: ../../img/parallel_tpe_search_tpe.PNG
+ :target: ../../img/parallel_tpe_search_tpe.PNG
+ :alt:
+
+
+where l(x) is the density formed by using the observations {x(i)} such that the corresponding loss
+f(x(i)) was less than y∗, and g(x) is the density formed by using the remaining observations. The TPE algorithm depends on a y∗ that is larger than the best observed f(x) so that some points can be used to form l(x). The TPE algorithm chooses y∗ to be some quantile γ of the observed y values, so that p(y<\ ``y∗``\ ) = γ, but no specific model for p(y) is necessary. The tree-structured form of l and g makes it easy to draw many candidates according to l and evaluate them according to g(x)/l(x). On each iteration, the algorithm returns the candidate x∗ with the greatest EI.
+
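+A compact numerical sketch of this candidate-selection step is shown below: a toy 1-D problem with Gaussian kernel density estimates standing in for TPE's tree-structured densities (the data and kernel choice are illustrative assumptions, not NNI's implementation).
+
+.. code-block:: python
+
+   import numpy as np
+   from scipy.stats import gaussian_kde
+
+   rng = np.random.default_rng(0)
+
+   # Toy observation history: a 1-D hyperparameter x and its loss y.
+   xs = rng.uniform(-5, 5, size=50)
+   ys = (xs - 1.0) ** 2 + rng.normal(scale=0.5, size=50)
+
+   gamma = 0.25                              # quantile that defines y*
+   y_star = np.quantile(ys, gamma)
+   good, bad = xs[ys < y_star], xs[ys >= y_star]
+
+   l = gaussian_kde(good)                    # density of the "good" observations
+   g = gaussian_kde(bad)                     # density of the remaining observations
+
+   candidates = l.resample(100)[0]           # draw candidates according to l(x)
+   scores = l(candidates) / g(candidates)    # EI grows with l(x)/g(x)
+   print(candidates[np.argmax(scores)])      # the proposal x* for this iteration
+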
+Here is a simulation of the TPE algorithm in a two-dimensional search space. The differences in background color represent different objective values. It can be seen that TPE combines exploration and exploitation very well. (Black indicates the points sampled in this round, and yellow indicates the points taken in previous rounds.)
+
+
+.. image:: ../../img/parallel_tpe_search1.gif
+ :target: ../../img/parallel_tpe_search1.gif
+ :alt:
+
+
+**Since EI is a continuous function, the x with the highest EI is fully determined for a given state.** As shown in the figure below, the blue triangle is the point that is most likely to be sampled in this state.
+
+
+.. image:: ../../img/parallel_tpe_search_ei2.PNG
+ :target: ../../img/parallel_tpe_search_ei2.PNG
+ :alt:
+
+
+TPE performs well when used sequentially, but if we provide a larger concurrency, **a large number of points will be produced in the same EI state**\ ; points that are too concentrated reduce the exploration ability of the tuner, resulting in wasted resources.
+
+Here is the simulation when we set ``concurrency=60``\ ; this phenomenon is clearly visible.
+
+
+.. image:: ../../img/parallel_tpe_search2.gif
+ :target: ../../img/parallel_tpe_search2.gif
+ :alt:
+
+
+Research solution
+-----------------
+
+Approximated q-EI Maximization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The multi-points criterion presented below can potentially be used to deliver an additional design of experiments in one step through the resolution of the optimization problem.
+
+
+.. image:: ../../img/parallel_tpe_search_qEI.PNG
+ :target: ../../img/parallel_tpe_search_qEI.PNG
+ :alt:
+
+
+However, the computation of q-EI becomes intensive as q increases. From our research, there are four popular greedy strategies that approximate the result of this problem while avoiding its numerical cost; two of them are described below.
+
+Solution 1: Believing the OK Predictor: The KB(Kriging Believer) Heuristic Strategy
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Kriging Believer strategy replaces the conditional knowledge about the responses at the sites chosen within the last iterations by deterministic values equal to the expectation of the Kriging predictor. Keeping the same notations as previously, the strategy can be summed up as follows:
+
+
+.. image:: ../../img/parallel_tpe_search_kb.PNG
+ :target: ../../img/parallel_tpe_search_kb.PNG
+ :alt:
+
+
+This sequential strategy delivers a q-points design and is computationally affordable since it relies on the analytically known EI, optimized in d dimensions. However, there is a risk of failure, since believing an OK predictor that overshoots the observed data may lead to a sequence that gets trapped in a non-optimal region for many iterations. We now propose a second strategy that reduces this risk.
+
+Solution 2: The CL(Constant Liar) Heuristic Strategy
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Let us now consider a sequential strategy in which the metamodel is updated (still without hyperparameter re-estimation) at each iteration with a value L exogenously fixed by the user, here called a "lie". The strategy referred to as the Constant Liar consists in lying with the same value L at every iteration: maximize EI (i.e. find x(n+1)), update the model as if y(x(n+1)) = L, and so on, always with the same L ∈ R:
+
+
+.. image:: ../../img/parallel_tpe_search_cl.PNG
+ :target: ../../img/parallel_tpe_search_cl.PNG
+ :alt:
+
+
+L should logically be determined on the basis of the values taken by y at X. Three values, min{Y}, mean{Y}, and max{Y} are considered here. **The larger L is, the more explorative the algorithm will be, and vice versa.**
+
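+The loop below is a small, self-contained sketch of this Constant Liar batching scheme. The ``suggest`` function is only a stand-in for the sequential tuner's proposal step (in TPE it would fit l(x)/g(x) on the history and maximize EI); everything else simply shows how the fake observations are appended.
+
+.. code-block:: python
+
+   import random
+
+   def suggest(observed):
+       """Stand-in for a sequential tuner's proposal step (e.g. TPE's EI maximization).
+
+       ``observed`` is a list of (x, y) pairs; a real tuner would fit l(x)/g(x)
+       and return the candidate with the highest EI. Here we return a random
+       point so that the sketch stays self-contained.
+       """
+       return [random.uniform(-5, 5), random.uniform(-5, 5)]
+
+   def constant_liar_batch(observed, batch_size, lie):
+       """Propose ``batch_size`` points at once with the Constant Liar heuristic."""
+       fake_history = list(observed)
+       batch = []
+       for _ in range(batch_size):
+           x_star = suggest(fake_history)      # maximize EI on the current (partly fake) history
+           fake_history.append((x_star, lie))  # pretend the evaluation already returned the lie L
+           batch.append(x_star)
+       return batch
+
+   # CL[mean]: lie with the mean of the observed losses.
+   observed = [([0.0, 0.0], 1.2), ([1.0, -2.0], 0.7)]
+   lie = sum(y for _, y in observed) / len(observed)
+   print(constant_liar_batch(observed, batch_size=4, lie=lie))
+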
+We have simulated the method above. The following figure shows the result of using the mean-value lie to maximize q-EI. We find that the sampled points are now much more scattered.
+
+
+.. image:: ../../img/parallel_tpe_search3.gif
+ :target: ../../img/parallel_tpe_search3.gif
+ :alt:
+
+
+Experiment
+----------
+
+Branin-Hoo
+^^^^^^^^^^
+
+The four optimization strategies presented in the last section are now compared on the Branin-Hoo function which is a classical test-case in global optimization.
+
+
+.. image:: ../../img/parallel_tpe_search_branin.PNG
+ :target: ../../img/parallel_tpe_search_branin.PNG
+ :alt:
+
+
+The recommended values of a, b, c, r, s and t are: a = 1, b = 5.1 ⁄ (4π²), c = 5 ⁄ π, r = 6, s = 10 and t = 1 ⁄ (8π). This function has three global minimizers: (-3.14, 12.27), (3.14, 2.27), (9.42, 2.47).
+
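+The function itself is easy to reproduce. The sketch below uses the recommended parameter values quoted above; the minimizer coordinates are the standard ones, which the rounded values above approximate.
+
+.. code-block:: python
+
+   import math
+
+   def branin(x1, x2, a=1.0, b=5.1 / (4 * math.pi ** 2), c=5 / math.pi,
+              r=6.0, s=10.0, t=1 / (8 * math.pi)):
+       """Branin-Hoo function with the recommended parameter values."""
+       return a * (x2 - b * x1 ** 2 + c * x1 - r) ** 2 + s * (1 - t) * math.cos(x1) + s
+
+   # All three global minimizers evaluate to roughly the same minimum (~0.3979).
+   for x1, x2 in [(-math.pi, 12.275), (math.pi, 2.275), (9.42478, 2.475)]:
+       print(round(branin(x1, x2), 4))
+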
+Next is the comparison of the q-EI associated with the q first points (q ∈ [1,10]) given by the constant liar strategies (min and max), 2000 q-points designs uniformly drawn for every q, and 2000 q-points LHS designs taken at random for every q.
+
+
+.. image:: ../../img/parallel_tpe_search_result.PNG
+ :target: ../../img/parallel_tpe_search_result.PNG
+ :alt:
+
+
+As we can see in the figure, CL[max] and CL[min] offer very good q-EI results compared to random designs, especially for small values of q.
+
+Gaussian Mixed Model function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We also compared the cases with and without parallel optimization. A two-dimensional multimodal Gaussian mixture distribution is used for the simulation; the results are as follows:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * -
+ - concurrency=80
+ - concurrency=60
+ - concurrency=40
+ - concurrency=20
+ - concurrency=10
+   * - Without parallel optimization
+     - avg = 0.4841, var = 0.1953
+     - avg = 0.5155, var = 0.2219
+     - avg = 0.5773, var = 0.2570
+     - avg = 0.4680, var = 0.1994
+     - avg = 0.2774, var = 0.1217
+   * - With parallel optimization
+     - avg = 0.2132, var = 0.0700
+     - avg = 0.2177, var = 0.0796
+     - avg = 0.1835, var = 0.0533
+     - avg = 0.1671, var = 0.0413
+     - avg = 0.1918, var = 0.0697
+
+
+Note: the total number of samples per test is 240 (to ensure that the budgets are equal). The trials in each setting were repeated 1000 times; the values are the average and variance of the best results over the 1000 trials.
+
+References
+----------
+
+[1] James Bergstra, Remi Bardenet, Yoshua Bengio, Balazs Kegl. "Algorithms for Hyper-Parameter Optimization". `Link `__
+
+[2] Meng-Hiot Lim, Yew-Soon Ong. "Computational Intelligence in Expensive Optimization Problems". `Link `__
+
+[3] Christopher M. Bishop. "Pattern Recognition and Machine Learning". `Link `__
diff --git a/docs/en_US/CommunitySharings/RecommendersSvd.rst b/docs/en_US/CommunitySharings/RecommendersSvd.rst
new file mode 100644
index 0000000000..5c90b2b507
--- /dev/null
+++ b/docs/en_US/CommunitySharings/RecommendersSvd.rst
@@ -0,0 +1,15 @@
+Automatically tuning SVD (NNI in Recommenders)
+==============================================
+
+In this tutorial, we first introduce the GitHub repo `Recommenders `__. It provides examples and best practices for building recommendation systems, offered as Jupyter notebooks. It has various models that are popular and widely deployed in recommendation systems. To provide a complete end-to-end experience, each example is presented in five key tasks, as shown below:
+
+
+* `Prepare Data `__\ : Preparing and loading data for each recommender algorithm.
+* `Model `__\ : Building models using various classical and deep learning recommender algorithms such as Alternating Least Squares (\ `ALS `__\ ) or eXtreme Deep Factorization Machines (\ `xDeepFM `__\ ).
+* `Evaluate `__\ : Evaluating algorithms with offline metrics.
+* `Model Select and Optimize `__\ : Tuning and optimizing hyperparameters for recommender models.
+* `Operationalize `__\ : Operationalizing models in a production environment on Azure.
+
+The fourth task, tuning and optimizing the models' hyperparameters, is where NNI can help. To give a concrete example of NNI tuning the models in Recommenders, let's demonstrate with the model `SVD `__ and the Movielens100k data. There are more than 10 hyperparameters to be tuned in this model.
+
+`This Jupyter notebook `__ provided by Recommenders is a very detailed step-by-step tutorial for this example. It uses different built-in tuning algorithms in NNI, including ``Annealing``\ , ``SMAC``\ , ``Random Search``\ , ``TPE``\ , ``Hyperband``\ , ``Metis`` and ``Evolution``. Finally, the results of the different tuning algorithms are compared. Please go through this notebook to learn how to use NNI to tune the SVD model; you can then use NNI to tune other models in Recommenders.
diff --git a/docs/en_US/CommunitySharings/SptagAutoTune.rst b/docs/en_US/CommunitySharings/SptagAutoTune.rst
new file mode 100644
index 0000000000..6f6e8df601
--- /dev/null
+++ b/docs/en_US/CommunitySharings/SptagAutoTune.rst
@@ -0,0 +1,9 @@
+Automatically tuning SPTAG with NNI
+===================================
+
+`SPTAG `__ (Space Partition Tree And Graph) is a library for large scale vector approximate nearest neighbor search scenario released by `Microsoft Research (MSR) `__ and `Microsoft Bing `__.
+
+This library assumes that the samples are represented as vectors and that the vectors can be compared by L2 distance or cosine distance. The vectors returned for a query vector are the vectors that have the smallest L2 distance or cosine distance to the query vector.
+SPTAG provides two methods: kd-tree plus relative neighborhood graph (SPTAG-KDT), and balanced k-means tree plus relative neighborhood graph (SPTAG-BKT). SPTAG-KDT is advantageous in index-building cost, and SPTAG-BKT is advantageous in search accuracy on very high-dimensional data.
+
+In SPTAG, there are tens of parameters that can be tuned for specific scenarios or datasets. NNI is a great tool for automatically tuning those parameters. The authors of SPTAG tried NNI for auto tuning and found well-performing parameters easily, so they shared their practice of tuning SPTAG with NNI in their document `here `__. Please refer to it for a detailed tutorial.
diff --git a/docs/en_US/Compression/AutoPruningUsingTuners.rst b/docs/en_US/Compression/AutoPruningUsingTuners.rst
new file mode 100644
index 0000000000..abda796614
--- /dev/null
+++ b/docs/en_US/Compression/AutoPruningUsingTuners.rst
@@ -0,0 +1,121 @@
+Automatic Model Pruning using NNI Tuners
+========================================
+
+It's convenient to implement automatic model pruning with NNI compression and NNI tuners.
+
+First, model compression with NNI
+---------------------------------
+
+You can easily compress a model with NNI compression. Take pruning as an example: you can prune a pretrained model with LevelPruner like this:
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import LevelPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
+ pruner = LevelPruner(model, config_list)
+ pruner.compress()
+
+The 'default' op_type stands for the module types defined in :githublink:`default_layers.py ` for PyTorch.
+
+Therefore ``{ 'sparsity': 0.8, 'op_types': ['default'] }`` means that **all layers with the specified op_types will be compressed with the same 0.8 sparsity**. When ``pruner.compress()`` is called, the model is compressed with masks; after that you can fine-tune the model normally, and the **pruned (masked) weights won't be updated**.
+
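+A minimal sketch of that fine-tuning step is given below. It is a plain PyTorch training loop, not an NNI API; ``model`` comes from the snippet above, while ``train_loader`` is a placeholder for your own data loader.
+
+.. code-block:: python
+
+   import torch
+   import torch.nn.functional as F
+
+   optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+   for epoch in range(3):
+       model.train()
+       for data, target in train_loader:
+           optimizer.zero_grad()
+           loss = F.cross_entropy(model(data), target)
+           loss.backward()
+           optimizer.step()  # the masked weights stay pruned during these updates
+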
+Then, make this automatic
+-------------------------
+
+The previous example manually chose LevelPruner and pruned all layers with the same sparsity. This is obviously sub-optimal because different layers may have different amounts of redundancy. Layer sparsities should be carefully tuned to minimize model performance degradation, and this can be done with NNI tuners.
+
+The first thing we need to do is design a search space. Here we use a nested search space that contains both the choice of pruning algorithm and the layer sparsities to optimize.
+
+.. code-block:: json
+
+ {
+ "prune_method": {
+ "_type": "choice",
+ "_value": [
+ {
+ "_name": "agp",
+ "conv0_sparsity": {
+ "_type": "uniform",
+ "_value": [
+ 0.1,
+ 0.9
+ ]
+ },
+ "conv1_sparsity": {
+ "_type": "uniform",
+ "_value": [
+ 0.1,
+ 0.9
+ ]
+ },
+ },
+ {
+ "_name": "level",
+ "conv0_sparsity": {
+ "_type": "uniform",
+ "_value": [
+ 0.1,
+ 0.9
+ ]
+ },
+ "conv1_sparsity": {
+ "_type": "uniform",
+ "_value": [
+ 0.01,
+ 0.9
+ ]
+ },
+ }
+ ]
+ }
+ }
+
+Then we need to modify a few lines of our code:
+
+.. code-block:: python
+
+ import nni
+ from nni.algorithms.compression.pytorch.pruning import *
+ params = nni.get_parameters()
+ conv0_sparsity = params['prune_method']['conv0_sparsity']
+ conv1_sparsity = params['prune_method']['conv1_sparsity']
+ # these raw sparsity should be scaled if you need total sparsity constrained
+ config_list_level = [{ 'sparsity': conv0_sparsity, 'op_name': 'conv0' },
+ { 'sparsity': conv1_sparsity, 'op_name': 'conv1' }]
+ config_list_agp = [{'initial_sparsity': 0, 'final_sparsity': conv0_sparsity,
+ 'start_epoch': 0, 'end_epoch': 3,
+ 'frequency': 1,'op_name': 'conv0' },
+ {'initial_sparsity': 0, 'final_sparsity': conv1_sparsity,
+ 'start_epoch': 0, 'end_epoch': 3,
+ 'frequency': 1,'op_name': 'conv1' },]
+ PRUNERS = {'level':LevelPruner(model, config_list_level), 'agp':AGPPruner(model, config_list_agp)}
+    pruner = PRUNERS[params['prune_method']['_name']]
+ pruner.compress()
+ ... # fine tuning
+ acc = evaluate(model) # evaluation
+    nni.report_final_result(acc)
+
+Last, define our experiment so that NNI automatically tunes the pruning method together with the layer sparsities:
+
+.. code-block:: yaml
+
+ authorName: default
+ experimentName: Auto_Compression
+ trialConcurrency: 2
+ maxExecDuration: 100h
+ maxTrialNum: 500
+ #choice: local, remote, pai
+ trainingServicePlatform: local
+ #choice: true, false
+ useAnnotation: False
+ searchSpacePath: search_space.json
+ tuner:
+ #choice: TPE, Random, Anneal...
+ builtinTunerName: TPE
+ classArgs:
+ #choice: maximize, minimize
+ optimize_mode: maximize
+ trial:
+ command: bash run_prune.sh
+ codeDir: .
+ gpuNum: 1
diff --git a/docs/en_US/Compression/CompressionReference.rst b/docs/en_US/Compression/CompressionReference.rst
new file mode 100644
index 0000000000..0ead9cec81
--- /dev/null
+++ b/docs/en_US/Compression/CompressionReference.rst
@@ -0,0 +1,33 @@
+Python API Reference of Compression Utilities
+=============================================
+
+.. contents::
+
+Sensitivity Utilities
+---------------------
+
+.. autoclass:: nni.compression.pytorch.utils.sensitivity_analysis.SensitivityAnalysis
+ :members:
+
+Topology Utilities
+------------------
+
+.. autoclass:: nni.compression.pytorch.utils.shape_dependency.ChannelDependency
+ :members:
+
+.. autoclass:: nni.compression.pytorch.utils.shape_dependency.GroupDependency
+ :members:
+
+.. autoclass:: nni.compression.pytorch.utils.mask_conflict.CatMaskPadding
+ :members:
+
+.. autoclass:: nni.compression.pytorch.utils.mask_conflict.GroupMaskConflict
+ :members:
+
+.. autoclass:: nni.compression.pytorch.utils.mask_conflict.ChannelMaskConflict
+ :members:
+
+Model FLOPs/Parameters Counter
+------------------------------
+
+.. autofunction:: nni.compression.pytorch.utils.counter.count_flops_params
diff --git a/docs/en_US/Compression/CompressionUtils.rst b/docs/en_US/Compression/CompressionUtils.rst
new file mode 100644
index 0000000000..c56d80d085
--- /dev/null
+++ b/docs/en_US/Compression/CompressionUtils.rst
@@ -0,0 +1,175 @@
+Analysis Utils for Model Compression
+====================================
+
+.. contents::
+
+We provide several easy-to-use tools for users to analyze their model during model compression.
+
+Sensitivity Analysis
+--------------------
+
+First, we provide a sensitivity analysis tool (\ **SensitivityAnalysis**\ ) for users to analyze the sensitivity of each convolutional layer in their model. Specifically, SensitivityAnalysis gradually prunes each layer of the model and tests the accuracy of the model at the same time. Note that SensitivityAnalysis only prunes one layer at a time, while the other layers keep their original weights. According to the accuracies of different convolutional layers under different sparsities, we can easily find out which layers the model accuracy is more sensitive to.
+
+Usage
+^^^^^
+
+The following codes show the basic usage of the SensitivityAnalysis.
+
+.. code-block:: python
+
+   import os
+   import torch
+
+   from nni.compression.pytorch.utils.sensitivity_analysis import SensitivityAnalysis
+
+ def val(model):
+ model.eval()
+ total = 0
+ correct = 0
+ with torch.no_grad():
+ for batchid, (data, label) in enumerate(val_loader):
+ data, label = data.cuda(), label.cuda()
+ out = model(data)
+ _, predicted = out.max(1)
+ total += data.size(0)
+ correct += predicted.eq(label).sum().item()
+ return correct / total
+
+ s_analyzer = SensitivityAnalysis(model=net, val_func=val)
+ sensitivity = s_analyzer.analysis(val_args=[net])
+   os.makedirs(outdir, exist_ok=True)
+ s_analyzer.export(os.path.join(outdir, filename))
+
+Two key parameters of SensitivityAnalysis are ``model`` and ``val_func``. ``model`` is the neural network to be analyzed, and ``val_func`` is the validation function that returns the model accuracy/loss or other metrics on the validation dataset. Because different scenarios may calculate the loss/accuracy in different ways, users should prepare a function that returns the model accuracy/loss on their dataset and pass it to SensitivityAnalysis.
+SensitivityAnalysis can also export the sensitivity results as a CSV file, as shown in the example above.
+
+Furthermore, users can specify the sparsity values used to prune each layer via the optional parameter ``sparsities``.
+
+.. code-block:: python
+
+ s_analyzer = SensitivityAnalysis(model=net, val_func=val, sparsities=[0.25, 0.5, 0.75])
+
+SensitivityAnalysis will gradually prune 25%, 50%, and 75% of the weights of each layer and record the model's accuracy at the same time (SensitivityAnalysis only prunes one layer at a time; the other layers keep their original weights). If ``sparsities`` is not set, SensitivityAnalysis will use ``numpy.arange(0.1, 1.0, 0.1)`` as the default sparsity values.
+
+Users can also speed up the sensitivity analysis with the ``early_stop_mode`` and ``early_stop_value`` options. By default, SensitivityAnalysis tests the accuracy under all sparsities for each layer. In contrast, when ``early_stop_mode`` and ``early_stop_value`` are set, the sensitivity analysis for a layer stops as soon as the accuracy/loss meets the threshold set by ``early_stop_value``. We support four early stop modes: minimize, maximize, dropped, raised.
+
+minimize: The analysis stops when the validation metric returned by ``val_func`` is lower than ``early_stop_value``.
+
+maximize: The analysis stops when the validation metric returned by ``val_func`` is larger than ``early_stop_value``.
+
+dropped: The analysis stops when the validation metric has dropped by ``early_stop_value``.
+
+raised: The analysis stops when the validation metric has risen by ``early_stop_value``.
+
+.. code-block:: python
+
+ s_analyzer = SensitivityAnalysis(model=net, val_func=val, sparsities=[0.25, 0.5, 0.75], early_stop_mode='dropped', early_stop_value=0.1)
+
+If users only want to analyze several specific convolutional layers, they can specify the target conv layers via ``specified_layers`` in the ``analysis`` function. ``specified_layers`` is a list of the PyTorch module names of the conv layers. For example:
+
+.. code-block:: python
+
+ sensitivity = s_analyzer.analysis(val_args=[net], specified_layers=['Conv1'])
+
+In this example, only the ``Conv1`` layer is analyzed. In addition, users can quickly and easily parallelize the analysis by launching multiple processes and assigning different conv layers of the same model to each process.
+
+Output example
+^^^^^^^^^^^^^^
+
+The following lines are an example CSV file exported from SensitivityAnalysis. The first line consists of 'layername' followed by the list of sparsities. Here the sparsity value means how much of the layer's weight SensitivityAnalysis prunes. Each following line records the model accuracy when the corresponding layer is pruned under different sparsities. Note that, due to the early stop option, some layers may
+not have model accuracies/losses under all sparsities, for example when the accuracy drop has already exceeded the threshold set by the user.
+
+.. code-block:: bash
+
+ layername,0.05,0.1,0.2,0.3,0.4,0.5,0.7,0.85,0.95
+ features.0,0.54566,0.46308,0.06978,0.0374,0.03024,0.01512,0.00866,0.00492,0.00184
+ features.3,0.54878,0.51184,0.37978,0.19814,0.07178,0.02114,0.00438,0.00442,0.00142
+ features.6,0.55128,0.53566,0.4887,0.4167,0.31178,0.19152,0.08612,0.01258,0.00236
+ features.8,0.55696,0.54194,0.48892,0.42986,0.33048,0.2266,0.09566,0.02348,0.0056
+ features.10,0.55468,0.5394,0.49576,0.4291,0.3591,0.28138,0.14256,0.05446,0.01578
+
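+One straightforward way to consume this file is to load it with pandas and rank the layers, as in the short sketch below (``'sensitivity.csv'`` is a placeholder for whatever path you passed to ``export``):
+
+.. code-block:: python
+
+   import pandas as pd
+
+   df = pd.read_csv('sensitivity.csv', index_col='layername')
+   # Layers with the lowest remaining accuracy at 50% sparsity are the most sensitive.
+   print(df['0.5'].sort_values())
+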
+Topology Analysis
+-----------------
+
+We also provide several tools for topology analysis during model compression. These tools help users compress their model better. Because of the complex topology of the network, when compressing a model, users often need to spend a lot of effort checking whether the compression configuration is reasonable. So we provide these topology analysis tools to reduce the burden on users.
+
+ChannelDependency
+^^^^^^^^^^^^^^^^^
+
+Complicated models may have residual connections or concat operations. When users prune these models, they need to be careful about the channel-count dependencies between the convolutional layers in the model. Taking the following residual block in resnet18 as an example: the output features of ``layer2.0.conv2`` and ``layer2.0.downsample.0`` are added together, so the number of output channels of ``layer2.0.conv2`` and ``layer2.0.downsample.0`` should be the same, or there may be a tensor shape conflict.
+
+
+.. image:: ../../img/channel_dependency_example.jpg
+ :target: ../../img/channel_dependency_example.jpg
+ :alt:
+
+
+If layers that have a channel dependency are assigned different sparsities (here we only discuss structured pruning by L1FilterPruner/L2FilterPruner), then there will be a shape conflict between these layers. Even if the pruned model with masks works fine, the pruned model cannot be sped up directly into the final model that runs on devices, because there will be a shape conflict when the model tries to add/concat the outputs of these layers. This tool finds the layers that have channel-count dependencies to help users prune their model better.
+
+Usage
+^^^^^
+
+.. code-block:: python
+
+ from nni.compression.pytorch.utils.shape_dependency import ChannelDependency
+ data = torch.ones(1, 3, 224, 224).cuda()
+ channel_depen = ChannelDependency(net, data)
+ channel_depen.export('dependency.csv')
+
+Output Example
+^^^^^^^^^^^^^^
+
+The following lines are the output example of torchvision.models.resnet18 exported by ChannelDependency. The layers on the same line have output channel dependencies with each other. For example, layer1.1.conv2, conv1, and layer1.0.conv2 have output channel dependencies with each other, which means the output channel (filter) counts of these three layers should be the same; otherwise, the model may have a shape conflict.
+
+.. code-block:: bash
+
+ Dependency Set,Convolutional Layers
+ Set 1,layer1.1.conv2,layer1.0.conv2,conv1
+ Set 2,layer1.0.conv1
+ Set 3,layer1.1.conv1
+ Set 4,layer2.0.conv1
+ Set 5,layer2.1.conv2,layer2.0.conv2,layer2.0.downsample.0
+ Set 6,layer2.1.conv1
+ Set 7,layer3.0.conv1
+ Set 8,layer3.0.downsample.0,layer3.1.conv2,layer3.0.conv2
+ Set 9,layer3.1.conv1
+ Set 10,layer4.0.conv1
+ Set 11,layer4.0.downsample.0,layer4.1.conv2,layer4.0.conv2
+ Set 12,layer4.1.conv1
+
+MaskConflict
+^^^^^^^^^^^^
+
+When the masks of different layers in a model conflict (for example, when different sparsities are assigned to layers that have a channel dependency), we can fix the mask conflict with MaskConflict. Specifically, MaskConflict loads the masks exported by the pruners (L1FilterPruner, etc.), checks whether there is a mask conflict, and if so, sets the conflicting masks to the same value.
+
+.. code-block:: python
+
+ from nni.compression.pytorch.utils.mask_conflict import fix_mask_conflict
+ fixed_mask = fix_mask_conflict('./resnet18_mask', net, data)
+
+Model FLOPs/Parameters Counter
+------------------------------
+
+We provide a model counter for calculating the model FLOPs and parameters. This counter supports calculating the FLOPs/parameters of a normal model without masks, and it can also calculate the FLOPs/parameters of a model with mask wrappers, which helps users easily check model complexity during model compression with NNI. Note that, for structured pruning, we only identify the remaining filters according to the mask and do not take the pruned input channels into consideration, so the calculated FLOPs will be larger than the real number (i.e., the number calculated after Model Speedup).
+
+We support two modes for collecting information about modules. The first mode is ``default``\ , which only collects the information of convolution and linear layers. The second mode is ``full``\ , which also collects the information of other operations. Users can easily use the collected ``results`` for further analysis.
+
+Usage
+^^^^^
+
+.. code-block:: python
+
+ from nni.compression.pytorch.utils.counter import count_flops_params
+
+ # Given input size (1, 1, 28, 28)
+ flops, params, results = count_flops_params(model, (1, 1, 28, 28))
+
+ # Given input tensor with size (1, 1, 28, 28) and switch to full mode
+ x = torch.randn(1, 1, 28, 28)
+
+   flops, params, results = count_flops_params(model, (x,), mode='full') # tuple of tensors as input
+
+   # Format the output in M (i.e., 10^6)
+   print(f'FLOPs: {flops/1e6:.3f}M, Params: {params/1e6:.3f}M')
+   print(results)
+   # Example output:
+   # {
+   #     'conv': {'flops': [60], 'params': [20], 'weight_size': [(5, 3, 1, 1)], 'input_size': [(1, 3, 2, 2)], 'output_size': [(1, 5, 2, 2)], 'module_type': ['Conv2d']},
+   #     'conv2': {'flops': [100], 'params': [30], 'weight_size': [(5, 5, 1, 1)], 'input_size': [(1, 5, 2, 2)], 'output_size': [(1, 5, 2, 2)], 'module_type': ['Conv2d']}
+   # }
diff --git a/docs/en_US/Compression/CustomizeCompressor.rst b/docs/en_US/Compression/CustomizeCompressor.rst
new file mode 100644
index 0000000000..7457439c9c
--- /dev/null
+++ b/docs/en_US/Compression/CustomizeCompressor.rst
@@ -0,0 +1,179 @@
+Customize New Compression Algorithm
+===================================
+
+.. contents::
+
+In order to simplify the process of writing new compression algorithms, we have designed a simple and flexible programming interface, which covers pruning and quantization. Below, we first demonstrate how to customize a new pruning algorithm and then demonstrate how to customize a new quantization algorithm.
+
+**Important Note**: To better understand how to customize new pruning/quantization algorithms, users should first understand the framework that supports various pruning algorithms in NNI. See the `Framework overview of model compression `__.
+
+Customize a new pruning algorithm
+---------------------------------
+
+Implementing a new pruning algorithm requires implementing a ``weight masker`` class, which should be a subclass of ``WeightMasker``\ , and a ``pruner`` class, which should be a subclass of ``Pruner``.
+
+An implementation of ``weight masker`` may look like this:
+
+.. code-block:: python
+
+ class MyMasker(WeightMasker):
+ def __init__(self, model, pruner):
+ super().__init__(model, pruner)
+ # You can do some initialization here, such as collecting some statistics data
+ # if it is necessary for your algorithms to calculate the masks.
+
+ def calc_mask(self, sparsity, wrapper, wrapper_idx=None):
+ # calculate the masks based on the wrapper.weight, and sparsity,
+ # and anything else
+ # mask = ...
+ return {'weight_mask': mask}
+
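+For a concrete (if simplistic) instance of the template above, the sketch below fills in ``calc_mask`` with a magnitude-based rule. It assumes the wrapped module's weight tensor is reachable as ``wrapper.module.weight`` (as in NNI's built-in maskers) and that ``WeightMasker`` is imported as in the template; it is an illustration, not one of NNI's shipped maskers.
+
+.. code-block:: python
+
+   import torch
+
+   class MagnitudeMasker(WeightMasker):
+       def calc_mask(self, sparsity, wrapper, wrapper_idx=None):
+           # Keep the (1 - sparsity) fraction of weights with the largest magnitude.
+           weight = wrapper.module.weight.data
+           k = int(weight.numel() * sparsity)
+           if k == 0:
+               return {'weight_mask': torch.ones_like(weight)}
+           threshold = weight.abs().view(-1).topk(k, largest=False)[0].max()
+           mask = (weight.abs() > threshold).type_as(weight)
+           return {'weight_mask': mask}
+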
+You can refer to the :githublink:`weight masker ` implementations provided by NNI to implement your own weight masker.
+
+A basic ``pruner`` looks like this:
+
+.. code-block:: python
+
+ class MyPruner(Pruner):
+ def __init__(self, model, config_list, optimizer):
+ super().__init__(model, config_list, optimizer)
+ self.set_wrappers_attribute("if_calculated", False)
+ # construct a weight masker instance
+ self.masker = MyMasker(model, self)
+
+ def calc_mask(self, wrapper, wrapper_idx=None):
+ sparsity = wrapper.config['sparsity']
+ if wrapper.if_calculated:
+ # Already pruned, do not prune again as a one-shot pruner
+ return None
+ else:
+               # call your masker to actually calculate the mask for this layer
+ masks = self.masker.calc_mask(sparsity=sparsity, wrapper=wrapper, wrapper_idx=wrapper_idx)
+ wrapper.if_calculated = True
+ return masks
+
+Refer to the :githublink:`pruner ` implementations provided by NNI to implement your own pruner class.
+
+----
+
+Customize a new quantization algorithm
+--------------------------------------
+
+To write a new quantization algorithm, you can write a class that inherits ``nni.compression.pytorch.Quantizer``. Then, override the member functions with the logic of your algorithm. The main member function to override is ``quantize_weight``. ``quantize_weight`` directly returns the quantized weights rather than a mask, because for quantization the quantized weights cannot be obtained by applying a mask.
+
+.. code-block:: python
+
+ from nni.compression.pytorch import Quantizer
+
+ class YourQuantizer(Quantizer):
+ def __init__(self, model, config_list):
+ """
+           We suggest using the NNI-defined spec for config
+ """
+ super().__init__(model, config_list)
+
+ def quantize_weight(self, weight, config, **kwargs):
+ """
+ quantize should overload this method to quantize weight tensors.
+ This method is effectively hooked to :meth:`forward` of the model.
+
+ Parameters
+ ----------
+ weight : Tensor
+ weight that needs to be quantized
+ config : dict
+ the configuration for weight quantization
+ """
+
+ # Put your code to generate `new_weight` here
+
+ return new_weight
+
+ def quantize_output(self, output, config, **kwargs):
+ """
+ quantize should overload this method to quantize output.
+           This method is effectively hooked to :meth:`forward` of the model.
+
+ Parameters
+ ----------
+ output : Tensor
+ output that needs to be quantized
+ config : dict
+ the configuration for output quantization
+ """
+
+ # Put your code to generate `new_output` here
+
+ return new_output
+
+ def quantize_input(self, *inputs, config, **kwargs):
+ """
+ quantize should overload this method to quantize input.
+ This method is effectively hooked to :meth:`forward` of the model.
+
+ Parameters
+ ----------
+ inputs : Tensor
+ inputs that needs to be quantized
+ config : dict
+ the configuration for inputs quantization
+ """
+
+ # Put your code to generate `new_input` here
+
+ return new_input
+
+ def update_epoch(self, epoch_num):
+ pass
+
+ def step(self):
+ """
+           Can do some processing based on the model or weights bound
+           in the func bind_model
+ """
+ pass
+
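+As a concrete (and deliberately naive) illustration of the template above, the sketch below quantizes weights with a symmetric uniform quantizer. It only fills in ``quantize_weight`` and keeps the signature shown above; the bit-width handling is an assumption about the config layout shown earlier (``'quant_bits'`` may be an int or a per-type dict), and this is not one of NNI's built-in quantizers.
+
+.. code-block:: python
+
+   import torch
+   from nni.compression.pytorch import Quantizer
+
+   class NaiveSymmetricQuantizer(Quantizer):
+       """A minimal sketch: symmetric uniform quantization of weights."""
+
+       def quantize_weight(self, weight, config, **kwargs):
+           bits = config.get('quant_bits', 8)
+           if isinstance(bits, dict):
+               bits = bits.get('weight', 8)
+           qmax = 2 ** (bits - 1) - 1
+           scale = weight.abs().max() / qmax
+           if scale == 0:
+               return weight
+           # Round to the nearest representable level, then de-quantize.
+           return torch.round(weight / scale) * scale
+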
+Customize backward function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it's necessary for a quantization operation to have a customized backward function, such as the `Straight-Through Estimator `__. Users can customize a backward function as follows:
+
+.. code-block:: python
+
+ from nni.compression.pytorch.compressor import Quantizer, QuantGrad, QuantType
+
+ class ClipGrad(QuantGrad):
+ @staticmethod
+ def quant_backward(tensor, grad_output, quant_type):
+ """
+           This method should be overridden by the subclass to provide a customized backward function;
+ default implementation is Straight-Through Estimator
+ Parameters
+ ----------
+ tensor : Tensor
+ input of quantization operation
+ grad_output : Tensor
+ gradient of the output of quantization operation
+ quant_type : QuantType
+ the type of quantization, it can be `QuantType.QUANT_INPUT`, `QuantType.QUANT_WEIGHT`, `QuantType.QUANT_OUTPUT`,
+ you can define different behavior for different types.
+ Returns
+ -------
+ tensor
+ gradient of the input of quantization operation
+ """
+
+ # for quant_output function, set grad to zero if the absolute value of tensor is larger than 1
+ if quant_type == QuantType.QUANT_OUTPUT:
+ grad_output[torch.abs(tensor) > 1] = 0
+ return grad_output
+
+
+ class YourQuantizer(Quantizer):
+ def __init__(self, model, config_list):
+ super().__init__(model, config_list)
+ # set your customized backward function to overwrite default backward function
+ self.quant_grad = ClipGrad
+
+If you do not customize ``QuantGrad``\ , the default backward function is the Straight-Through Estimator.
+
+*Coming Soon* ...
diff --git a/docs/en_US/Compression/DependencyAware.rst b/docs/en_US/Compression/DependencyAware.rst
new file mode 100644
index 0000000000..5001ca7430
--- /dev/null
+++ b/docs/en_US/Compression/DependencyAware.rst
@@ -0,0 +1,77 @@
+Dependency-aware Mode for Filter Pruning
+========================================
+
+Currently, we have several filter pruning algorithms for convolutional layers: FPGM Pruner, L1Filter Pruner, L2Filter Pruner, Activation APoZ Rank Filter Pruner, Activation Mean Rank Filter Pruner, and Taylor FO On Weight Pruner. In these filter pruning algorithms, the pruner prunes each convolutional layer separately. While pruning a convolutional layer, the algorithm quantifies the importance of each filter based on some specific rule (such as the L1 norm) and prunes the less important filters.
+
+As the `dependency analysis utils <./CompressionUtils.rst>`__ show, if the output channels of two convolutional layers (conv1, conv2) are added together, then these two conv layers have a channel dependency with each other (for more details please see `Compression Utils <./CompressionUtils.rst>`__\ ). Take the following figure as an example.
+
+
+.. image:: ../../img/mask_conflict.jpg
+ :target: ../../img/mask_conflict.jpg
+ :alt:
+
+
+Suppose we prune the first 50% of the output channels (filters) of conv1 and the last 50% of the output channels of conv2. Although both layers have 50% of their filters pruned, the speedup module still needs to add zeros to align the output channels. In this case, we cannot harvest the speed benefit from model pruning.
+
+To better gain the speed benefit of model pruning, we add a dependency-aware mode for the filter pruners. In the dependency-aware mode, the pruner prunes the model based not only on the L1 norm of each filter, but also on the topology of the whole network architecture.
+
+In the dependency-aware mode (\ ``dependency_aware`` is set to ``True``\ ), the pruner will try to prune the same output channels for the layers that have channel dependencies with each other, as shown in the following figure.
+
+
+.. image:: ../../img/dependency-aware.jpg
+ :target: ../../img/dependency-aware.jpg
+ :alt:
+
+
+Take the dependency-aware mode of L1Filter Pruner as an example. Specifically, the pruner calculates, for each channel, the sum of the L1 norms of that channel across all the layers in the dependency set. Obviously, the number of channels that can actually be pruned in this dependency set is determined by the minimum sparsity among the layers in the set (denoted by ``min_sparsity``\ ). According to the L1 norm sum of each channel, the pruner prunes the same ``min_sparsity`` fraction of channels for all the layers. Next, the pruner additionally prunes ``sparsity`` - ``min_sparsity`` channels for each convolutional layer based on that layer's own per-channel L1 norms. For example, suppose the output channels of ``conv1`` and ``conv2`` are added together and the configured sparsities of ``conv1`` and ``conv2`` are 0.3 and 0.2 respectively. In this case, the ``dependency-aware pruner`` will:
+
+* First, prune the same 20% of channels for ``conv1`` and ``conv2`` according to the L1 norm sum of ``conv1`` and ``conv2``.
+* Second, additionally prune 10% of the channels of ``conv1`` according to the L1 norm of each channel of ``conv1`` (an illustrative sketch of this channel-selection step is given below).
+
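+The snippet below is an illustrative sketch of that joint channel-selection step only (it is not NNI's internal code): two convolution layers whose outputs are added together form one dependency set, and the jointly pruned channels are chosen by the summed per-channel L1 norms.
+
+.. code-block:: python
+
+   import torch
+   import torch.nn as nn
+
+   # Two conv layers whose outputs are added together form one dependency set.
+   conv1, conv2 = nn.Conv2d(16, 32, 3), nn.Conv2d(16, 32, 1)
+   min_sparsity = 0.2   # min(0.3, 0.2) from the example above
+
+   # Per-output-channel L1 norms, summed across the dependency set.
+   l1_sum = (conv1.weight.detach().abs().sum(dim=(1, 2, 3))
+             + conv2.weight.detach().abs().sum(dim=(1, 2, 3)))
+
+   # These channels are pruned jointly in every layer of the set; each layer then
+   # prunes extra channels on its own until it reaches its configured sparsity.
+   n_pruned = int(min_sparsity * l1_sum.numel())
+   common_pruned = torch.argsort(l1_sum)[:n_pruned]
+   print(common_pruned)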
+
+In addition, for convolutional layers that have more than one filter group, the ``dependency-aware pruner`` will also try to prune the same number of channels for each filter group. Overall, this pruner prunes the model according to the L1 norm of each filter while trying to meet the topological constraints (channel dependency, etc.) to improve the final speed gain after the speedup process.
+
+In the dependency-aware mode, the pruner will provide a better speed gain from the model pruning.
+
+Usage
+-----
+
+In this section, we will show how to enable the dependency-aware mode for a filter pruner. Currently, only the one-shot pruners, such as FPGM Pruner, L1Filter Pruner, L2Filter Pruner, Activation APoZ Rank Filter Pruner, Activation Mean Rank Filter Pruner, and Taylor FO On Weight Pruner, support the dependency-aware mode.
+
+To enable the dependency-aware mode for ``L1FilterPruner``\ :
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
+ # dummy_input is necessary for the dependency_aware mode
+ dummy_input = torch.ones(1, 3, 224, 224).cuda()
+ pruner = L1FilterPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
+ # for L2FilterPruner
+ # pruner = L2FilterPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
+ # for FPGMPruner
+ # pruner = FPGMPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
+ # for ActivationAPoZRankFilterPruner
+   # pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
+ # for ActivationMeanRankFilterPruner
+ # pruner = ActivationMeanRankFilterPruner(model, config_list, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
+ # for TaylorFOWeightFilterPruner
+ # pruner = TaylorFOWeightFilterPruner(model, config_list, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
+
+ pruner.compress()
+
+Evaluation
+----------
+
+In order to compare the performance of the pruner with and without the dependency-aware mode, we use L1FilterPruner to prune MobileNetV2 with the dependency-aware mode turned on and off. To simplify the experiment, we use uniform pruning, which means we allocate the same sparsity to all convolutional layers in the model.
+We trained a MobileNetV2 model on the CIFAR-10 dataset and pruned the model based on this pretrained checkpoint. The following figure shows the accuracy and FLOPs of the model pruned by the different pruners.
+
+
+.. image:: ../../img/mobilev2_l1_cifar.jpg
+ :target: ../../img/mobilev2_l1_cifar.jpg
+ :alt:
+
+
+In the figure, ``Dependency-aware`` represents the L1FilterPruner with the dependency-aware mode enabled, ``L1 Filter`` is the normal ``L1FilterPruner`` without the dependency-aware mode, and ``No-Dependency`` means the pruner only prunes the layers that have no channel dependency with other layers. As we can see in the figure, when the dependency-aware mode is enabled, the pruner can achieve higher accuracy under the same FLOPs.
diff --git a/docs/en_US/Compression/Framework.rst b/docs/en_US/Compression/Framework.rst
new file mode 100644
index 0000000000..fa46b60230
--- /dev/null
+++ b/docs/en_US/Compression/Framework.rst
@@ -0,0 +1,209 @@
+Framework overview of model compression
+=======================================
+
+.. contents::
+
+Below picture shows the components overview of model compression framework.
+
+
+.. image:: ../../img/compressor_framework.jpg
+ :target: ../../img/compressor_framework.jpg
+ :alt:
+
+
+There are 3 major components/classes in NNI model compression framework: ``Compressor``\ , ``Pruner`` and ``Quantizer``. Let's look at them in detail one by one:
+
+Compressor
+----------
+
+Compressor is the base class for pruners and quantizers. It provides a unified interface so that pruners and quantizers can be used by end users in the same way. For example, to use a pruner:
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import LevelPruner
+
+ # load a pretrained model or train a model before using a pruner
+
+ configure_list = [{
+ 'sparsity': 0.7,
+ 'op_types': ['Conv2d', 'Linear'],
+ }]
+
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
+ pruner = LevelPruner(model, configure_list, optimizer)
+ model = pruner.compress()
+
+ # model is ready for pruning, now start finetune the model,
+ # the model will be pruned during training automatically
+
+To use a quantizer:
+
+.. code-block:: python
+
+   from nni.algorithms.compression.pytorch.quantization import DoReFaQuantizer
+
+ configure_list = [{
+ 'quant_types': ['weight'],
+ 'quant_bits': {
+ 'weight': 8,
+ },
+ 'op_types':['Conv2d', 'Linear']
+ }]
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
+ quantizer = DoReFaQuantizer(model, configure_list, optimizer)
+ quantizer.compress()
+
+View :githublink:`example code ` for more information.
+
+``Compressor`` class provides some utility methods for subclass and users:
+
+Set wrapper attribute
+^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes ``calc_mask`` must save some state data, so users can use the ``set_wrappers_attribute`` API to register attributes, just like how buffers are registered in PyTorch modules. These buffers are registered to the ``module wrapper``\ , and users can access them through the ``module wrapper``.
+In the above example, we use ``set_wrappers_attribute`` to set a buffer ``if_calculated``\ , which is used as a flag indicating whether the mask of a layer has already been calculated.
+
+Collect data during forward
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes users want to collect some data during the modules' forward method, for example, the mean value of the activation. This can be done by adding a customized collector to the module.
+
+.. code-block:: python
+
+   import torch
+
+   # ``WeightMasker`` is the base class of pruning maskers in NNI
+   class MyMasker(WeightMasker):
+ def __init__(self, model, pruner):
+ super().__init__(model, pruner)
+ # Set attribute `collected_activation` for all wrappers to store
+ # activations for each layer
+ self.pruner.set_wrappers_attribute("collected_activation", [])
+ self.activation = torch.nn.functional.relu
+
+ def collector(wrapper, input_, output):
+ # The collected activation can be accessed via each wrapper's collected_activation
+ # attribute
+ wrapper.collected_activation.append(self.activation(output.detach().cpu()))
+
+ self.pruner.hook_id = self.pruner.add_activation_collector(collector)
+
+The collector function will be called each time the forward method runs.
+
+Users can also remove this collector like this:
+
+.. code-block:: python
+
+ # Save the collector identifier
+ collector_id = self.pruner.add_activation_collector(collector)
+
+   # When the collector is not used any more, it can be removed using
+ # the saved collector identifier
+ self.pruner.remove_activation_collector(collector_id)
+
+----
+
+Pruner
+------
+
+A pruner receives ``model``\ , ``config_list`` and ``optimizer`` as arguments. It prunes the model per the ``config_list`` during the training loop by adding a hook on ``optimizer.step()``.
+
+The Pruner class is a subclass of Compressor, so it contains everything in the Compressor class plus some additional components that are only used for pruning:
+
+Weight masker
+^^^^^^^^^^^^^
+
+A ``weight masker`` is the implementation of a pruning algorithm; it can prune a specified layer wrapped by a ``module wrapper`` with a specified sparsity.
+
+Pruning module wrapper
+^^^^^^^^^^^^^^^^^^^^^^
+
+A ``pruning module wrapper`` is a module containing:
+
+
+#. the origin module
+#. some buffers used by ``calc_mask``
+#. a new forward method that applies masks before running the original forward method.
+
+The reasons for using a ``module wrapper`` (a minimal sketch of such a wrapper is shown below):
+
+
+#. some buffers are needed by ``calc_mask`` to calculate masks, and these buffers should be registered in the ``module wrapper`` so that the original modules are not contaminated.
+#. a new ``forward`` method is needed to apply masks to the weight before calling the real ``forward`` method.
+
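+A conceptual sketch of such a wrapper is shown below; it is an illustration of the three components above, not NNI's actual implementation:
+
+.. code-block:: python
+
+   import torch
+   import torch.nn as nn
+
+   class MyModuleWrapper(nn.Module):
+       def __init__(self, module):
+           super().__init__()
+           # 1. the original module
+           self.module = module
+           # 2. buffers used by ``calc_mask``
+           self.register_buffer('weight_mask', torch.ones_like(module.weight))
+
+       def forward(self, *inputs):
+           # 3. apply the mask before running the original forward method
+           self.module.weight.data = self.module.weight.data.mul(self.weight_mask)
+           return self.module(*inputs)
+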
+Pruning hook
+^^^^^^^^^^^^
+
+A pruning hook is installed on a pruner when the pruner is constructed. It is used to call the pruner's ``calc_mask`` method when ``optimizer.step()`` is invoked.
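+
+Conceptually, the hook works like the following sketch (an illustration, not NNI's actual code): the original ``optimizer.step`` is wrapped so that ``calc_mask`` is re-run for every wrapped module after each weight update.
+
+.. code-block:: python
+
+   def patch_optimizer(pruner, optimizer):
+       original_step = optimizer.step
+
+       def patched_step(*args, **kwargs):
+           result = original_step(*args, **kwargs)
+           # recompute the masks after the weights have been updated
+           for wrapper in pruner.get_modules_wrapper():
+               pruner.calc_mask(wrapper)
+           return result
+
+       optimizer.step = patched_step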
+
+----
+
+Quantizer
+---------
+
+The Quantizer class is also a subclass of ``Compressor``. It is used to compress models by reducing the number of bits required to represent weights or activations, which can reduce computation and inference time. It contains:
+
+Quantization module wrapper
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Each module/layer of the model to be quantized is wrapped by a quantization module wrapper, which provides a new ``forward`` method to quantize the original module's weight, input and output.
+
+Quantization hook
+^^^^^^^^^^^^^^^^^
+
+A quantization hook is installed on a quantizer when it is constructed; it is called when ``optimizer.step()`` is invoked.
+
+Quantization methods
+^^^^^^^^^^^^^^^^^^^^
+
+``Quantizer`` class provides following methods for subclass to implement quantization algorithms:
+
+.. code-block:: python
+
+ class Quantizer(Compressor):
+ """
+ Base quantizer for pytorch quantizer
+ """
+ def quantize_weight(self, weight, wrapper, **kwargs):
+ """
+ quantize should overload this method to quantize weight.
+ This method is effectively hooked to :meth:`forward` of the model.
+ Parameters
+ ----------
+ weight : Tensor
+ weight that needs to be quantized
+ wrapper : QuantizerModuleWrapper
+ the wrapper for origin module
+ """
+ raise NotImplementedError('Quantizer must overload quantize_weight()')
+
+ def quantize_output(self, output, wrapper, **kwargs):
+ """
+ quantize should overload this method to quantize output.
+ This method is effectively hooked to :meth:`forward` of the model.
+ Parameters
+ ----------
+ output : Tensor
+ output that needs to be quantized
+ wrapper : QuantizerModuleWrapper
+ the wrapper for origin module
+ """
+ raise NotImplementedError('Quantizer must overload quantize_output()')
+
+ def quantize_input(self, *inputs, wrapper, **kwargs):
+ """
+ quantize should overload this method to quantize input.
+ This method is effectively hooked to :meth:`forward` of the model.
+ Parameters
+ ----------
+ inputs : Tensor
+ inputs that needs to be quantized
+ wrapper : QuantizerModuleWrapper
+ the wrapper for origin module
+ """
+ raise NotImplementedError('Quantizer must overload quantize_input()')
+
+----
+
+Multi-GPU support
+-----------------
+
+During multi-GPU training, buffers and parameters are copied to each GPU every time the ``forward`` method runs. If buffers and parameters are updated in the ``forward`` method, an ``in-place`` update is needed to make the update effective.
+Since ``calc_mask`` is called in the ``optimizer.step`` method, which happens after the ``forward`` method and only on one GPU, multi-GPU training is supported naturally.
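+
+For example, assume a wrapper keeps a registered buffer ``step_count`` that is updated inside ``forward`` (the buffer name is hypothetical, for illustration only):
+
+.. code-block:: python
+
+   def forward(self, *inputs):
+       # Rebinding the attribute creates a new tensor on the replica, so the
+       # update is lost when the replica is discarded after the forward pass:
+       #     self.step_count = self.step_count + 1
+       # An in-place update writes into the existing buffer instead:
+       self.step_count.add_(1)
+       return self.module(*inputs)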
diff --git a/docs/en_US/Compression/ModelSpeedup.rst b/docs/en_US/Compression/ModelSpeedup.rst
new file mode 100644
index 0000000000..ed7ec2a78b
--- /dev/null
+++ b/docs/en_US/Compression/ModelSpeedup.rst
@@ -0,0 +1,190 @@
+Speed up Masked Model
+=====================
+
+*This feature is in Beta version.*
+
+Introduction
+------------
+
+Pruning algorithms usually use weight masks to simulate the real pruning. Masks can be used
+to check the model performance of a specific pruning (or sparsity), but there is no real speedup.
+Since model speedup is the ultimate goal of model pruning, we try to provide a tool for users
+to convert a model to a smaller one based on user-provided masks (the masks come from the
+pruning algorithms).
+
+There are two types of pruning. One is fine-grained pruning, which does not change the shape of weights or input/output tensors; a sparse kernel is required to speed up a fine-grained pruned layer. The other is coarse-grained pruning (e.g., channels), where the shape of weights and input/output tensors usually changes due to the pruning. To speed up this kind of pruning there is no need for a sparse kernel; the pruned layer can simply be replaced with a smaller one. Since the support for sparse kernels in the community is limited, we only support the speedup of coarse-grained pruning and leave the support of fine-grained pruning for the future.
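+
+The difference can be illustrated with a small sketch (illustrative shapes only):
+
+.. code-block:: python
+
+   import torch
+
+   weight = torch.randn(8, 4, 3, 3)          # a Conv2d weight: (out, in, kH, kW)
+
+   # fine-grained mask: element-wise, same shape as the weight, no shape change
+   fine_mask = (weight.abs() > 0.5).float()
+
+   # coarse-grained (filter) mask: whole output channels are removed, so the
+   # layer can be replaced by a smaller Conv2d with 6 output channels
+   channel_mask = torch.ones(8, 1, 1, 1)
+   channel_mask[[0, 3]] = 0.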
+
+Design and Implementation
+-------------------------
+
+To speed up a model, the pruned layers should be replaced, either with a smaller layer for a coarse-grained mask, or with a sparse kernel for a fine-grained mask. A coarse-grained mask usually changes the shape of weights or input/output tensors, thus, we should do shape inference to check whether there are other unpruned layers that should also be replaced due to the shape change. Therefore, in our design, there are two main steps: first, do shape inference to find out all the modules that should be replaced; second, replace the modules. The first step requires the topology (i.e., connections) of the model; we use ``jit.trace`` to obtain the model graph for PyTorch.
+
+For each module, we should prepare four functions, three for shape inference and one for module replacement. The three shape inference functions are: given the weight shape, infer the input/output shape; given the input shape, infer the weight/output shape; given the output shape, infer the weight/input shape. The module replacement function returns a newly created module which is smaller.
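+
+As an illustration, a replacement function for ``Conv2d`` could look like the sketch below; this is a simplified, hypothetical helper that ignores grouped convolutions, not the function NNI actually uses:
+
+.. code-block:: python
+
+   import torch.nn as nn
+
+   def replace_conv2d(conv: nn.Conv2d, in_idx, out_idx):
+       # in_idx / out_idx: indices of the input / output channels that are kept
+       new_conv = nn.Conv2d(len(in_idx), len(out_idx),
+                            kernel_size=conv.kernel_size,
+                            stride=conv.stride,
+                            padding=conv.padding,
+                            bias=conv.bias is not None)
+       # copy the surviving weights into the smaller layer
+       new_conv.weight.data = conv.weight.data[out_idx][:, in_idx].clone()
+       if conv.bias is not None:
+           new_conv.bias.data = conv.bias.data[out_idx].clone()
+       return new_conv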
+
+Usage
+-----
+
+.. code-block:: python
+
+   import time
+
+   from nni.compression.pytorch import ModelSpeedup
+ # model: the model you want to speed up
+ # dummy_input: dummy input of the model, given to `jit.trace`
+ # masks_file: the mask file created by pruning algorithms
+ m_speedup = ModelSpeedup(model, dummy_input.to(device), masks_file)
+ m_speedup.speedup_model()
+ dummy_input = dummy_input.to(device)
+ start = time.time()
+ out = model(dummy_input)
+ print('elapsed time: ', time.time() - start)
+
+For complete examples please refer to :githublink:`the code `.
+
+NOTE: The current implementation supports PyTorch 1.3.1 or newer.
+
+Limitations
+-----------
+
+Since every module requires four functions for shape inference and module replacement, implementing them is a large amount of work, so we only implemented the ones that are required by the examples. If you want to speed up your own model which is not supported by the current implementation, you are welcome to contribute.
+
+For PyTorch we can only replace modules; if functions in ``forward`` need to be replaced, our current implementation does not work. One workaround is to make the function a PyTorch module.
+
+Speedup Results of Examples
+---------------------------
+
+The code of these experiments can be found :githublink:`here `.
+
+slim pruner example
+^^^^^^^^^^^^^^^^^^^
+
+on one V100 GPU,
+input tensor: ``torch.randn(64, 3, 32, 32)``
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Times
+ - Mask Latency
+ - Speedup Latency
+ * - 1
+ - 0.01197
+ - 0.005107
+ * - 2
+ - 0.02019
+ - 0.008769
+ * - 4
+ - 0.02733
+ - 0.014809
+ * - 8
+ - 0.04310
+ - 0.027441
+ * - 16
+ - 0.07731
+ - 0.05008
+ * - 32
+ - 0.14464
+ - 0.10027
+
+
+fpgm pruner example
+^^^^^^^^^^^^^^^^^^^
+
+on CPU,
+input tensor: ``torch.randn(64, 1, 28, 28)``\ ,
+the variance of the measurements is large
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Times
+ - Mask Latency
+ - Speedup Latency
+ * - 1
+ - 0.01383
+ - 0.01839
+ * - 2
+ - 0.01167
+ - 0.003558
+ * - 4
+ - 0.01636
+ - 0.01088
+ * - 40
+ - 0.14412
+ - 0.08268
+ * - 40
+ - 1.29385
+ - 0.14408
+ * - 40
+ - 0.41035
+ - 0.46162
+ * - 400
+ - 6.29020
+ - 5.82143
+
+
+l1filter pruner example
+^^^^^^^^^^^^^^^^^^^^^^^
+
+on one V100 GPU,
+input tensor: ``torch.randn(64, 3, 32, 32)``
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Times
+ - Mask Latency
+ - Speedup Latency
+ * - 1
+ - 0.01026
+ - 0.003677
+ * - 2
+ - 0.01657
+ - 0.008161
+ * - 4
+ - 0.02458
+ - 0.020018
+ * - 8
+ - 0.03498
+ - 0.025504
+ * - 16
+ - 0.06757
+ - 0.047523
+ * - 32
+ - 0.10487
+ - 0.086442
+
+
+APoZ pruner example
+^^^^^^^^^^^^^^^^^^^
+
+on one V100 GPU,
+input tensor: ``torch.randn(64, 3, 32, 32)``
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Times
+ - Mask Latency
+ - Speedup Latency
+ * - 1
+ - 0.01389
+ - 0.004208
+ * - 2
+ - 0.01628
+ - 0.008310
+ * - 4
+ - 0.02521
+ - 0.014008
+ * - 8
+ - 0.03386
+ - 0.023923
+ * - 16
+ - 0.06042
+ - 0.046183
+ * - 32
+ - 0.12421
+ - 0.087113
+
diff --git a/docs/en_US/Compression/Overview.rst b/docs/en_US/Compression/Overview.rst
new file mode 100644
index 0000000000..676d2d586f
--- /dev/null
+++ b/docs/en_US/Compression/Overview.rst
@@ -0,0 +1,118 @@
+Model Compression with NNI
+==========================
+
+.. contents::
+
+As larger neural networks with more layers and nodes are considered, reducing their storage and computational cost becomes critical, especially for some real-time applications. Model compression can be used to address this problem.
+
+NNI provides a model compression toolkit to help users compress and speed up their models with state-of-the-art compression algorithms and strategies. There are several core features supported by NNI model compression:
+
+
+* Support many popular pruning and quantization algorithms.
+* Automate model pruning and quantization process with state-of-the-art strategies and NNI's auto tuning power.
+* Speed up a compressed model to reduce its inference latency and model size.
+* Provide friendly and easy-to-use compression utilities for users to dive into the compression process and results.
+* Concise interface for users to customize their own compression algorithms.
+
+*Note that the interface and APIs are unified for both PyTorch and TensorFlow; currently only the PyTorch version is supported, and the TensorFlow version will be supported in the future.*
+
+Supported Algorithms
+--------------------
+
+The algorithms include pruning algorithms and quantization algorithms.
+
+Pruning Algorithms
+^^^^^^^^^^^^^^^^^^
+
+Pruning algorithms compress the original network by removing redundant weights or channels of layers, which can reduce model complexity and address the over-fitting issue.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Name
+ - Brief Introduction of Algorithm
+ * - `Level Pruner `__
+ - Pruning the specified ratio on each weight based on absolute values of weights
+ * - `AGP Pruner `__
+ - Automated gradual pruning (To prune, or not to prune: exploring the efficacy of pruning for model compression) `Reference Paper `__
+ * - `Lottery Ticket Pruner `__
+ - The pruning process used by "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". It prunes a model iteratively. `Reference Paper `__
+ * - `FPGM Pruner `__
+ - Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration `Reference Paper `__
+ * - `L1Filter Pruner `__
+ - Pruning filters with the smallest L1 norm of weights in convolution layers (Pruning Filters for Efficient Convnets) `Reference Paper `__
+ * - `L2Filter Pruner `__
+ - Pruning filters with the smallest L2 norm of weights in convolution layers
+ * - `ActivationAPoZRankFilterPruner `__
+ - Pruning filters based on the metric APoZ (average percentage of zeros) which measures the percentage of zeros in activations of (convolutional) layers. `Reference Paper `__
+ * - `ActivationMeanRankFilterPruner `__
+ - Pruning filters based on the metric that calculates the smallest mean value of output activations
+ * - `Slim Pruner `__
+ - Pruning channels in convolution layers by pruning scaling factors in BN layers(Learning Efficient Convolutional Networks through Network Slimming) `Reference Paper `__
+ * - `TaylorFO Pruner `__
+ - Pruning filters based on the first order taylor expansion on weights(Importance Estimation for Neural Network Pruning) `Reference Paper `__
+ * - `ADMM Pruner `__
+ - Pruning based on ADMM optimization technique `Reference Paper `__
+ * - `NetAdapt Pruner `__
+ - Automatically simplify a pretrained network to meet the resource budget by iterative pruning `Reference Paper `__
+ * - `SimulatedAnnealing Pruner `__
+ - Automatic pruning with a guided heuristic search method, Simulated Annealing algorithm `Reference Paper `__
+ * - `AutoCompress Pruner `__
+ - Automatic pruning by iteratively call SimulatedAnnealing Pruner and ADMM Pruner `Reference Paper `__
+ * - `AMC Pruner `__
+ - AMC: AutoML for Model Compression and Acceleration on Mobile Devices `Reference Paper `__
+
+
+You can refer to this :githublink:`benchmark ` for the performance of these pruners on some benchmark problems.
+
+Quantization Algorithms
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Quantization algorithms compress the original network by reducing the number of bits required to represent weights or activations, which can reduce the computations and the inference time.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Name
+ - Brief Introduction of Algorithm
+ * - `Naive Quantizer `__
+ - Quantize weights to default 8 bits
+ * - `QAT Quantizer `__
+ - Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. `Reference Paper `__
+ * - `DoReFa Quantizer `__
+ - DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. `Reference Paper `__
+ * - `BNN Quantizer `__
+ - Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1. `Reference Paper `__
+
+
+Automatic Model Compression
+---------------------------
+
+Given a targeted compression ratio, it is pretty hard to obtain the best compressed ratio in one shot. An automatic model compression algorithm usually needs to explore the compression space by compressing different layers with different sparsities. NNI provides such algorithms to free users from specifying the sparsity of each layer in a model. Moreover, users could leverage NNI's auto tuning power to automatically compress a model. The detailed document can be found `here <./AutoPruningUsingTuners.rst>`__.
+
+Model Speedup
+-------------
+
+The final goal of model compression is to reduce inference latency and model size. However, existing model compression algorithms mainly use simulation to check the performance (e.g., accuracy) of compressed model, for example, using masks for pruning algorithms, and storing quantized values still in float32 for quantization algorithms. Given the output masks and quantization bits produced by those algorithms, NNI can really speed up the model. The detailed tutorial of Model Speedup can be found `here <./ModelSpeedup.rst>`__.
+
+Compression Utilities
+---------------------
+
+Compression utilities include some useful tools for users to understand and analyze the model they want to compress. For example, users could check sensitivity of each layer to pruning. Users could easily calculate the FLOPs and parameter size of a model. Please refer to `here <./CompressionUtils.rst>`__ for a complete list of compression utilities.
+
+Customize Your Own Compression Algorithms
+-----------------------------------------
+
+NNI model compression provides a simple interface for users to customize a new compression algorithm. The design philosophy of the interface is to make users focus on the compression logic while hiding framework-specific implementation details. The detailed tutorial for customizing a new compression algorithm (pruning algorithm or quantization algorithm) can be found `here <./Framework.rst>`__.
+
+Reference and Feedback
+----------------------
+
+
+* To `report a bug `__ for this feature in GitHub;
+* To `file a feature or improvement request `__ for this feature in GitHub;
+* To know more about `Feature Engineering with NNI <../FeatureEngineering/Overview.rst>`__\ ;
+* To know more about `NAS with NNI <../NAS/Overview.rst>`__\ ;
+* To know more about `Hyperparameter Tuning with NNI <../Tuner/BuiltinTuner.rst>`__\ ;
diff --git a/docs/en_US/Compression/Pruner.rst b/docs/en_US/Compression/Pruner.rst
new file mode 100644
index 0000000000..e677f69b46
--- /dev/null
+++ b/docs/en_US/Compression/Pruner.rst
@@ -0,0 +1,801 @@
+Supported Pruning Algorithms on NNI
+===================================
+
+We provide several pruning algorithms that support fine-grained weight pruning and structural filter pruning. **Fine-grained Pruning** generally results in unstructured models, which need specialized hardware or software to speed up the sparse network. **Filter Pruning** achieves acceleration by removing entire filters. We also provide an algorithm to control the **pruning schedule**.
+
+**Fine-grained Pruning**
+
+
+* `Level Pruner <#level-pruner>`__
+
+**Filter Pruning**
+
+
+* `Slim Pruner <#slim-pruner>`__
+* `FPGM Pruner <#fpgm-pruner>`__
+* `L1Filter Pruner <#l1filter-pruner>`__
+* `L2Filter Pruner <#l2filter-pruner>`__
+* `Activation APoZ Rank Filter Pruner <#activationAPoZRankFilter-pruner>`__
+* `Activation Mean Rank Filter Pruner <#activationmeanrankfilter-pruner>`__
+* `Taylor FO On Weight Pruner <#taylorfoweightfilter-pruner>`__
+
+**Pruning Schedule**
+
+
+* `AGP Pruner <#agp-pruner>`__
+* `NetAdapt Pruner <#netadapt-pruner>`__
+* `SimulatedAnnealing Pruner <#simulatedannealing-pruner>`__
+* `AutoCompress Pruner <#autocompress-pruner>`__
+* `AMC Pruner <#amc-pruner>`__
+* `Sensitivity Pruner <#sensitivity-pruner>`__
+
+**Others**
+
+
+* `ADMM Pruner <#admm-pruner>`__
+* `Lottery Ticket Hypothesis <#lottery-ticket-hypothesis>`__
+
+Level Pruner
+------------
+
+This is a basic one-shot pruner: you can set a target sparsity level (expressed as a fraction, 0.6 means we will prune 60% of the weight parameters).
+
+We first sort the weights in the specified layer by their absolute values, and then mask to zero the smallest-magnitude weights until the desired sparsity level is reached.
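+
+The masking rule can be sketched as follows (illustrative code, not NNI's implementation):
+
+.. code-block:: python
+
+   import torch
+
+   def level_mask(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
+       num_prune = int(weight.numel() * sparsity)
+       if num_prune == 0:
+           return torch.ones_like(weight)
+       # the threshold is the k-th smallest absolute value
+       threshold = weight.abs().flatten().kthvalue(num_prune).values
+       return (weight.abs() > threshold).float()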
+
+Usage
+^^^^^
+
+Tensorflow code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.tensorflow.pruning import LevelPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
+ pruner = LevelPruner(model, config_list)
+ pruner.compress()
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import LevelPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
+ pruner = LevelPruner(model, config_list)
+ pruner.compress()
+
+User configuration for Level Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.LevelPruner
+
+TensorFlow
+""""""""""
+
+.. autoclass:: nni.algorithms.compression.tensorflow.pruning.LevelPruner
+
+Slim Pruner
+-----------
+
+This is a one-shot pruner proposed in `'Learning Efficient Convolutional Networks through Network Slimming' `__ by Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang.
+
+
+.. image:: ../../img/slim_pruner.png
+ :target: ../../img/slim_pruner.png
+ :alt:
+
+
+..
+
+   Slim Pruner **prunes channels in the convolution layers by masking corresponding scaling factors in the later BN layers**. L1 regularization on the scaling factors should be applied to the batch normalization (BN) layers while training, and the scaling factors of the BN layers are **globally ranked** while pruning, so the sparse model can be automatically found for a given sparsity.
+
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import SlimPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
+ pruner = SlimPruner(model, config_list)
+ pruner.compress()
+
+User configuration for Slim Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.SlimPruner
+
+Reproduced Experiment
+^^^^^^^^^^^^^^^^^^^^^
+
+We implemented one of the experiments in `'Learning Efficient Convolutional Networks through Network Slimming' `__\ ; we pruned 70% of the channels in the **VGGNet** for CIFAR-10, in which 88.5% of the parameters are pruned. Our experiment results are as follows:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Error(paper/ours)
+ - Parameters
+ - Pruned
+ * - VGGNet
+ - 6.34/6.40
+ - 20.04M
+ -
+ * - Pruned-VGGNet
+ - 6.20/6.26
+ - 2.03M
+ - 88.5%
+
+
+The experiments code can be found at :githublink:`examples/model_compress `
+
+----
+
+FPGM Pruner
+-----------
+
+This is a one-shot pruner. FPGM Pruner is an implementation of the paper `Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration `__.
+
+FPGMPruner prunes the filters that are closest to the geometric median of the filters in the same layer, i.e., the most replaceable ones.
+
+
+.. image:: ../../img/fpgm_fig1.png
+ :target: ../../img/fpgm_fig1.png
+ :alt:
+
+
+..
+
+ Previous works utilized “smaller-norm-less-important” criterion to prune filters with smaller norm values in a convolutional neural network. In this paper, we analyze this norm-based criterion and point out that its effectiveness depends on two requirements that are not always met: (1) the norm deviation of the filters should be large; (2) the minimum norm of the filters should be small. To solve this problem, we propose a novel filter pruning method, namely Filter Pruning via Geometric Median (FPGM), to compress the model regardless of those two requirements. Unlike previous methods, FPGM compresses CNN models by pruning filters with redundancy, rather than those with “relatively less” importance.
+
+
+We also provide a dependency-aware mode for this pruner to get better speedup from the pruning. Please reference `dependency-aware <./DependencyAware.rst>`__ for more details.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import FPGMPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = FPGMPruner(model, config_list)
+ pruner.compress()
+
+User configuration for FPGM Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.FPGMPruner
+
+L1Filter Pruner
+---------------
+
+This is a one-shot pruner described in `'PRUNING FILTERS FOR EFFICIENT CONVNETS' `__ by Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf.
+
+
+.. image:: ../../img/l1filter_pruner.png
+ :target: ../../img/l1filter_pruner.png
+ :alt:
+
+
+..
+
+ L1Filter Pruner prunes filters in the **convolution layers**
+
+ The procedure of pruning m filters from the ith convolutional layer is as follows:
+
+
+ #. For each filter :math:`F_{i,j}`, calculate the sum of its absolute kernel weights :math:`s_j=\sum_{l=1}^{n_i}\sum|K_l|`.
+
+ #. Sort the filters by :math:`s_j`.
+
+ #. Prune :math:`m` filters with the smallest sum values and their corresponding feature maps. The
+ kernels in the next convolutional layer corresponding to the pruned feature maps are also removed.
+
+ #. A new kernel matrix is created for both the :math:`i`-th and :math:`i+1`-th layers, and the remaining kernel
+ weights are copied to the new model.
+
+
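+The per-filter score in step 1 and the selection in step 3 can be sketched as follows (illustrative code operating on a ``Conv2d`` weight of shape ``(out_channels, in_channels, kH, kW)``\ , not NNI's implementation):
+
+.. code-block:: python
+
+   import torch
+
+   def l1_filter_mask(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
+       num_prune = int(weight.size(0) * sparsity)
+       # s_j: sum of absolute kernel weights of each filter
+       s = weight.abs().sum(dim=(1, 2, 3))
+       # indices of the filters with the smallest sums
+       prune_idx = torch.argsort(s)[:num_prune]
+       mask = torch.ones_like(weight)
+       mask[prune_idx] = 0.
+       return mask
+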
+In addition, we also provide a dependency-aware mode for the L1FilterPruner. For more details about the dependency-aware mode, please reference `dependency-aware mode <./DependencyAware.rst>`__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
+ pruner = L1FilterPruner(model, config_list)
+ pruner.compress()
+
+User configuration for L1Filter Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.L1FilterPruner
+
+Reproduced Experiment
+^^^^^^^^^^^^^^^^^^^^^
+
+We implemented one of the experiments in `'PRUNING FILTERS FOR EFFICIENT CONVNETS' `__ with **L1FilterPruner**\ ; we pruned **VGG-16** for CIFAR-10 to **VGG-16-pruned-A** as in the paper, in which 64% of the parameters are pruned. Our experiment results are as follows:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Error(paper/ours)
+ - Parameters
+ - Pruned
+ * - VGG-16
+ - 6.75/6.49
+ - 1.5x10^7
+ -
+ * - VGG-16-pruned-A
+ - 6.60/6.47
+ - 5.4x10^6
+ - 64.0%
+
+
+The experiments code can be found at :githublink:`examples/model_compress `
+
+----
+
+L2Filter Pruner
+---------------
+
+This is a structured pruning algorithm that prunes the filters with the smallest L2 norm of the weights. It is implemented as a one-shot pruner.
+
+We also provide a dependency-aware mode for this pruner to get better speedup from the pruning. Please reference `dependency-aware <./DependencyAware.rst>`__ for more details.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import L2FilterPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
+ pruner = L2FilterPruner(model, config_list)
+ pruner.compress()
+
+User configuration for L2Filter Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.L2FilterPruner
+
+----
+
+ActivationAPoZRankFilter Pruner
+-------------------------------
+
+ActivationAPoZRankFilter Pruner prunes the filters with the smallest importance, measured by the criterion ``APoZ`` calculated from the output activations of convolution layers, to achieve a preset level of network sparsity. The pruning criterion ``APoZ`` is explained in the paper `Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures `__.
+
+The APoZ is defined as:
+
+
+.. image:: ../../img/apoz.png
+ :target: ../../img/apoz.png
+ :alt:
+
+
+We also provide a dependency-aware mode for this pruner to get better speedup from the pruning. Please reference `dependency-aware <./DependencyAware.rst>`__ for more details.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import ActivationAPoZRankFilterPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1)
+ pruner.compress()
+
+Note: ActivationAPoZRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the ``op_types`` field supports only convolutional layers.
+
+You can view :githublink:`example ` for more information.
+
+User configuration for ActivationAPoZRankFilter Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.ActivationAPoZRankFilterPruner
+
+----
+
+ActivationMeanRankFilter Pruner
+-------------------------------
+
+ActivationMeanRankFilterPruner prunes the filters with the smallest importance, measured by the criterion ``mean activation`` calculated from the output activations of convolution layers, to achieve a preset level of network sparsity. The pruning criterion ``mean activation`` is explained in section 2.2 of the paper `Pruning Convolutional Neural Networks for Resource Efficient Inference `__. Other pruning criteria mentioned in this paper will be supported in a future release.
+
+We also provide a dependency-aware mode for this pruner to get better speedup from the pruning. Please reference `dependency-aware <./DependencyAware.rst>`__ for more details.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import ActivationMeanRankFilterPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = ActivationMeanRankFilterPruner(model, config_list, statistics_batch_num=1)
+ pruner.compress()
+
+Note: ActivationMeanRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the ``op_types`` field supports only convolutional layers.
+
+You can view :githublink:`example ` for more information.
+
+User configuration for ActivationMeanRankFilterPruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.ActivationMeanRankFilterPruner
+
+----
+
+TaylorFOWeightFilter Pruner
+---------------------------
+
+TaylorFOWeightFilter Pruner prunes convolutional layers based on the estimated importance calculated from the first-order Taylor expansion on weights, to achieve a preset level of network sparsity. The estimated importance of filters is defined in the paper `Importance Estimation for Neural Network Pruning `__. Other pruning criteria mentioned in this paper will be supported in a future release.
+
+
+
+.. image:: ../../img/importance_estimation_sum.png
+ :target: ../../img/importance_estimation_sum.png
+ :alt:
+
+
+We also provide a dependency-aware mode for this pruner to get better speedup from the pruning. Please reference `dependency-aware <./DependencyAware.rst>`__ for more details.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import TaylorFOWeightFilterPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = TaylorFOWeightFilterPruner(model, config_list, statistics_batch_num=1)
+ pruner.compress()
+
+User configuration for TaylorFOWeightFilter Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.TaylorFOWeightFilterPruner
+
+----
+
+AGP Pruner
+----------
+
+This is an iterative pruner. In `To prune, or not to prune: exploring the efficacy of pruning for model compression `__\ , authors Michael Zhu and Suyog Gupta provide an algorithm to prune the weights gradually.
+
+..
+
+ We introduce a new automated gradual pruning algorithm in which the sparsity is increased from an initial sparsity value si (usually 0) to a final sparsity value sf over a span of n pruning steps, starting at training step t0 and with pruning frequency ∆t:
+
+ .. image:: ../../img/agp_pruner.png
+ :target: ../../img/agp_pruner.png
+ :alt:
+
+
+ The binary weight masks are updated every ∆t steps as the network is trained to gradually increase the sparsity of the network while allowing the network training steps to recover from any pruning-induced loss in accuracy. In our experience, varying the pruning frequency ∆t between 100 and 1000 training steps had a negligible impact on the final model quality. Once the model achieves the target sparsity sf , the weight masks are no longer updated. The intuition behind this sparsity function in equation (1).
+
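+The sparsity schedule described above can be written as the following sketch (``s_i``\ /``s_f`` are the initial/final sparsity, ``t0`` the starting step, ``n`` the number of pruning steps and ``dt`` the pruning frequency; this illustrates the formula and is not NNI's code):
+
+.. code-block:: python
+
+   def agp_sparsity(t, s_i=0.0, s_f=0.8, t0=0, n=10, dt=1):
+       # clamp t into the pruning span [t0, t0 + n * dt]
+       t = max(min(t, t0 + n * dt), t0)
+       return s_f + (s_i - s_f) * (1 - (t - t0) / (n * dt)) ** 3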
+
+Usage
+^^^^^
+
+You can prune all weights from an initial sparsity of 0% to a final sparsity of 80% over 10 epochs with the code below.
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import AGPPruner
+ config_list = [{
+ 'initial_sparsity': 0,
+ 'final_sparsity': 0.8,
+ 'start_epoch': 0,
+ 'end_epoch': 10,
+ 'frequency': 1,
+ 'op_types': ['default']
+ }]
+
+ # load a pretrained model or train a model before using a pruner
+ # model = MyModel()
+ # model.load_state_dict(torch.load('mycheckpoint.pth'))
+
+ # AGP pruner prunes model while fine tuning the model by adding a hook on
+ # optimizer.step(), so an optimizer is required to prune the model.
+ optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
+
+ pruner = AGPPruner(model, config_list, optimizer, pruning_algorithm='level')
+ pruner.compress()
+
+The AGP pruner uses the ``LevelPruner`` algorithm to prune the weights by default; however, you can set the ``pruning_algorithm`` parameter to other values to use other pruning algorithms:
+
+
+* ``level``\ : LevelPruner
+* ``slim``\ : SlimPruner
+* ``l1``\ : L1FilterPruner
+* ``l2``\ : L2FilterPruner
+* ``fpgm``\ : FPGMPruner
+* ``taylorfo``\ : TaylorFOWeightFilterPruner
+* ``apoz``\ : ActivationAPoZRankFilterPruner
+* ``mean_activation``\ : ActivationMeanRankFilterPruner
+
+You should add the code below to update the epoch number when you finish one epoch in your training code.
+
+PyTorch code
+
+.. code-block:: python
+
+ pruner.update_epoch(epoch)
+
+You can view :githublink:`example ` for more information.
+
+User configuration for AGP Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.AGPPruner
+
+----
+
+NetAdapt Pruner
+---------------
+
+NetAdapt allows a user to automatically simplify a pretrained network to meet the resource budget.
+Given the overall sparsity, NetAdapt will automatically generate the sparsity distribution among different layers by iterative pruning.
+
+For more details, please refer to `NetAdapt: Platform-Aware Neural Network Adaptation for Mobile Applications `__.
+
+
+.. image:: ../../img/algo_NetAdapt.png
+ :target: ../../img/algo_NetAdapt.png
+ :alt:
+
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import NetAdaptPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+   pruner = NetAdaptPruner(model, config_list, short_term_fine_tuner=short_term_fine_tuner, evaluator=evaluator, base_algo='l1', experiment_data_dir='./')
+ pruner.compress()
+
+You can view :githublink:`example ` for more information.
+
+User configuration for NetAdapt Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.NetAdaptPruner
+
+SimulatedAnnealing Pruner
+-------------------------
+
+We implement a guided heuristic search method, the Simulated Annealing (SA) algorithm, with an enhancement that guides the search based on prior experience.
+The enhanced SA technique is based on the observation that a DNN layer with a larger number of weights often tolerates a higher degree of compression with less impact on overall accuracy.
+
+
+* Randomly initialize a pruning rate distribution (sparsities).
+* While current_temperature < stop_temperature:
+
+  #. Generate a perturbation to the current distribution
+  #. Perform a fast evaluation on the perturbed distribution
+  #. Accept the perturbation according to the performance and probability (see the sketch below); if not accepted, return to step 1
+  #. Cool down, current_temperature <- current_temperature * cool_down_rate
+
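+The acceptance rule in step 3 can be sketched as follows (illustrative only; the exact scaling used by the implementation may differ):
+
+.. code-block:: python
+
+   import math
+   import random
+
+   def accept(current_perf, perturbed_perf, temperature):
+       delta = perturbed_perf - current_perf
+       if delta > 0:
+           # a better distribution is always accepted
+           return True
+       # a worse one is accepted with a probability that shrinks as we cool down
+       return random.random() < math.exp(delta / temperature)
+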
+For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates `__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import SimulatedAnnealingPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = SimulatedAnnealingPruner(model, config_list, evaluator=evaluator, base_algo='l1', cool_down_rate=0.9, experiment_data_dir='./')
+ pruner.compress()
+
+You can view :githublink:`example ` for more information.
+
+User configuration for SimulatedAnnealing Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.SimulatedAnnealingPruner
+
+AutoCompress Pruner
+-------------------
+
+In each round, AutoCompressPruner prunes the model with the same sparsity to achieve the overall sparsity:
+
+.. code-block:: bash
+
+ 1. Generate sparsities distribution using SimulatedAnnealingPruner
+ 2. Perform ADMM-based structured pruning to generate pruning result for the next round.
+ Here we use `speedup` to perform real pruning.
+
+
+For more details, please refer to `AutoCompress: An Automatic DNN Structured Pruning Framework for Ultra-High Compression Rates `__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+   from nni.algorithms.compression.pytorch.pruning import AutoCompressPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = AutoCompressPruner(
+ model, config_list, trainer=trainer, evaluator=evaluator,
+ dummy_input=dummy_input, num_iterations=3, optimize_mode='maximize', base_algo='l1',
+ cool_down_rate=0.9, admm_num_iterations=30, admm_training_epochs=5, experiment_data_dir='./')
+ pruner.compress()
+
+You can view :githublink:`example ` for more information.
+
+User configuration for AutoCompress Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.AutoCompressPruner
+
+AMC Pruner
+----------
+
+AMC pruner leverages reinforcement learning to provide a model compression policy.
+This learning-based compression policy outperforms conventional rule-based compression policies by achieving a higher compression ratio,
+better preserving accuracy, and freeing human labor.
+
+
+.. image:: ../../img/amc_pruner.jpg
+ :target: ../../img/amc_pruner.jpg
+ :alt:
+
+
+For more details, please refer to `AMC: AutoML for Model Compression and Acceleration on Mobile Devices `__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import AMCPruner
+ config_list = [{
+ 'op_types': ['Conv2d', 'Linear']
+ }]
+ pruner = AMCPruner(model, config_list, evaluator, val_loader, flops_ratio=0.5)
+ pruner.compress()
+
+You can view :githublink:`example ` for more information.
+
+User configuration for AMC Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.AMCPruner
+
+Reproduced Experiment
+^^^^^^^^^^^^^^^^^^^^^
+
+We implemented one of the experiments in `AMC: AutoML for Model Compression and Acceleration on Mobile Devices `__\ ; we pruned **MobileNet** to 50% FLOPs on ImageNet as in the paper. Our experiment results are as follows:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Top 1 acc.(paper/ours)
+ - Top 5 acc. (paper/ours)
+ - FLOPS
+ * - MobileNet
+ - 70.5% / 69.9%
+ - 89.3% / 89.1%
+ - 50%
+
+
+The experiments code can be found at :githublink:`examples/model_compress `
+
+ADMM Pruner
+-----------
+
+Alternating Direction Method of Multipliers (ADMM) is a mathematical optimization technique
+that decomposes the original nonconvex problem into two subproblems which can be solved iteratively. In the weight pruning problem, these two subproblems are solved via 1) the gradient descent algorithm and 2) Euclidean projection, respectively.
+
+During the process of solving these two subproblems, the weights of the original model will be changed. A one-shot pruner is then applied to prune the model according to the given config list.
+
+This solution framework applies both to non-structured and different variations of structured pruning schemes.
+
+For more details, please refer to `A Systematic DNN Weight Pruning Framework using Alternating Direction Method of Multipliers `__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import ADMMPruner
+ config_list = [{
+ 'sparsity': 0.8,
+ 'op_types': ['Conv2d'],
+ 'op_names': ['conv1']
+ }, {
+ 'sparsity': 0.92,
+ 'op_types': ['Conv2d'],
+ 'op_names': ['conv2']
+ }]
+ pruner = ADMMPruner(model, config_list, trainer=trainer, num_iterations=30, epochs=5)
+ pruner.compress()
+
+You can view :githublink:`example ` for more information.
+
+User configuration for ADMM Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.ADMMPruner
+
+Lottery Ticket Hypothesis
+-------------------------
+
+In `The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks `__\ , authors Jonathan Frankle and Michael Carbin provide comprehensive measurement and analysis, and articulate the *lottery ticket hypothesis*\ : dense, randomly-initialized, feed-forward networks contain subnetworks (*winning tickets*\ ) that -- when trained in isolation -- reach test accuracy comparable to the original network in a similar number of iterations.
+
+In this paper, the authors use the following process to prune a model, called *iterative pruning*\ :
+
+..
+
+   #. Randomly initialize a neural network f(x;theta_0) (where theta_0 follows D_theta).
+ #. Train the network for j iterations, arriving at parameters theta_j.
+ #. Prune p% of the parameters in theta_j, creating a mask m.
+ #. Reset the remaining parameters to their values in theta_0, creating the winning ticket f(x;m*theta_0).
+ #. Repeat step 2, 3, and 4.
+
+
+If the configured final sparsity is P (e.g., 0.8) and there are n pruning iterations, each iteration prunes 1-(1-P)^(1/n) of the weights that survived the previous round.
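+
+For example, with a final sparsity of P = 0.8 and n = 5 iterations, each round prunes about 27.5% of the surviving weights:
+
+.. code-block:: python
+
+   P, n = 0.8, 5
+   per_round = 1 - (1 - P) ** (1 / n)
+   print(round(per_round, 4))  # 0.2752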
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import LotteryTicketPruner
+ config_list = [{
+ 'prune_iterations': 5,
+ 'sparsity': 0.8,
+ 'op_types': ['default']
+ }]
+ pruner = LotteryTicketPruner(model, config_list, optimizer)
+ pruner.compress()
+ for _ in pruner.get_prune_iterations():
+ pruner.prune_iteration_start()
+ for epoch in range(epoch_num):
+ ...
+
+The above configuration means that there are 5 iterations of pruning. As the 5 pruning iterations are executed in the same run, LotteryTicketPruner needs ``model`` and ``optimizer`` (\ **note that an ``lr_scheduler`` should also be added if used**\ ) to reset their states every time a new prune iteration starts. Please use ``get_prune_iterations`` to get the pruning iterations, and invoke ``prune_iteration_start`` at the beginning of each iteration. ``epoch_num`` should be large enough for model convergence, because the hypothesis is that the performance (accuracy) obtained in later rounds with high sparsity can be comparable with that obtained in the first round.
+
+*Tensorflow version will be supported later.*
+
+User configuration for LotteryTicket Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.LotteryTicketPruner
+
+Reproduced Experiment
+^^^^^^^^^^^^^^^^^^^^^
+
+We try to reproduce the experiment result of the fully connected network on MNIST using the same configuration as in the paper. The code can be found :githublink:`here `. In this experiment, we prune 10 times; for each pruning we train the pruned model for 50 epochs.
+
+
+.. image:: ../../img/lottery_ticket_mnist_fc.png
+ :target: ../../img/lottery_ticket_mnist_fc.png
+ :alt:
+
+
+The above figure shows the result of the fully connected network. ``round0-sparsity-0.0`` is the performance without pruning. Consistent with the paper, pruning around 80% of the weights also obtains performance similar to no pruning, and converges a little faster. If we prune too much, e.g., more than 94%, the accuracy becomes lower and convergence becomes a little slower. Slightly different from the paper, the trend of the data in the paper is clearer.
+
+Sensitivity Pruner
+------------------
+
+In each round, SensitivityPruner prunes the model based on each layer's sensitivity (i.e., its impact on accuracy) until the final configured sparsity of the whole model is met:
+
+.. code-block:: bash
+
+ 1. Analyze the sensitivity of each layer in the current state of the model.
+ 2. Prune each layer according to the sensitivity.
+
+
+For more details, please refer to `Learning both Weights and Connections for Efficient Neural Networks `__.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import SensitivityPruner
+ config_list = [{
+ 'sparsity': 0.5,
+ 'op_types': ['Conv2d']
+ }]
+ pruner = SensitivityPruner(model, config_list, finetuner=fine_tuner, evaluator=evaluator)
+ # eval_args and finetune_args are the parameters passed to the evaluator and finetuner respectively
+ pruner.compress(eval_args=[model], finetune_args=[model])
+
+User configuration for Sensitivity Pruner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**PyTorch**
+
+.. autoclass:: nni.algorithms.compression.pytorch.pruning.SensitivityPruner
diff --git a/docs/en_US/Compression/Quantizer.rst b/docs/en_US/Compression/Quantizer.rst
new file mode 100644
index 0000000000..61d0607b8c
--- /dev/null
+++ b/docs/en_US/Compression/Quantizer.rst
@@ -0,0 +1,184 @@
+Supported Quantization Algorithms on NNI
+========================================
+
+Index of supported quantization algorithms
+
+
+* `Naive Quantizer <#naive-quantizer>`__
+* `QAT Quantizer <#qat-quantizer>`__
+* `DoReFa Quantizer <#dorefa-quantizer>`__
+* `BNN Quantizer <#bnn-quantizer>`__
+
+Naive Quantizer
+---------------
+
+We provide the Naive Quantizer to quantize weights to 8 bits by default. You can use it to test quantization algorithms without any configuration.
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ model = nni.algorithms.compression.pytorch.quantization.NaiveQuantizer(model).compress()
+
+----
+
+QAT Quantizer
+-------------
+
+In `Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference `__\ , authors Benoit Jacob and Skirmantas Kligys provide an algorithm to quantize the model with training.
+
+..
+
+ We propose an approach that simulates quantization effects in the forward pass of training. Backpropagation still happens as usual, and all weights and biases are stored in floating point so that they can be easily nudged by small amounts. The forward propagation pass however simulates quantized inference as it will happen in the inference engine, by implementing in floating-point arithmetic the rounding behavior of the quantization scheme
+
+
+ * Weights are quantized before they are convolved with the input. If batch normalization (see [17]) is used for the layer, the batch normalization parameters are “folded into” the weights before quantization.
+ * Activations are quantized at points where they would be during inference, e.g. after the activation function is applied to a convolutional or fully connected layer’s output, or after a bypass connection adds or concatenates the outputs of several layers together such as in ResNets.
+
+
+Usage
+^^^^^
+
+You can quantize your model to 8 bits with the code below before your training code.
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
+ model = Mnist()
+
+ config_list = [{
+ 'quant_types': ['weight'],
+ 'quant_bits': {
+ 'weight': 8,
+    }, # you can just use `int` here because all `quant_types` share the same bit length, see the config for `ReLU6` below.
+ 'op_types':['Conv2d', 'Linear']
+ }, {
+ 'quant_types': ['output'],
+ 'quant_bits': 8,
+ 'quant_start_step': 7000,
+ 'op_types':['ReLU6']
+ }]
+ quantizer = QAT_Quantizer(model, config_list)
+ quantizer.compress()
+
+You can view the example for more information.
+
+User configuration for QAT Quantizer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Common configuration needed by compression algorithms can be found in the `specification of config_list <./QuickStart.rst>`__.
+
+Configuration needed by this algorithm:
+
+
+* **quant_start_step:** int
+
+Disable quantization until the model has been run for a certain number of steps. This allows the network to enter a more stable
+state, where activation quantization ranges do not exclude a significant fraction of values. The default value is 0.
+
+Note
+^^^^
+
+Batch normalization folding is currently not supported.
+
+----
+
+DoReFa Quantizer
+----------------
+
+In `DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients `__\ , authors Shuchang Zhou and Yuxin Wu provide an algorithm named DoReFa to quantize weights, activations and gradients during training.
+
+Usage
+^^^^^
+
+To use the DoReFa Quantizer, you can add the code below before your training code.
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.quantization import DoReFaQuantizer
+ config_list = [{
+ 'quant_types': ['weight'],
+ 'quant_bits': 8,
+ 'op_types': 'default'
+ }]
+ quantizer = DoReFaQuantizer(model, config_list)
+ quantizer.compress()
+
+You can view the example for more information.
+
+User configuration for DoReFa Quantizer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Common configuration needed by compression algorithms can be found in the `specification of config_list <./QuickStart.rst>`__.
+
+Configuration needed by this algorithm:
+
+----
+
+BNN Quantizer
+-------------
+
+In `Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1 `__\ , the authors describe the method as follows:
+
+..
+
+ We introduce a method to train Binarized Neural Networks (BNNs) - neural networks with binary weights and activations at run-time. At training-time the binary weights and activations are used for computing the parameters gradients. During the forward pass, BNNs drastically reduce memory size and accesses, and replace most arithmetic operations with bit-wise operations, which is expected to substantially improve power-efficiency.
+
+
+Usage
+^^^^^
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.quantization import BNNQuantizer
+ model = VGG_Cifar10(num_classes=10)
+
+ configure_list = [{
+ 'quant_bits': 1,
+ 'quant_types': ['weight'],
+ 'op_types': ['Conv2d', 'Linear'],
+ 'op_names': ['features.0', 'features.3', 'features.7', 'features.10', 'features.14', 'features.17', 'classifier.0', 'classifier.3']
+ }, {
+ 'quant_bits': 1,
+ 'quant_types': ['output'],
+ 'op_types': ['Hardtanh'],
+ 'op_names': ['features.6', 'features.9', 'features.13', 'features.16', 'features.20', 'classifier.2', 'classifier.5']
+ }]
+
+ quantizer = BNNQuantizer(model, configure_list)
+ model = quantizer.compress()
+
+You can view the example :githublink:`examples/model_compress/BNN_quantizer_cifar10.py ` for more information.
+
+User configuration for BNN Quantizer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Common configuration needed by compression algorithms can be found in the `specification of config_list <./QuickStart.rst>`__.
+
+Configuration needed by this algorithm:
+
+Experiment
+^^^^^^^^^^
+
+We implemented one of the experiments in `Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1 `__\ ; we quantized the **VGGNet** for CIFAR-10 as in the paper. Our experiment results are as follows:
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model
+ - Accuracy
+ * - VGGNet
+ - 86.93%
+
+
+The experiments code can be found at :githublink:`examples/model_compress/BNN_quantizer_cifar10.py `
diff --git a/docs/en_US/Compression/QuickStart.rst b/docs/en_US/Compression/QuickStart.rst
new file mode 100644
index 0000000000..85a1930bfe
--- /dev/null
+++ b/docs/en_US/Compression/QuickStart.rst
@@ -0,0 +1,212 @@
+Tutorial for Model Compression
+==============================
+
+.. contents::
+
+In this tutorial, we use the `first section <#quick-start-to-compress-a-model>`__ to quickly go through the usage of model compression on NNI, and then use the `second section <#detailed-usage-guide>`__ to explain the usage in more detail.
+
+Quick Start to Compress a Model
+-------------------------------
+
+NNI provides very simple APIs for compressing a model. The compression includes pruning algorithms and quantization algorithms. Their usage is the same, so here we use `slim pruner `__ as an example to show the usage.
+
+Write configuration
+^^^^^^^^^^^^^^^^^^^
+
+Write a configuration to specify the layers that you want to prune. The following configuration means pruning all the ``BatchNorm2d``\ s to sparsity 0.7 while keeping other layers unpruned.
+
+.. code-block:: python
+
+ configure_list = [{
+ 'sparsity': 0.7,
+ 'op_types': ['BatchNorm2d'],
+ }]
+
+The specification of the configuration can be found `here <#specification-of-config-list>`__. Note that different pruners may have their own fields in the configuration, for example ``start_epoch`` in the AGP pruner. Please refer to each pruner's `usage <./Pruner.rst>`__ for details, and adjust the configuration accordingly.
+
+Choose a compression algorithm
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Choose a pruner to prune your model. First instantiate the chosen pruner with your model and configuration as arguments, then invoke ``compress()`` to compress your model.
+
+.. code-block:: python
+
+ pruner = SlimPruner(model, configure_list)
+ model = pruner.compress()
+
+Then, you can train your model using a traditional training approach (e.g., SGD); pruning is applied transparently during the training. Some pruners prune once at the beginning, and the following training can be seen as fine-tuning. Other pruners prune your model iteratively, adjusting the masks epoch by epoch during training.
+
+Export compression result
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+After training, you get the accuracy of the pruned model. You can export the model weights to a file, and the generated masks to a file as well. Exporting an ONNX model is also supported.
+
+.. code-block:: python
+
+ pruner.export_model(model_path='pruned_vgg19_cifar10.pth', mask_path='mask_vgg19_cifar10.pth')
+
+The complete code of model compression examples can be found :githublink:`here `.
+
+Speed up the model
+^^^^^^^^^^^^^^^^^^
+
+Masks do not provide a real speedup of your model. The model should be sped up based on the exported masks, thus, we provide an API to speed up your model as shown below. After invoking ``apply_compression_results`` on your model, your model becomes a smaller one with shorter inference latency.
+
+.. code-block:: python
+
+ from nni.compression.pytorch import apply_compression_results
+ apply_compression_results(model, 'mask_vgg19_cifar10.pth')
+
+Please refer to `here `__ for a detailed description.
+
+Detailed Usage Guide
+--------------------
+
+The example code for users to apply model compression on a user model can be found below:
+
+PyTorch code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.pytorch.pruning import LevelPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
+ pruner = LevelPruner(model, config_list)
+ pruner.compress()
+
+Tensorflow code
+
+.. code-block:: python
+
+ from nni.algorithms.compression.tensorflow.pruning import LevelPruner
+ config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
+ pruner = LevelPruner(tf.get_default_graph(), config_list)
+ pruner.compress()
+
+You can use other compression algorithms in the package of ``nni.compression``. The algorithms are implemented in both PyTorch and TensorFlow (partial support on TensorFlow), under ``nni.compression.pytorch`` and ``nni.compression.tensorflow`` respectively. You can refer to `Pruner <./Pruner.rst>`__ and `Quantizer <./Quantizer.rst>`__ for a detailed description of the supported algorithms. Also, if you want to use knowledge distillation, you can refer to `KDExample <../TrialExample/KDExample.rst>`__.
+
+A compression algorithm is first instantiated with a ``config_list`` passed in. The specification of this ``config_list`` will be described later.
+
+The function call ``pruner.compress()`` modifies the user-defined model (in TensorFlow the model can be obtained with ``tf.get_default_graph()``\ , while in PyTorch the model is the defined model object) by inserting masks. Then, when you run the model, the masks take effect. The masks can be adjusted at runtime by the algorithms.
+
+*Note that ``pruner.compress`` simply adds masks to the model weights; it does not include fine-tuning logic. If users want to fine-tune the compressed model, they need to write the fine-tuning logic themselves after ``pruner.compress``.*
+
+Specification of ``config_list``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Users can specify the configuration (i.e., ``config_list``\ ) for a compression algorithm. For example, when compressing a model, users may want to specify the sparsity ratio, specify different ratios for different types of operations, exclude certain types of operations, or compress only certain types of operations. To express these kinds of requirements, we define a configuration specification. It can be seen as a Python ``list`` object, where each element is a ``dict`` object.
+
+The ``dict``\ s in the ``list`` are applied one by one; that is, the configurations in a latter ``dict`` will overwrite the configurations in former ones for the operations that fall within the scope of both.
+
+There are different keys in a ``dict``. Some of them are common keys supported by all the compression algorithms:
+
+
+* **op_types**\ : This specifies the types of operations to be compressed. 'default' means following the algorithm's default setting.
+* **op_names**\ : This specifies the operations to be compressed by name. If this field is omitted, operations will not be filtered by it.
+* **exclude**\ : Defaults to False. If True, the operations with the specified types and names will be excluded from compression.
+
+Some other keys are specific to certain algorithms; users can refer to `pruning algorithms <./Pruner.rst>`__ and `quantization algorithms <./Quantizer.rst>`__ for the keys allowed by each algorithm.
+
+A simple example of configuration is shown below:
+
+.. code-block:: python
+
+ [
+ {
+ 'sparsity': 0.8,
+ 'op_types': ['default']
+ },
+ {
+ 'sparsity': 0.6,
+ 'op_names': ['op_name1', 'op_name2']
+ },
+ {
+ 'exclude': True,
+ 'op_names': ['op_name3']
+ }
+ ]
+
+It means: follow the algorithm's default setting for compressed operations with sparsity 0.8, but use sparsity 0.6 for ``op_name1`` and ``op_name2``, and do not compress ``op_name3``.
+
+Quantization specific keys
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Besides the keys explained above, if you use quantization algorithms you need to specify more keys in ``config_list``\ , which are explained below.
+
+
+* **quant_types** : list of string.
+
+The types of quantization you want to apply; currently 'weight', 'input' and 'output' are supported. 'weight' means applying quantization to the weight parameter of modules. 'input' means applying quantization
+to the input of the module's forward method. 'output' means applying quantization to the output of the module's forward method, which is often called 'activation' in some papers.
+
+
+* **quant_bits** : int or dict of {str : int}
+
+The bit width of quantization. When a dict is given, the key is the quantization type and the value is the bit width, e.g.:
+
+.. code-block:: python
+
+ {
+     'quant_bits': {
+         'weight': 8,
+         'output': 4,
+     },
+ }
+
+When the value is an int, all quantization types share the same bit width, e.g.:
+
+.. code-block:: python
+
+ {
+     'quant_bits': 8,  # both weight and output are quantized with 8 bits
+ }
+
+The following example shows a more complete ``config_list``. It uses ``op_names`` (or ``op_types``\ ) to specify the target layers along with the quantization bits for those layers.
+
+.. code-block:: python
+
+ configure_list = [{
+ 'quant_types': ['weight'],
+ 'quant_bits': 8,
+ 'op_names': ['conv1']
+ }, {
+ 'quant_types': ['weight'],
+ 'quant_bits': 4,
+ 'quant_start_step': 0,
+ 'op_names': ['conv2']
+ }, {
+ 'quant_types': ['weight'],
+ 'quant_bits': 3,
+ 'op_names': ['fc1']
+ },
+ {
+ 'quant_types': ['weight'],
+ 'quant_bits': 2,
+ 'op_names': ['fc2']
+ }
+ ]
+
+In this example, ``op_names`` specifies the layer names, and the four layers will be quantized with different ``quant_bits``.
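+
+As an illustration, such a ``configure_list`` is passed to a quantizer in the same way a pruner receives its configuration. The sketch below assumes the QAT quantizer is available under ``nni.algorithms.compression.pytorch.quantization`` and that ``model`` and ``optimizer`` are already defined:
+
+.. code-block:: python
+
+ # assumed import path; check the quantizer documentation for your NNI version
+ from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
+
+ quantizer = QAT_Quantizer(model, configure_list, optimizer)
+ quantizer.compress()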
+
+APIs for Updating Fine Tuning Status
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some compression algorithms use epochs to control the progress of compression (e.g. `AGP `__\ ), and some algorithms need to do something after every minibatch. Therefore, we provide another two APIs for users to invoke: ``pruner.update_epoch(epoch)`` and ``pruner.step()``.
+
+``update_epoch`` should be invoked in every epoch, while ``step`` should be invoked after each minibatch. Note that most algorithms do not require calling the two APIs. Please refer to each algorithm's document for details. For the algorithms that do not need them, calling them is allowed but has no effect.
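+
+A sketch of where the two calls fit into a standard training loop (only needed when an algorithm's documentation asks for them; names such as ``train_loader``, ``criterion`` and ``optimizer`` come from your own training code):
+
+.. code-block:: python
+
+ for epoch in range(num_epochs):
+     pruner.update_epoch(epoch)        # once per epoch
+     for data, target in train_loader:
+         optimizer.zero_grad()
+         loss = criterion(model(data), target)
+         loss.backward()
+         optimizer.step()
+         pruner.step()                 # once per minibatch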
+
+Export Compressed Model
+^^^^^^^^^^^^^^^^^^^^^^^
+
+If you are pruning your model, you can easily export the compressed model using the following API. The ``state_dict`` of the sparse model weights will be stored in ``model.pth``\ , which can be loaded by ``torch.load('model.pth')``. In this exported ``model.pth``\ , the masked weights are zero.
+
+.. code-block:: python
+
+ pruner.export_model(model_path='model.pth')
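+
+To load the exported weights back, rebuild the model first and then load the ``state_dict`` as usual. A sketch, where ``VGG19`` stands in for whatever model class was pruned:
+
+.. code-block:: python
+
+ import torch
+
+ model = VGG19()  # hypothetical constructor of the original architecture
+ model.load_state_dict(torch.load('model.pth'))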
+
+The ``mask_dict`` and the pruned model in ``onnx`` format (\ ``input_shape`` needs to be specified) can also be exported like this:
+
+.. code-block:: python
+
+ pruner.export_model(model_path='model.pth', mask_path='mask.pth', onnx_path='model.onnx', input_shape=[1, 1, 28, 28])
+
+If you want to really speed up the compressed model, please refer to `NNI model speedup <./ModelSpeedup.rst>`__ for details.
diff --git a/docs/en_US/FeatureEngineering/GBDTSelector.rst b/docs/en_US/FeatureEngineering/GBDTSelector.rst
new file mode 100644
index 0000000000..f645b12785
--- /dev/null
+++ b/docs/en_US/FeatureEngineering/GBDTSelector.rst
@@ -0,0 +1,70 @@
+GBDTSelector
+------------
+
+GBDTSelector is based on `LightGBM `__\ , which is a gradient boosting framework that uses tree-based learning algorithms.
+
+When the data is passed into the GBDT model, the model constructs the boosting trees, and the feature importance comes from the scores in construction, which indicate how useful or valuable each feature was in the construction of the boosted decision trees within the model.
+
+This method can be used as a strong baseline for feature selection, especially when a GBDT model is used as the classifier or regressor.
+
+For now, we support ``importance_type`` values of ``split`` and ``gain``. We will support customized ``importance_type`` in the future, which means the user will be able to define how to calculate the ``feature score`` by themselves.
+
+Usage
+^^^^^
+
+First you need to install dependency:
+
+.. code-block:: bash
+
+ pip install lightgbm
+
+Then
+
+.. code-block:: python
+
+ from nni.feature_engineering.gbdt_selector import GBDTSelector
+
+ # load data
+ ...
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
+
+ # initialize a selector
+ fgs = GBDTSelector()
+ # fit data
+ fgs.fit(X_train, y_train, ...)
+ # get important features
+ # this will return the indices of the important features
+ print(fgs.get_selected_features(10))
+
+ ...
+
+You can also refer to the examples in ``/examples/feature_engineering/gbdt_selector/``.
+
+**Requirement of ``fit`` FuncArgs**
+
+
+*
+ **X** (array-like, required) - The training input samples, whose shape is [n_samples, n_features].
+
+*
+ **y** (array-like, required) - The target values (class labels in classification, real numbers in regression), whose shape is [n_samples].
+
+*
+ **lgb_params** (dict, required) - The parameters for the LightGBM model. For details, see `here `__.
+
+*
+ **eval_ratio** (float, required) - The ratio used to split the evaluation data and training data from ``self.X``.
+
+*
+ **early_stopping_rounds** (int, required) - The early stopping setting in LightGBM. For details, see `here `__.
+
+*
+ **importance_type** (str, required) - Either 'split' or 'gain'. 'split' means the result contains the number of times the feature is used in a model, and 'gain' means the result contains the total gain of the splits which use the feature. For details, see `here `__.
+
+*
+ **num_boost_round** (int, required) - The number of boosting rounds. For details, see `here `__.
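+
+Putting these arguments together, a hypothetical ``fit`` call might look as follows. The LightGBM parameters and the values are illustrative only, and the keyword names are assumed to match the list above:
+
+.. code-block:: python
+
+ # illustrative LightGBM parameters, not a recommendation
+ lgb_params = {'objective': 'binary', 'num_leaves': 31, 'learning_rate': 0.05}
+
+ fgs = GBDTSelector()
+ fgs.fit(X_train, y_train,
+         lgb_params=lgb_params,
+         eval_ratio=0.2,
+         early_stopping_rounds=10,
+         importance_type='gain',
+         num_boost_round=1000)
+ print(fgs.get_selected_features(10))  # indices of the top-10 features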
+
+**Requirement of ``get_selected_features`` FuncArgs**
+
+
+* **topk** (int, required) - The number of top important features you want to select.
diff --git a/docs/en_US/FeatureEngineering/GradientFeatureSelector.rst b/docs/en_US/FeatureEngineering/GradientFeatureSelector.rst
new file mode 100644
index 0000000000..1b4b212bdd
--- /dev/null
+++ b/docs/en_US/FeatureEngineering/GradientFeatureSelector.rst
@@ -0,0 +1,107 @@
+GradientFeatureSelector
+-----------------------
+
+The algorithm in GradientFeatureSelector comes from `"Feature Gradients: Scalable Feature Selection via Discrete Relaxation" `__.
+
+GradientFeatureSelector is a gradient-based search algorithm
+for feature selection.
+
+1) This approach extends a recent result on the estimation of
+learnability in the sublinear data regime by showing that the calculation can be performed iteratively (i.e., in mini-batches) and in **linear time and space** with respect to both the number of features D and the sample size N.
+
+2) This, along with a discrete-to-continuous relaxation of the search domain, allows for an **efficient, gradient-based** search algorithm among feature subsets for very **large datasets**.
+
+3) Crucially, this algorithm is capable of finding **higher-order correlations** between features and targets for both the N > D and N < D regimes, as opposed to approaches that do not consider such interactions and/or only consider one regime.
+
+Usage
+^^^^^
+
+.. code-block:: python
+
+ from nni.feature_engineering.gradient_selector import FeatureGradientSelector
+
+ # load data
+ ...
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
+
+ # initialize a selector
+ fgs = FeatureGradientSelector(n_features=10)
+ # fit data
+ fgs.fit(X_train, y_train)
+ # get important features
+ # this will return the indices of the important features
+ print(fgs.get_selected_features())
+
+ ...
+
+You can also refer to the examples in ``/examples/feature_engineering/gradient_feature_selector/``.
+
+**Parameters of class FeatureGradientSelector constructor**
+
+
+*
+ **order** (int, optional, default = 4) - What order of interactions to include. Higher orders may be more accurate but increase the run time. 12 is the maximum allowed order.
+
+*
+ **penalty** (int, optional, default = 1) - Constant that multiplies the regularization term.
+
+*
+ **n_features** (int, optional, default = None) - If None, will automatically choose number of features based on search. Otherwise, the number of top features to select.
+
+*
+ **max_features** (int, optional, default = None) - If not None, will use the 'elbow method' to determine the number of features with max_features as the upper limit.
+
+*
+ **learning_rate** (float, optional, default = 1e-1) - The learning rate.
+
+*
+ **init** (*zero, on, off, onhigh, offhigh, or sklearn, optional, default = zero*\ ) - How to initialize the vector of scores. 'zero' is the default.
+
+*
+ **n_epochs** (int, optional, default = 1) - number of epochs to run
+
+*
+ **shuffle** (bool, optional, default = True) - Shuffle "rows" prior to an epoch.
+
+*
+ **batch_size** (int, optional, default = 1000) - Number of "rows" to process at a time.
+
+*
+ **target_batch_size** (int, optional, default = 1000) - Number of "rows" to accumulate gradients over. Useful when many rows will not fit into memory but are needed for accurate estimation.
+
+*
+ **classification** (bool, optional, default = True) - If True, problem is classification, else regression.
+
+*
+ **ordinal** (bool, optional, default = True) - If True, problem is ordinal classification. Requires classification to be True.
+
+*
+ **balanced** (bool, optional, default = True) - If true, each class is weighted equally in optimization, otherwise weighted is done via support of each class. Requires classification to be True.
+
+*
+ **preprocess** (str, optional, default = 'zscore') - 'zscore' refers to centering the data and normalizing it to unit variance, while 'center' only centers the data to zero mean.
+
+*
+ **soft_grouping** (bool, optional, default = True) - If True, groups represent features that come from the same source. Used to encourage sparsity of groups and features within groups.
+
+*
+ **verbose** (int, optional, default = 0) - Controls the verbosity when fitting. Set to 0 for no printing, or to 1 or higher to print every ``verbose`` number of gradient steps.
+
+*
+ **device** (str, optional, default = 'cpu') - 'cpu' to run on CPU and 'cuda' to run on GPU. Runs much faster on GPU.
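+
+An illustrative constructor call combining several of the options above; the values are arbitrary and only show how the parameters fit together:
+
+.. code-block:: python
+
+ from nni.feature_engineering.gradient_selector import FeatureGradientSelector
+
+ # arbitrary example settings, not recommended defaults
+ fgs = FeatureGradientSelector(order=2,
+                               n_features=20,
+                               batch_size=1000,
+                               classification=True,
+                               device='cpu')
+ fgs.fit(X_train, y_train)
+ print(fgs.get_selected_features())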
+
+**Requirement of ``fit`` FuncArgs**
+
+
+*
+ **X** (array-like, required) - The training input samples, whose shape is [n_samples, n_features].
+
+*
+ **y** (array-like, required) - The target values (class labels in classification, real numbers in regression), whose shape is [n_samples].
+
+*
+ **groups** (array-like, optional, default = None) - Groups of columns that must be selected as a unit. For example, [0, 0, 1, 2] specifies that the first two columns are part of the same group. Its shape is [n_features].
+
+**Requirement of ``get_selected_features`` FuncArgs**
+
+ For now, the ``get_selected_features`` function has no parameters.
diff --git a/docs/en_US/FeatureEngineering/Overview.rst b/docs/en_US/FeatureEngineering/Overview.rst
new file mode 100644
index 0000000000..c6fedfeeaa
--- /dev/null
+++ b/docs/en_US/FeatureEngineering/Overview.rst
@@ -0,0 +1,320 @@
+Feature Engineering with NNI
+============================
+
+We are glad to announce the alpha release of the Feature Engineering toolkit on top of NNI. It is still in an experimental phase and might evolve based on user feedback. We'd like to invite you to use it, give feedback, and even contribute.
+
+For now, we support the following feature selectors:
+
+
+* `GradientFeatureSelector <./GradientFeatureSelector.rst>`__
+* `GBDTSelector <./GBDTSelector.rst>`__
+
+These selectors are suitable for tabular data (i.e., they do not cover image, speech, or text data).
+
+In addition, these selectors only perform feature selection. If you want to:
+1) generate high-order combined features on NNI while doing feature selection;
+2) leverage your distributed resources;
+you could try this :githublink:`example `.
+
+How to use?
+-----------
+
+.. code-block:: python
+
+ from nni.feature_engineering.gradient_selector import FeatureGradientSelector
+ # from nni.feature_engineering.gbdt_selector import GBDTSelector
+
+ # load data
+ ...
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
+
+ # initialize a selector
+ fgs = FeatureGradientSelector(...)
+ # fit data
+ fgs.fit(X_train, y_train)
+ # get important features
+ # this will return the indices of the important features
+ print(fgs.get_selected_features(...))
+
+ ...
+
+When using a built-in selector, you first need to ``import`` the feature selector and ``initialize`` it. You can call the ``fit`` function of the selector to pass in the data. After that, you can use ``get_selected_features`` to get the important features. The function parameters may differ between selectors, so please check the docs before using them.
+
+How to customize?
+-----------------
+
+NNI provides *state-of-the-art* feature selection algorithms as built-in selectors. NNI also supports building a feature selector by yourself.
+
+If you want to implement a customized feature selector, you need to:
+
+
+#. Inherit the base FeatureSelector class
+#. Implement the ``fit`` and ``get_selected_features`` functions
+#. Integrate with sklearn (Optional)
+
+Here is an example:
+
+**1. Inherit the base FeatureSelector Class**
+
+.. code-block:: python
+
+ from nni.feature_engineering.feature_selector import FeatureSelector
+
+ class CustomizedSelector(FeatureSelector):
+ def __init__(self, ...):
+ ...
+
+**2. Implement the fit and get_selected_features Functions**
+
+.. code-block:: python
+
+ from nni.tuner import Tuner
+
+ from nni.feature_engineering.feature_selector import FeatureSelector
+
+ class CustomizedSelector(FeatureSelector):
+ def __init__(self, ...):
+ ...
+
+ def fit(self, X, y, **kwargs):
+ """
+ Fit the training data to FeatureSelector
+
+ Parameters
+ ------------
+ X : array-like numpy matrix
+ The training input samples, which shape is [n_samples, n_features].
+ y: array-like numpy matrix
+ The target values (class labels in classification, real numbers in regression). Which shape is [n_samples].
+ """
+ self.X = X
+ self.y = y
+ ...
+
+ def get_selected_features(self):
+ """
+ Get important feature
+
+ Returns
+ -------
+ list :
+ Return the index of the important feature.
+ """
+ ...
+ return self.selected_features_
+
+ ...
+
+**3. Integrate with Sklearn**
+
+``sklearn.pipeline.Pipeline`` can connect models in series, such as feature selector, normalization, and classification/regression to form a typical machine learning problem workflow.
+The following steps could help us integrate better with sklearn, which means we could treat the customized feature selector as a module of the pipeline.
+
+
+#. Inherit the class ``sklearn.base.BaseEstimator``
+#. Implement the ``get_params`` and ``set_params`` functions of ``BaseEstimator``
+#. Inherit the class ``sklearn.feature_selection.base.SelectorMixin``
+#. Implement the ``get_support``\ , ``transform`` and ``inverse_transform`` functions of ``SelectorMixin``
+
+Here is an example:
+
+**1. Inherit the BaseEstimator Class and its Function**
+
+.. code-block:: python
+
+ from sklearn.base import BaseEstimator
+ from nni.feature_engineering.feature_selector import FeatureSelector
+
+ class CustomizedSelector(FeatureSelector, BaseEstimator):
+ def __init__(self, ...):
+ ...
+
+ def get_params(self, ...):
+ """
+ Get parameters for this estimator.
+ """
+ params = self.__dict__
+ params = {key: val for (key, val) in params.items()
+ if not key.endswith('_')}
+ return params
+
+ def set_params(self, **params):
+ """
+ Set the parameters of this estimator.
+ """
+ for param in params:
+ if hasattr(self, param):
+ setattr(self, param, params[param])
+ return self
+
+**2. Inherit the SelectorMixin Class and its Function**
+
+.. code-block:: python
+
+ from sklearn.base import BaseEstimator
+ from sklearn.feature_selection.base import SelectorMixin
+
+ from nni.feature_engineering.feature_selector import FeatureSelector
+
+ class CustomizedSelector(FeatureSelector, BaseEstimator, SelectorMixin):
+ def __init__(self, ...):
+ ...
+
+ def get_params(self, ...):
+ """
+ Get parameters for this estimator.
+ """
+ params = self.__dict__
+ params = {key: val for (key, val) in params.items()
+ if not key.endswith('_')}
+ return params
+
+ def set_params(self, **params):
+ """
+ Set the parameters of this estimator.
+ """
+ for param in params:
+ if hasattr(self, param):
+ setattr(self, param, params[param])
+ return self
+
+ def get_support(self, indices=False):
+ """
+ Get a mask, or integer index, of the features selected.
+
+ Parameters
+ ----------
+ indices : bool
+ Default False. If True, the return value will be an array of integers, rather than a boolean mask.
+
+ Returns
+ -------
+ list :
+ returns support: An index that selects the retained features from a feature vector.
+ If indices are False, this is a boolean array of shape [# input features], in which an element is True iff its corresponding feature is selected for retention.
+ If indices are True, this is an integer array of shape [# output features] whose values
+ are indices into the input feature vector.
+ """
+ ...
+ return mask
+
+
+ def transform(self, X):
+ """Reduce X to the selected features.
+
+ Parameters
+ ----------
+ X : array
+ which shape is [n_samples, n_features]
+
+ Returns
+ -------
+ X_r : array
+ which shape is [n_samples, n_selected_features]
+ The input samples with only the selected features.
+ """
+ ...
+ return X_r
+
+
+ def inverse_transform(self, X):
+ """
+ Reverse the transformation operation
+
+ Parameters
+ ----------
+ X : array
+ shape is [n_samples, n_selected_features]
+
+ Returns
+ -------
+ X_r : array
+ shape is [n_samples, n_original_features]
+ """
+ ...
+ return X_r
+
+After integrating with Sklearn, we could use the feature selector as follows:
+
+.. code-block:: python
+
+ from sklearn.linear_model import LogisticRegression
+
+ # load data
+ ...
+ X_train, y_train = ...
+
+ # build a pipeline
+ pipeline = make_pipeline(XXXSelector(...), LogisticRegression())
+ pipeline = make_pipeline(SelectFromModel(ExtraTreesClassifier(n_estimators=50)), LogisticRegression())
+ pipeline.fit(X_train, y_train)
+
+ # score
+ print("Pipeline Score: ", pipeline.score(X_train, y_train))
+
+Benchmark
+---------
+
+``Baseline`` means no feature selection: we directly pass the data to LogisticRegression. For this benchmark, we only use 10% of the training data as test data. For the GradientFeatureSelector, we only take the top 20 features. The metric is the mean accuracy on the given test data and labels.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Dataset
+ - All Features + LR (acc, time, memory)
+ - GradientFeatureSelector + LR (acc, time, memory)
+ - TreeBasedClassifier + LR (acc, time, memory)
+ - #Train
+ - #Feature
+ * - colon-cancer
+ - 0.7547, 890ms, 348MiB
+ - 0.7368, 363ms, 286MiB
+ - 0.7223, 171ms, 1171 MiB
+ - 62
+ - 2,000
+ * - gisette
+ - 0.9725, 215ms, 584MiB
+ - 0.89416, 446ms, 397MiB
+ - 0.9792, 911ms, 234MiB
+ - 6,000
+ - 5,000
+ * - avazu
+ - 0.8834, N/A, N/A
+ - N/A, N/A, N/A
+ - N/A, N/A, N/A
+ - 40,428,967
+ - 1,000,000
+ * - rcv1
+ - 0.9644, 557ms, 241MiB
+ - 0.7333, 401ms, 281MiB
+ - 0.9615, 752ms, 284MiB
+ - 20,242
+ - 47,236
+ * - news20.binary
+ - 0.9208, 707ms, 361MiB
+ - 0.6870, 565ms, 371MiB
+ - 0.9070, 904ms, 364MiB
+ - 19,996
+ - 1,355,191
+ * - real-sim
+ - 0.9681, 433ms, 274MiB
+ - 0.7969, 251ms, 274MiB
+ - 0.9591, 643ms, 367MiB
+ - 72,309
+ - 20,958
+
+
+The benchmark datasets can be downloaded from `here `__.
+
+The benchmark code can be found in ``/examples/feature_engineering/gradient_feature_selector/benchmark_test.py``.
+
+Reference and Feedback
+----------------------
+
+
+* To `report a bug `__ for this feature in GitHub;
+* To `file a feature or improvement request `__ for this feature in GitHub;
+* To know more about :githublink:`Neural Architecture Search with NNI `\ ;
+* To know more about :githublink:`Model Compression with NNI `\ ;
+* To know more about :githublink:`Hyperparameter Tuning with NNI `\ ;
diff --git a/docs/en_US/NAS/Advanced.rst b/docs/en_US/NAS/Advanced.rst
new file mode 100644
index 0000000000..7930245d23
--- /dev/null
+++ b/docs/en_US/NAS/Advanced.rst
@@ -0,0 +1,136 @@
+Customize a NAS Algorithm
+=========================
+
+Extend the Ability of One-Shot Trainers
+---------------------------------------
+
+Users might want to do multiple things if they are using the trainers on real tasks, for example, distributed training, half-precision training, logging periodically, writing tensorboard, dumping checkpoints and so on. As mentioned previously, some trainers do have support for some of the items listed above; others might not. Generally, there are two recommended ways to add anything you want to an existing trainer: inherit an existing trainer and override, or copy an existing trainer and modify.
+
+Either way, you are walking into the scope of implementing a new trainer. Basically, implementing a one-shot trainer is no different from implementing any traditional deep learning trainer, except that a new concept, the mutator, will reveal itself. The implementation will therefore differ in at least two places:
+
+
+* Initialization
+
+.. code-block:: python
+
+ model = Model()
+ mutator = MyMutator(model)
+
+
+* Training
+
+.. code-block:: python
+
+ for _ in range(epochs):
+ for x, y in data_loader:
+ mutator.reset() # reset all the choices in model
+ out = model(x) # like traditional model
+ loss = criterion(out, y)
+ loss.backward()
+ # no difference below
+
+To demonstrate what mutators are for, we need to know how one-shot NAS normally works. Usually, one-shot NAS "co-optimizes model weights and architecture weights". It repeatedly samples an architecture or a combination of several architectures from the supernet, trains the chosen architectures like a traditional deep learning model, updates the trained parameters to the supernet, and uses the metrics or loss as a signal to guide the architecture sampler. The mutator is the architecture sampler here, often defined to be another deep-learning model. Therefore, you can treat it like any model, by defining parameters in it and optimizing it with optimizers. One mutator is initialized with exactly one model. Once a mutator is bound to a model, it cannot be rebound to another model.
+
+``mutator.reset()`` is the core step. That's where all the choices in the model are finalized. The reset result remains effective until the next reset flushes it. After the reset, the model can be treated as a traditional model for forward and backward passes.
+
+Finally, mutators provide a method called ``mutator.export()`` that exports a dict describing the architecture of the model. Note that currently this dict is a mapping from keys of mutables to tensors of selections. So, in order to dump it to JSON, users need to convert the tensors explicitly into Python lists.
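+
+For example, here is a sketch of dumping an exported architecture to JSON, assuming the exported values are PyTorch tensors as described above:
+
+.. code-block:: python
+
+ import json
+
+ exported = mutator.export()  # mapping from mutable keys to tensors
+ # convert each tensor into a plain Python list so that it is JSON-serializable
+ serializable = {key: value.detach().cpu().tolist() for key, value in exported.items()}
+ with open('architecture.json', 'w') as f:
+     json.dump(serializable, f)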
+
+Meanwhile, NNI provides some useful tools so that users can implement trainers more easily. See `Trainers <./NasReference.rst>`__ for details.
+
+Implement New Mutators
+----------------------
+
+To start with, here is the pseudo-code that demonstrates what happens on ``mutator.reset()`` and ``mutator.export()``.
+
+.. code-block:: python
+
+ def reset(self):
+ self.apply_on_model(self.sample_search())
+
+.. code-block:: python
+
+ def export(self):
+ return self.sample_final()
+
+On reset, a new architecture is sampled with ``sample_search()`` and applied to the model. Then the model is trained for one or more steps in the search phase. On export, a new architecture is sampled with ``sample_final()`` and **nothing is done to the model**. This is used either for checkpointing or for exporting the final architecture.
+
+The requirements on the return values of ``sample_search()`` and ``sample_final()`` are the same: a mapping from mutable keys to tensors. Each tensor can be either a BoolTensor (true for selected, false for not selected) or a FloatTensor which applies a weight to each candidate. The selected branches will then be computed (in ``LayerChoice``\ , modules will be called; in ``InputChoice``\ , it's just the tensors themselves) and reduced with the reduction operation specified in the choices. For most algorithms, which only need to worry about the former part, here is an example of a mutator implementation.
+
+.. code-block:: python
+
+ class RandomMutator(Mutator):
+ def __init__(self, model):
+ super().__init__(model) # don't forget to call super
+ # do something else
+
+ def sample_search(self):
+ result = dict()
+ for mutable in self.mutables: # this is all the mutable modules in user model
+ # mutables sharing the same key will be de-duplicated
+ if isinstance(mutable, LayerChoice):
+ # decided that this mutable should choose `gen_index`
+ gen_index = np.random.randint(mutable.length)
+ result[mutable.key] = torch.tensor([i == gen_index for i in range(mutable.length)],
+ dtype=torch.bool)
+ elif isinstance(mutable, InputChoice):
+ if mutable.n_chosen is None: # n_chosen is None, then choose any number
+ result[mutable.key] = torch.randint(high=2, size=(mutable.n_candidates,)).view(-1).bool()
+ # else do something else
+ return result
+
+ def sample_final(self):
+ return self.sample_search() # use the same logic here. you can do something different
+
+The complete example of random mutator can be found :githublink:`here `.
+
+For advanced usages, e.g., users want to manipulate the way modules in ``LayerChoice`` are executed, they can inherit ``BaseMutator``\ , and overwrite ``on_forward_layer_choice`` and ``on_forward_input_choice``\ , which are the callback implementation of ``LayerChoice`` and ``InputChoice`` respectively. Users can still use property ``mutables`` to get all ``LayerChoice`` and ``InputChoice`` in the model code. For details, please refer to :githublink:`reference ` here to learn more.
+
+.. tip::
+ A useful application of the random mutator is debugging. Running
+
+ .. code-block:: python
+
+ mutator = RandomMutator(model)
+ mutator.reset()
+
+ will immediately set one possible candidate in the search space as the active one.
+
+Implement a Distributed NAS Tuner
+-----------------------------------
+
+Before learning how to write a distributed NAS tuner, users should first learn how to write a general tuner. Read `Customize Tuner <../Tuner/CustomizeTuner.rst>`__ for a tutorial.
+
+When users call "\ `nnictl ss_gen <../Tutorial/Nnictl.rst>`__\ " to generate search space file, a search space file like this will be generated:
+
+.. code-block:: json
+
+ {
+ "key_name": {
+ "_type": "layer_choice",
+ "_value": ["op1_repr", "op2_repr", "op3_repr"]
+ },
+ "key_name": {
+ "_type": "input_choice",
+ "_value": {
+ "candidates": ["in1_key", "in2_key", "in3_key"],
+ "n_chosen": 1
+ }
+ }
+ }
+
+This is the exact search space tuners will receive in ``update_search_space``. It's then tuners' responsibility to interpret the search space and generate new candidates in ``generate_parameters``. A valid "parameters" will be in the following format:
+
+.. code-block:: json
+
+ {
+ "key_name": {
+ "_value": "op1_repr",
+ "_idx": 0
+ },
+ "key_name": {
+ "_value": ["in2_key"],
+ "_idex": [1]
+ }
+ }
+
+Send it through ``generate_parameters``\ , and the tuner would look like any HPO tuner. Refer to `SPOS <./SPOS.rst>`__ example code for an example.
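+
+As a sketch (not the SPOS implementation), a random-search tuner that consumes the generated search space and emits parameters in the format above could look like this:
+
+.. code-block:: python
+
+ import random
+ from nni.tuner import Tuner
+
+ class RandomNasTuner(Tuner):
+     def update_search_space(self, search_space):
+         # receive the search space generated by `nnictl ss_gen`
+         self.search_space = search_space
+
+     def generate_parameters(self, parameter_id, **kwargs):
+         chosen = {}
+         for key, spec in self.search_space.items():
+             if spec['_type'] == 'layer_choice':
+                 idx = random.randrange(len(spec['_value']))
+                 chosen[key] = {'_value': spec['_value'][idx], '_idx': idx}
+             elif spec['_type'] == 'input_choice':
+                 candidates = spec['_value']['candidates']
+                 idxs = sorted(random.sample(range(len(candidates)), spec['_value']['n_chosen']))
+                 chosen[key] = {'_value': [candidates[i] for i in idxs], '_idx': idxs}
+         return chosen
+
+     def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
+         pass  # a purely random tuner ignores trial results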
diff --git a/docs/en_US/NAS/Benchmarks.rst b/docs/en_US/NAS/Benchmarks.rst
new file mode 100644
index 0000000000..a81e1785b5
--- /dev/null
+++ b/docs/en_US/NAS/Benchmarks.rst
@@ -0,0 +1,168 @@
+NAS Benchmarks
+==============
+
+.. toctree::
+ :hidden:
+
+ Example Usages
+
+Introduction
+------------
+
+To improve the reproducibility of NAS algorithms and reduce computing resource requirements, researchers have proposed a series of NAS benchmarks such as `NAS-Bench-101 `__\ , `NAS-Bench-201 `__\ , `NDS `__\ , etc. NNI provides a query interface for users to acquire these benchmarks. Within just a few lines of code, researchers are able to evaluate their NAS algorithms easily and fairly by utilizing these benchmarks.
+
+Prerequisites
+-------------
+
+
+* Please prepare a folder to hold all the benchmark databases. By default, it is ``${HOME}/.nni/nasbenchmark``. You can place it anywhere you like and specify it in ``NASBENCHMARK_DIR`` via ``export NASBENCHMARK_DIR=/path/to/your/nasbenchmark`` before importing NNI.
+* Please install ``peewee`` via ``pip3 install peewee``\ , which NNI uses to connect to the database.
+
+Data Preparation
+----------------
+
+To avoid storage and legality issues, we do not provide any prepared databases. Please follow the following steps.
+
+
+#.
+ Clone NNI to your machine and enter ``examples/nas/benchmarks`` directory.
+
+ .. code-block:: bash
+
+ git clone -b ${NNI_VERSION} https://github.com/microsoft/nni
+ cd nni/examples/nas/benchmarks
+
+ Replace ``${NNI_VERSION}`` with a released version name or branch name, e.g., ``v1.9``.
+
+#.
+ Install dependencies via ``pip3 install -r xxx.requirements.txt``. ``xxx`` can be ``nasbench101``\ , ``nasbench201`` or ``nds``.
+
+#. Generate the database via ``./xxx.sh``. The directory that stores the benchmark files can be configured with the ``NASBENCHMARK_DIR`` environment variable, which defaults to ``~/.nni/nasbenchmark``. Note that the NAS-Bench-201 dataset will be downloaded from Google Drive.
+
+Please make sure there is at least 10 GB of free disk space, and note that the conversion process can take several hours to complete.
+
+Example Usages
+--------------
+
+Please refer to the `example usages of the benchmarks API <./BenchmarksExample>`__.
+
+NAS-Bench-101
+-------------
+
+`Paper link `__ `Open-source `__
+
+NAS-Bench-101 contains 423,624 unique neural networks, combined with 4 variations in number of epochs (4, 12, 36, 108), each of which is trained 3 times. It is a cell-wise search space, which constructs and stacks a cell by enumerating DAGs with at most 7 operators, and no more than 9 connections. All operators can be chosen from ``CONV3X3_BN_RELU``\ , ``CONV1X1_BN_RELU`` and ``MAXPOOL3X3``\ , except the first operator (always ``INPUT``\ ) and last operator (always ``OUTPUT``\ ).
+
+Notably, NAS-Bench-101 eliminates invalid cells (e.g., there is no path from input to output, or there is redundant computation). Furthermore, isomorphic cells are de-duplicated, i.e., all the remaining cells are computationally unique.
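+
+A minimal query sketch follows; the exact signature is documented below, and passing ``None`` for the architecture is assumed to act as a wildcard:
+
+.. code-block:: python
+
+ from nni.nas.benchmarks.nasbench101 import query_nb101_trial_stats
+
+ # iterate over the recorded trials that were trained for 108 epochs
+ for trial in query_nb101_trial_stats(None, 108):
+     print(trial)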
+
+API Documentation
+^^^^^^^^^^^^^^^^^
+
+.. autofunction:: nni.nas.benchmarks.nasbench101.query_nb101_trial_stats
+
+.. autoattribute:: nni.nas.benchmarks.nasbench101.INPUT
+
+.. autoattribute:: nni.nas.benchmarks.nasbench101.OUTPUT
+
+.. autoattribute:: nni.nas.benchmarks.nasbench101.CONV3X3_BN_RELU
+
+.. autoattribute:: nni.nas.benchmarks.nasbench101.CONV1X1_BN_RELU
+
+.. autoattribute:: nni.nas.benchmarks.nasbench101.MAXPOOL3X3
+
+.. autoclass:: nni.nas.benchmarks.nasbench101.Nb101TrialConfig
+
+.. autoclass:: nni.nas.benchmarks.nasbench101.Nb101TrialStats
+
+.. autoclass:: nni.nas.benchmarks.nasbench101.Nb101IntermediateStats
+
+.. autofunction:: nni.nas.benchmarks.nasbench101.graph_util.nasbench_format_to_architecture_repr
+
+.. autofunction:: nni.nas.benchmarks.nasbench101.graph_util.infer_num_vertices
+
+.. autofunction:: nni.nas.benchmarks.nasbench101.graph_util.hash_module
+
+NAS-Bench-201
+-------------
+
+`Paper link `__ `Open-source API `__ \ `Implementations `__
+
+NAS-Bench-201 is a cell-wise search space that views nodes as tensors and edges as operators. The search space contains all possible densely-connected DAGs with 4 nodes, resulting in 15,625 candidates in total. Each operator (i.e., edge) is selected from a pre-defined operator set (\ ``NONE``\ , ``SKIP_CONNECT``\ , ``CONV_1X1``\ , ``CONV_3X3`` and ``AVG_POOL_3X3``\ ). Training approaches vary in the dataset used (CIFAR-10, CIFAR-100, ImageNet) and the number of epochs scheduled (12 and 200). Each combination of architecture and training approach is repeated 1 - 3 times with different random seeds.
+
+API Documentation
+^^^^^^^^^^^^^^^^^
+
+.. autofunction:: nni.nas.benchmarks.nasbench201.query_nb201_trial_stats
+
+.. autoattribute:: nni.nas.benchmarks.nasbench201.NONE
+
+.. autoattribute:: nni.nas.benchmarks.nasbench201.SKIP_CONNECT
+
+.. autoattribute:: nni.nas.benchmarks.nasbench201.CONV_1X1
+
+.. autoattribute:: nni.nas.benchmarks.nasbench201.CONV_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nasbench201.AVG_POOL_3X3
+
+.. autoclass:: nni.nas.benchmarks.nasbench201.Nb201TrialConfig
+
+.. autoclass:: nni.nas.benchmarks.nasbench201.Nb201TrialStats
+
+.. autoclass:: nni.nas.benchmarks.nasbench201.Nb201IntermediateStats
+
+NDS
+---
+
+`Paper link `__ `Open-source `__
+
+*On Network Design Spaces for Visual Recognition* released trial statistics of over 100,000 configurations (models + hyper-parameters) sampled from multiple model families, including vanilla (feedforward networks loosely inspired by VGG), ResNet and ResNeXt (residual basic block and residual bottleneck block) and NAS cells (following popular designs from NASNet, Amoeba, PNAS, ENAS and DARTS). Most configurations are trained only once with a fixed seed, except a few that are trained twice or three times.
+
+Instead of storing results obtained with different configurations in separate files, we dump them into one single database to enable comparison in multiple dimensions. Specifically, we use ``model_family`` to distinguish model types, ``model_spec`` for all hyper-parameters needed to build this model, ``cell_spec`` for detailed information on operators and connections if it is a NAS cell, ``generator`` to denote the sampling policy through which this configuration is generated. Refer to API documentation for details.
+
+Available Operators
+-------------------
+
+Here is a list of available operators used in NDS.
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.NONE
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.SKIP_CONNECT
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.AVG_POOL_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.MAX_POOL_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.MAX_POOL_5X5
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.MAX_POOL_7X7
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.CONV_1X1
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.CONV_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.CONV_3X1_1X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.CONV_7X1_1X7
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.DIL_CONV_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.DIL_CONV_5X5
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.SEP_CONV_3X3
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.SEP_CONV_5X5
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.SEP_CONV_7X7
+
+.. autoattribute:: nni.nas.benchmarks.nds.constants.DIL_SEP_CONV_3X3
+
+API Documentation
+^^^^^^^^^^^^^^^^^
+
+.. autofunction:: nni.nas.benchmarks.nds.query_nds_trial_stats
+
+.. autoclass:: nni.nas.benchmarks.nds.NdsTrialConfig
+
+.. autoclass:: nni.nas.benchmarks.nds.NdsTrialStats
+
+.. autoclass:: nni.nas.benchmarks.nds.NdsIntermediateStats
diff --git a/docs/en_US/NAS/CDARTS.rst b/docs/en_US/NAS/CDARTS.rst
new file mode 100644
index 0000000000..90d7804383
--- /dev/null
+++ b/docs/en_US/NAS/CDARTS.rst
@@ -0,0 +1,72 @@
+CDARTS
+======
+
+Introduction
+------------
+
+`CDARTS `__ builds a cyclic feedback mechanism between the search and evaluation networks. First, the search network generates an initial topology for evaluation, so that the weights of the evaluation network can be optimized. Second, the architecture topology in the search network is further optimized by the label supervision in classification, as well as the regularization from the evaluation network through feature distillation. Repeating the above cycle results in a joint optimization of the search and evaluation networks, and thus enables the evolution of the topology to fit the final evaluation network.
+
+In the implementation of ``CdartsTrainer``\ , two models and two mutators (one for each) are first instantiated. The first model is the so-called "search network", which is mutated with a ``RegularizedDartsMutator`` -- a mutator with subtle differences from ``DartsMutator``. The second model is the "evaluation network", which is mutated with a discrete mutator that leverages the previous search network mutator to sample a single path each time. The trainer trains the models and mutators alternately. Users can refer to the `paper `__ if they are interested in more details on these trainers and mutators.
+
+Reproduction Results
+--------------------
+
+This is CDARTS based on the NNI platform, which currently supports CIFAR10 search and retrain. ImageNet search and retrain should also be supported, and we provide corresponding interfaces. Our reproduced results on NNI are slightly lower than those in the paper but much higher than those of the original DARTS. Here we show the results of three independent experiments on CIFAR10.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Runs
+ - Paper
+ - NNI
+ * - 1
+ - 97.52
+ - 97.44
+ * - 2
+ - 97.53
+ - 97.48
+ * - 3
+ - 97.58
+ - 97.56
+
+
+Examples
+--------
+
+`Example code `__
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+ # install apex for distributed training.
+ git clone https://github.com/NVIDIA/apex
+ cd apex
+ python setup.py install --cpp_ext --cuda_ext
+
+ # search the best architecture
+ cd examples/nas/cdarts
+ bash run_search_cifar.sh
+
+ # train the best architecture.
+ bash run_retrain_cifar.sh
+
+Reference
+---------
+
+PyTorch
+^^^^^^^
+
+.. autoclass:: nni.algorithms.nas.pytorch.cdarts.CdartsTrainer
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.cdarts.RegularizedDartsMutator
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.cdarts.DartsDiscreteMutator
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.cdarts.RegularizedMutatorParallel
+ :members:
diff --git a/docs/en_US/NAS/ClassicNas.rst b/docs/en_US/NAS/ClassicNas.rst
new file mode 100644
index 0000000000..d8aa47a5c4
--- /dev/null
+++ b/docs/en_US/NAS/ClassicNas.rst
@@ -0,0 +1,59 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+Classic NAS Algorithms
+======================
+
+In classic NAS algorithms, each architecture is trained as a trial and the NAS algorithm acts as a tuner. Thus, this training mode naturally fits within the NNI hyper-parameter tuning framework, where the tuner generates a new architecture for each trial and the trials run in the training service.
+
+Quick Start
+-----------
+
+The following example shows how to use classic NAS algorithms. You can see it is quite similar to NNI hyper-parameter tuning.
+
+.. code-block:: python
+
+ model = Net()
+
+ # get the chosen architecture from tuner and apply it on model
+ get_and_apply_next_architecture(model)
+ train(model) # your code for training the model
+ acc = test(model) # test the trained model
+ nni.report_final_result(acc) # report the performance of the chosen architecture
+
+First, instantiate the model. The search space has been defined in this model through ``LayerChoice`` and ``InputChoice``. After that, users should invoke ``get_and_apply_next_architecture(model)`` to settle on a specific architecture. This function receives the architecture from the tuner (i.e., the classic NAS algorithm) and applies it to ``model``. At this point, ``model`` becomes a specific architecture rather than a search space. Then users are free to train this model just like a normal PyTorch model. After getting the accuracy of this model, users should invoke ``nni.report_final_result(acc)`` to report the result to the tuner.
+
+At this point, the trial code is ready. Then, we can prepare an NNI experiment, i.e., a search space file and an experiment config file. Different from NNI hyper-parameter tuning, the search space file is automatically generated from the trial code by running the following command (detailed usage of this command can be found `here <../Tutorial/Nnictl.rst>`__\ ):
+
+``nnictl ss_gen --trial_command="the command for running your trial code"``
+
+A file named ``nni_auto_gen_search_space.json`` is generated by this command. Then put the path of the generated search space in the field ``searchSpacePath`` of the experiment config file. The other fields of the config file can be filled in by referring to `this tutorial <../Tutorial/QuickStart.rst>`__.
+
+Currently, we only support :githublink:`PPO Tuner ` for classic NAS. More classic NAS algorithms will be supported soon.
+
+The complete examples can be found :githublink:`here ` for PyTorch and :githublink:`here ` for TensorFlow.
+
+Standalone mode for easy debugging
+----------------------------------
+
+We support a standalone mode for easy debugging, where you can directly run the trial command without launching an NNI experiment. This is for checking whether your trial code can correctly run. The first candidate(s) are chosen for ``LayerChoice`` and ``InputChoice`` in this standalone mode.
+
+
+Regularized Evolution Tuner
+---------------------------
+
+This is a tuner geared for NNI’s Neural Architecture Search (NAS) interface. It uses the `evolution algorithm `__.
+
+The tuner first randomly initializes ``population`` models and evaluates them. After that, each time it produces a new architecture, the tuner randomly samples ``sample`` architectures from the ``population``\ , then mutates the best model in the ``sample`` (the parent model) to produce a child model. The mutation includes the hidden state mutation and the op mutation. The hidden state mutation consists of replacing a hidden state with another hidden state from within the cell, subject to the constraint that no loops are formed. The op mutation replaces one op with another op from the op set. Note that keeping the child model the same as its parent is not allowed. After the child model is evaluated, it is added to the tail of the ``population`` and the front one is popped.
+
+Note that **trial concurrency should be less than the population of the model**\ , otherwise a NO_MORE_TRIAL exception will be raised.
+
+The whole procedure is summarized by the pseudocode below.
+
+
+.. image:: ../../img/EvoNasTuner.png
+ :target: ../../img/EvoNasTuner.png
+ :alt:
+
diff --git a/docs/en_US/NAS/Cream.rst b/docs/en_US/NAS/Cream.rst
new file mode 100644
index 0000000000..7ad06784b4
--- /dev/null
+++ b/docs/en_US/NAS/Cream.rst
@@ -0,0 +1,158 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+Cream of the Crop: Distilling Prioritized Paths For One-Shot Neural Architecture Search
+=======================================================================================
+
+ **`[Paper] `__ `[Models-Google Drive] `__\ `[Models-Baidu Disk (PWD: wqw6)] `__ `[BibTex] `__**
+
+In this work, we present a simple yet effective architecture distillation method. The central idea is that subnetworks can learn collaboratively and teach each other throughout the training process, aiming to boost the convergence of individual models. We introduce the concept of prioritized path, which refers to the architecture candidates exhibiting superior performance during training. Distilling knowledge from the prioritized paths is able to boost the training of subnetworks. Since the prioritized paths are changed on the fly depending on their performance and complexity, the final obtained paths are the cream of the crop. The discovered architectures achieve superior performance compared to the recent `MobileNetV3 `__ and `EfficientNet `__ families under aligned settings.
+
+Reproduced Results
+------------------
+
+Top-1 accuracy on ImageNet. The top-1 accuracy of the Cream search algorithm surpasses MobileNetV3 and EfficientNet-B0/B1 on ImageNet.
+Training with 16 GPUs is slightly better than with 8 GPUs, as shown below.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Model (M Flops)
+ - 8Gpus
+ - 16Gpus
+ * - 14M
+ - 53.7
+ - 53.8
+ * - 43M
+ - 65.8
+ - 66.5
+ * - 114M
+ - 72.1
+ - 72.8
+ * - 287M
+ - 76.7
+ - 77.6
+ * - 481M
+ - 78.9
+ - 79.2
+ * - 604M
+ - 79.4
+ - 80.0
+
+
+
+
+
+
+Examples
+--------
+
+`Example code `__
+
+Please run the following scripts in the example folder.
+
+Data Preparation
+----------------
+
+You need to first download the `ImageNet-2012 `__ dataset to the folder ``./data/imagenet`` and move the validation set to the subfolder ``./data/imagenet/val``. To move the validation set, you could use the following script: https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
+
+Put the ImageNet data in ``./data``. It should look like the following:
+
+.. code-block:: bash
+
+ ./data/imagenet/train
+ ./data/imagenet/val
+ ...
+
+Quick Start
+-----------
+
+I. Search
+^^^^^^^^^
+
+First, build environments for searching.
+
+.. code-block:: bash
+
+ pip install -r ./requirements
+
+ git clone https://github.com/NVIDIA/apex.git
+ cd apex
+ python setup.py install --cpp_ext --cuda_ext
+
+To search for an architecture, you need to configure the parameters ``FLOPS_MINIMUM`` and ``FLOPS_MAXIMUM`` to specify the desired range of model FLOPs, such as [0, 600] M FLOPs. You can specify the FLOPs interval by changing these two parameters in ``./configs/train.yaml``.
+
+.. code-block:: bash
+
+ FLOPS_MINIMUM: 0 # Minimum Flops of Architecture
+ FLOPS_MAXIMUM: 600 # Maximum Flops of Architecture
+
+For example, if you expect to search an architecture with model flops <= 200M, please set the ``FLOPS_MINIMUM`` and ``FLOPS_MAXIMUM`` to be ``0`` and ``200``.
+
+After you specify the flops of the architectures you would like to search, you can search an architecture now by running:
+
+.. code-block:: bash
+
+ python -m torch.distributed.launch --nproc_per_node=8 ./train.py --cfg ./configs/train.yaml
+
+The searched architectures need to be retrained to obtain the final model. The final model is saved in ``.pth.tar`` format. Retraining code will be released soon.
+
+II. Retrain
+^^^^^^^^^^^
+
+To train the searched architectures, you need to configure the parameter ``MODEL_SELECTION`` to specify the model FLOPs. To specify which model to train, you should add ``MODEL_SELECTION`` in ``./configs/retrain.yaml``. You can select one from [14, 43, 114, 287, 481, 604], which stand for different FLOPs (M).
+
+.. code-block:: bash
+
+ MODEL_SELECTION: 43 # Retrain 43m model
+ MODEL_SELECTION: 481 # Retrain 481m model
+ ......
+
+To train random architectures, you need to set ``MODEL_SELECTION`` to ``-1`` and configure the parameter ``INPUT_ARCH``\ :
+
+.. code-block:: bash
+
+ MODEL_SELECTION: -1 # Train random architectures
+ INPUT_ARCH: [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]] # Random Architectures
+ ......
+
+After adding ``MODEL_SELECTION`` in ``./configs/retrain.yaml``\ , you need to use the following command to train the model.
+
+.. code-block:: bash
+
+ python -m torch.distributed.launch --nproc_per_node=8 ./retrain.py --cfg ./configs/retrain.yaml
+
+III. Test
+^^^^^^^^^
+
+To test the trained models, you need to use ``MODEL_SELECTION`` in ``./configs/test.yaml`` to specify which model to test.
+
+.. code-block:: bash
+
+ MODEL_SELECTION: 43 # test 43m model
+ MODEL_SELECTION: 481 # test 481m model
+ ......
+
+After specifying the flops of the model, you need to write the path to the resume model in ``./test.sh``.
+
+.. code-block:: bash
+
+ RESUME_PATH: './43.pth.tar'
+ RESUME_PATH: './481.pth.tar'
+ ......
+
+We provide 14M/43M/114M/287M/481M/604M pretrained models in `google drive `__ or `[Models-Baidu Disk (password: wqw6)] `__ .
+
+After downloading the pretrained models and adding ``MODEL_SELECTION`` and ``RESUME_PATH`` in './configs/test.yaml', you need to use the following command to test the model.
+
+.. code-block:: bash
+
+ python -m torch.distributed.launch --nproc_per_node=8 ./test.py --cfg ./configs/test.yaml
diff --git a/docs/en_US/NAS/DARTS.rst b/docs/en_US/NAS/DARTS.rst
new file mode 100644
index 0000000000..021c554a4a
--- /dev/null
+++ b/docs/en_US/NAS/DARTS.rst
@@ -0,0 +1,69 @@
+DARTS
+=====
+
+Introduction
+------------
+
+The paper `DARTS: Differentiable Architecture Search `__ addresses the scalability challenge of architecture search by formulating the task in a differentiable manner. Their method is based on the continuous relaxation of the architecture representation, allowing efficient search of the architecture using gradient descent.
+
+The authors' code optimizes the network weights and architecture weights alternately in mini-batches. They further explore the possibility of using second-order optimization (unroll) instead of first-order to improve performance.
+
+The implementation on NNI is based on the `official implementation `__ and a `popular 3rd-party repo `__. DARTS on NNI is designed to be general for arbitrary search spaces. A CNN search space tailored for CIFAR10, the same as in the original paper, is implemented as a use case of DARTS.
+
+Reproduction Results
+--------------------
+
+The above-mentioned example is meant to reproduce the results in the paper; we run experiments with both first- and second-order optimization. Due to time limits, we retrain *only the best architecture* derived from the search phase and repeat the experiment *only once*. Our results are currently on par with the results reported in the paper. We will add more results later when they are ready.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * -
+ - In paper
+ - Reproduction
+ * - First order (CIFAR10)
+ - 3.00 +/- 0.14
+ - 2.78
+ * - Second order (CIFAR10)
+ - 2.76 +/- 0.09
+ - 2.80
+
+
+Examples
+--------
+
+CNN Search Space
+^^^^^^^^^^^^^^^^
+
+:githublink:`Example code `
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+ # search the best architecture
+ cd examples/nas/darts
+ python3 search.py
+
+ # train the best architecture
+ python3 retrain.py --arc-checkpoint ./checkpoints/epoch_49.json
+
+Reference
+---------
+
+PyTorch
+^^^^^^^
+
+.. autoclass:: nni.algorithms.nas.pytorch.darts.DartsTrainer
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.darts.DartsMutator
+ :members:
+
+Limitations
+-----------
+
+
+* DARTS doesn't support DataParallel and needs to be customized in order to support DistributedDataParallel.
diff --git a/docs/en_US/NAS/ENAS.rst b/docs/en_US/NAS/ENAS.rst
new file mode 100644
index 0000000000..4ee0d03573
--- /dev/null
+++ b/docs/en_US/NAS/ENAS.rst
@@ -0,0 +1,46 @@
+ENAS
+====
+
+Introduction
+------------
+
+The paper `Efficient Neural Architecture Search via Parameter Sharing `__ uses parameter sharing between child models to accelerate the NAS process. In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. The controller is trained with policy gradient to select a subgraph that maximizes the expected reward on the validation set. Meanwhile the model corresponding to the selected subgraph is trained to minimize a canonical cross entropy loss.
+
+The implementation on NNI is based on the `official implementation in Tensorflow `__\ , including a general-purpose reinforcement-learning controller and a trainer that trains the target network and this controller alternately. Following the paper, we have also implemented the macro and micro search spaces on CIFAR10 to demonstrate how to use these trainers. Since the code to train from scratch on NNI is not ready yet, reproduction results are currently unavailable.
+
+Examples
+--------
+
+CIFAR10 Macro/Micro Search Space
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:githublink:`Example code `
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+ # search the best architecture
+ cd examples/nas/enas
+
+ # search in macro search space
+ python3 search.py --search-for macro
+
+ # search in micro search space
+ python3 search.py --search-for micro
+
+ # view more options for search
+ python3 search.py -h
+
+Reference
+---------
+
+PyTorch
+^^^^^^^
+
+.. autoclass:: nni.algorithms.nas.pytorch.enas.EnasTrainer
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.enas.EnasMutator
+ :members:
diff --git a/docs/en_US/NAS/NasGuide.rst b/docs/en_US/NAS/NasGuide.rst
new file mode 100644
index 0000000000..45475c686a
--- /dev/null
+++ b/docs/en_US/NAS/NasGuide.rst
@@ -0,0 +1,88 @@
+One-shot NAS algorithms
+=======================
+
+Besides `classic NAS algorithms <./ClassicNas.rst>`__\ , users also apply more advanced one-shot NAS algorithms to find better models from a search space. There are lots of related works about one-shot NAS algorithms, such as `SMASH `__\ , `ENAS `__\ , `DARTS `__\ , `FBNet `__\ , `ProxylessNAS `__\ , `SPOS `__\ , `Single-Path NAS `__\ , `Understanding One-shot `__ and `GDAS `__. One-shot NAS algorithms usually build a supernet containing every candidate in the search space as its subnetwork, and in each step, a subnetwork or combination of several subnetworks is trained.
+
+Currently, several one-shot NAS methods are supported on NNI. For example, ``DartsTrainer``\ , which uses SGD to train architecture weights and model weights iteratively, and ``ENASTrainer``\ , which `uses a controller to train the model `__. New and more efficient NAS trainers keep emerging in the research community and some will be implemented in future releases of NNI.
+
+Search with One-shot NAS Algorithms
+-----------------------------------
+
+Each one-shot NAS algorithm implements a trainer, for which users can find usage details in the description of each algorithm. Here is a simple example, demonstrating how users can use ``EnasTrainer``.
+
+.. code-block:: python
+
+ # this is exactly same as traditional model training
+ model = Net()
+ dataset_train = CIFAR10(root="./data", train=True, download=True, transform=train_transform)
+ dataset_valid = CIFAR10(root="./data", train=False, download=True, transform=valid_transform)
+ criterion = nn.CrossEntropyLoss()
+ optimizer = torch.optim.SGD(model.parameters(), 0.05, momentum=0.9, weight_decay=1.0E-4)
+
+ # use NAS here
+ def top1_accuracy(output, target):
+ # this is the function that computes the reward, as required by ENAS algorithm
+ batch_size = target.size(0)
+ _, predicted = torch.max(output.data, 1)
+ return (predicted == target).sum().item() / batch_size
+
+ def metrics_fn(output, target):
+ # metrics function receives output and target and computes a dict of metrics
+ return {"acc1": top1_accuracy(output, target)}
+
+ from nni.algorithms.nas.pytorch import enas
+ trainer = enas.EnasTrainer(model,
+ loss=criterion,
+ metrics=metrics_fn,
+ reward_function=top1_accuracy,
+ optimizer=optimizer,
+                              batch_size=128,
+ num_epochs=10, # 10 epochs
+ dataset_train=dataset_train,
+ dataset_valid=dataset_valid,
+ log_frequency=10) # print log every 10 steps
+ trainer.train() # training
+ trainer.export(file="model_dir/final_architecture.json") # export the final architecture to file
+
+``model`` is the model with the `user-defined search space <./WriteSearchSpace.rst>`__. Then users should prepare the training data and the model evaluation metrics. To search from the defined search space, a one-shot algorithm is instantiated as a trainer (e.g., ``EnasTrainer``). The trainer exposes a few arguments that you can customize, for example, the loss function, the metrics function, the optimizer, and the datasets. These should satisfy most usage requirements, and we do our best to make sure our built-in trainers work on as many models, tasks, and datasets as possible.
+
+**Note that** when using one-shot NAS algorithms, there is no need to start an NNI experiment. Users can directly run this Python script (i.e., ``train.py``\ ) through ``python3 train.py`` without ``nnictl``. After training, users can export the best one of the found models through ``trainer.export()``.
+
+Each trainer in NNI has its targeted scenario and usage. Some trainers have the assumption that the task is a classification task; some trainers might have a different definition of "epoch" (e.g., an ENAS epoch = some child steps + some controller steps). Most trainers do not have support for distributed training: they won't wrap your model with ``DataParallel`` or ``DistributedDataParallel`` to do that. So after a few tryouts, if you want to actually use the trainers on your very customized applications, you might need to `customize your trainer <./Advanced.rst#extend-the-ability-of-one-shot-trainers>`__.
+
+Furthermore, one-shot NAS can be visualized with our NAS UI. `See more details. <./Visualization.rst>`__
+
+Retrain with Exported Architecture
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+After the search phase, it's time to train the found architecture. Unlike many open-source NAS algorithms, which write a whole new model specifically for retraining, we found that the search model and the retraining model are usually very similar, so you can construct your final model with the exact same model code. For example:
+
+.. code-block:: python
+
+ model = Net()
+ apply_fixed_architecture(model, "model_dir/final_architecture.json")
+
+The JSON is simply a mapping from mutable keys to choices. A choice can be expressed as:
+
+
+* A string: select the candidate with the corresponding name.
+* A number: select the candidate with the corresponding index.
+* A list of strings: select the candidates with the corresponding names.
+* A list of numbers: select the candidates with the corresponding indices.
+* A list of boolean values: a multi-hot array.
+
+For example,
+
+.. code-block:: json
+
+ {
+ "LayerChoice1": "conv5x5",
+ "LayerChoice2": 6,
+ "InputChoice3": ["layer1", "layer3"],
+ "InputChoice4": [1, 2],
+ "InputChoice5": [false, true, false, false, true]
+ }
+
+After applying, the model is then fixed and ready for final training. The model works as a single model, and unused parameters and modules are pruned.
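+
+As a minimal sketch (assuming the same ``Net`` class used during the search and hypothetical ``train_loader`` and ``num_epochs`` variables), retraining is just an ordinary PyTorch training loop on the fixed model:
+
+.. code-block:: python
+
+   import torch
+   import torch.nn as nn
+   from nni.nas.pytorch.fixed import apply_fixed_architecture
+
+   model = Net()
+   apply_fixed_architecture(model, "model_dir/final_architecture.json")
+
+   # from here on, the model behaves like an ordinary PyTorch module
+   criterion = nn.CrossEntropyLoss()
+   optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
+   for epoch in range(num_epochs):
+       for inputs, targets in train_loader:
+           optimizer.zero_grad()
+           loss = criterion(model(inputs), targets)
+           loss.backward()
+           optimizer.step()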
+
+Also, refer to `DARTS <./DARTS.rst>`__ for code exemplifying retraining.
diff --git a/docs/en_US/NAS/NasReference.rst b/docs/en_US/NAS/NasReference.rst
new file mode 100644
index 0000000000..6df2be425b
--- /dev/null
+++ b/docs/en_US/NAS/NasReference.rst
@@ -0,0 +1,99 @@
+NAS Reference
+=============
+
+.. contents::
+
+Mutables
+--------
+
+.. autoclass:: nni.nas.pytorch.mutables.Mutable
+ :members:
+
+.. autoclass:: nni.nas.pytorch.mutables.LayerChoice
+ :members:
+
+.. autoclass:: nni.nas.pytorch.mutables.InputChoice
+ :members:
+
+.. autoclass:: nni.nas.pytorch.mutables.MutableScope
+ :members:
+
+Utilities
+^^^^^^^^^
+
+.. autofunction:: nni.nas.pytorch.utils.global_mutable_counting
+
+Mutators
+--------
+
+.. autoclass:: nni.nas.pytorch.base_mutator.BaseMutator
+ :members:
+
+.. autoclass:: nni.nas.pytorch.mutator.Mutator
+ :members:
+
+Random Mutator
+^^^^^^^^^^^^^^
+
+.. autoclass:: nni.algorithms.nas.pytorch.random.RandomMutator
+ :members:
+
+Utilities
+^^^^^^^^^
+
+.. autoclass:: nni.nas.pytorch.utils.StructuredMutableTreeNode
+ :members:
+
+Trainers
+--------
+
+Trainer
+^^^^^^^
+
+.. autoclass:: nni.nas.pytorch.base_trainer.BaseTrainer
+ :members:
+
+.. autoclass:: nni.nas.pytorch.trainer.Trainer
+ :members:
+
+Retrain
+^^^^^^^
+
+.. autofunction:: nni.nas.pytorch.fixed.apply_fixed_architecture
+
+.. autoclass:: nni.nas.pytorch.fixed.FixedArchitecture
+ :members:
+
+Distributed NAS
+^^^^^^^^^^^^^^^
+
+.. autofunction:: nni.algorithms.nas.pytorch.classic_nas.get_and_apply_next_architecture
+
+.. autoclass:: nni.algorithms.nas.pytorch.classic_nas.mutator.ClassicMutator
+ :members:
+
+Callbacks
+^^^^^^^^^
+
+.. autoclass:: nni.nas.pytorch.callbacks.Callback
+ :members:
+
+.. autoclass:: nni.nas.pytorch.callbacks.LRSchedulerCallback
+ :members:
+
+.. autoclass:: nni.nas.pytorch.callbacks.ArchitectureCheckpoint
+ :members:
+
+.. autoclass:: nni.nas.pytorch.callbacks.ModelCheckpoint
+ :members:
+
+Utilities
+^^^^^^^^^
+
+.. autoclass:: nni.nas.pytorch.utils.AverageMeterGroup
+ :members:
+
+.. autoclass:: nni.nas.pytorch.utils.AverageMeter
+ :members:
+
+.. autofunction:: nni.nas.pytorch.utils.to_device
diff --git a/docs/en_US/NAS/Overview.rst b/docs/en_US/NAS/Overview.rst
new file mode 100644
index 0000000000..3583816f5d
--- /dev/null
+++ b/docs/en_US/NAS/Overview.rst
@@ -0,0 +1,112 @@
+Neural Architecture Search (NAS) on NNI
+=======================================
+
+.. contents::
+
+Overview
+--------
+
+Automatic neural architecture search is taking an increasingly important role in finding better models. Recent research has proved the feasibility of automatic NAS and has led to models that beat many manually designed and tuned models. Some representative works are `NASNet `__\ , `ENAS `__\ , `DARTS `__\ , `Network Morphism `__\ , and `Evolution `__. Further, new innovations keep emerging.
+
+However, it takes a great effort to implement NAS algorithms, and it's hard to reuse the code base of existing algorithms for new ones. To facilitate NAS innovations (e.g., the design and implementation of new NAS models, the comparison of different NAS models side-by-side, etc.), an easy-to-use and flexible programming interface is crucial.
+
+With this motivation, our ambition is to provide a unified architecture in NNI, accelerate innovations on NAS, and apply state-of-the-art algorithms to real-world problems faster.
+
+With the unified interface, there are two different modes for architecture search. `One <#supported-one-shot-nas-algorithms>`__ is the so-called one-shot NAS, where a super-net is built based on a search space and one-shot training is used to generate a good-performing child model. `The other <#supported-classic-nas-algorithms>`__ is the traditional search-based approach, where each child model within the search space runs as an independent trial. We call it classic NAS.
+
+NNI also provides dedicated `visualization tool <#nas-visualization>`__ for users to check the status of the neural architecture search process.
+
+Supported Classic NAS Algorithms
+--------------------------------
+
+The procedure of classic NAS algorithms is similar to hyper-parameter tuning: users start experiments with ``nnictl`` and each model runs as a trial. The difference is that the search space file is automatically generated from the user model (with the search space embedded in it) by running ``nnictl ss_gen``. The following table lists the supported tuning algorithms for classic NAS mode. More algorithms will be supported in future releases.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Name
+ - Brief Introduction of Algorithm
+ * - :githublink:`Random Search `
+ - Randomly pick a model from search space
+ * - `PPO Tuner `__
+ - PPO Tuner is a Reinforcement Learning tuner based on PPO algorithm. `Reference Paper `__
+
+
+Please refer to `here `__ for the usage of classic NAS algorithms.
+
+Supported One-shot NAS Algorithms
+---------------------------------
+
+NNI currently supports the one-shot NAS algorithms listed below and is adding more. Users can reproduce an algorithm or use it on their own dataset. We also encourage users to implement other algorithms with `NNI API <#use-nni-api>`__\ , to benefit more people.
+
+.. list-table::
+ :header-rows: 1
+ :widths: auto
+
+ * - Name
+ - Brief Introduction of Algorithm
+ * - `ENAS `__
+ - `Efficient Neural Architecture Search via Parameter Sharing `__. In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. It uses parameter sharing between child models to achieve fast speed and excellent performance.
+ * - `DARTS `__
+ - `DARTS: Differentiable Architecture Search `__ introduces a novel algorithm for differentiable network architecture search on bilevel optimization.
+ * - `P-DARTS `__
+ - `Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation `__ is based on DARTS. It introduces an efficient algorithm which allows the depth of searched architectures to grow gradually during the training procedure.
+ * - `SPOS `__
+ - `Single Path One-Shot Neural Architecture Search with Uniform Sampling `__ constructs a simplified supernet trained with a uniform path sampling method and applies an evolutionary algorithm to efficiently search for the best-performing architectures.
+ * - `CDARTS `__
+ - `Cyclic Differentiable Architecture Search `__ builds a cyclic feedback mechanism between the search and evaluation networks. It introduces a cyclic differentiable architecture search framework which integrates the two networks into a unified architecture.
+ * - `ProxylessNAS `__
+ - `ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware `__. It removes proxy, directly learns the architectures for large-scale target tasks and target hardware platforms.
+ * - `TextNAS `__
+ - `TextNAS: A Neural Architecture Search Space tailored for Text Representation `__. It is a neural architecture search algorithm tailored for text representation.
+
+
+One-shot algorithms run **standalone without nnictl**. NNI supports both PyTorch and Tensorflow 2.X.
+
+Here are some common dependencies to run the examples. PyTorch needs to be above 1.2 to use ``BoolTensor``.
+
+
+* tensorboard
+* PyTorch 1.2+
+* git
+
+Please refer to `here `__ for the usage of one-shot NAS algorithms.
+
+One-shot NAS can be visualized with our visualization tool. Learn more details `here <./Visualization.rst>`__.
+
+Search Space Zoo
+----------------
+
+NNI provides some predefined search spaces which can be easily reused. By stacking the extracted cells, users can quickly reproduce those NAS models.
+
+Search Space Zoo contains the following NAS cells:
+
+
+* `DartsCell <./SearchSpaceZoo.rst#DartsCell>`__
+* `ENAS micro <./SearchSpaceZoo.rst#ENASMicroLayer>`__
+* `ENAS macro <./SearchSpaceZoo.rst#ENASMacroLayer>`__
+* `NAS Bench 201 <./SearchSpaceZoo.rst#nas-bench-201>`__
+
+Using NNI API to Write Your Search Space
+----------------------------------------
+
+A programming interface for designing and searching a model is often needed in two scenarios.
+
+
+#. When designing a neural network, there may be multiple operation choices on a layer, sub-model, or connection, and it's undetermined which one or which combination performs best. So, an easy way to express the candidate layers or sub-models is needed.
+#. When applying NAS on a neural network, a unified way to express the search space of architectures is needed, so that trial code doesn't need to be updated for different search algorithms.
+
+To use NNI NAS, we suggest that users first go through `the tutorial of the NAS API for building a search space <./WriteSearchSpace.rst>`__.
+
+NAS Visualization
+-----------------
+
+To help users track the process and status of how the model is searched under the specified search space, we developed a visualization tool. It visualizes the search space as a super-net and shows the importance of subnets and layers/operations, as well as how the importance changes along with the search process. Please refer to `the document of NAS visualization <./Visualization.rst>`__ for how to use it.
+
+Reference and Feedback
+----------------------
+
+
+* To `report a bug `__ for this feature in GitHub;
+* To `file a feature or improvement request `__ for this feature in GitHub.
diff --git a/docs/en_US/NAS/PDARTS.rst b/docs/en_US/NAS/PDARTS.rst
new file mode 100644
index 0000000000..ae4d5daa06
--- /dev/null
+++ b/docs/en_US/NAS/PDARTS.rst
@@ -0,0 +1,20 @@
+P-DARTS
+=======
+
+Examples
+--------
+
+:githublink:`Example code `
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+ # search the best architecture
+ cd examples/nas/pdarts
+ python3 search.py
+
+ # train the best architecture, it's the same progress as darts.
+ cd ../darts
+ python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json
diff --git a/docs/en_US/NAS/Proxylessnas.rst b/docs/en_US/NAS/Proxylessnas.rst
new file mode 100644
index 0000000000..56857fb2ab
--- /dev/null
+++ b/docs/en_US/NAS/Proxylessnas.rst
@@ -0,0 +1,74 @@
+ProxylessNAS on NNI
+===================
+
+Introduction
+------------
+
+The paper `ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware `__ removes the proxy and directly learns the architectures for large-scale target tasks and target hardware platforms. It addresses the high memory consumption issue of differentiable NAS and reduces the computational cost to the same level as regular training while still allowing a large candidate set. Please refer to the paper for details.
+
+Usage
+-----
+
+To use the ProxylessNAS training/searching approach, users need to specify the search space in their model using the `NNI NAS interface `__\ , e.g., ``LayerChoice``\ , ``InputChoice``. After defining and instantiating the model, the remaining work can be left to ``ProxylessNasTrainer`` by instantiating the trainer and passing the model to it.
+
+.. code-block:: python
+
+ trainer = ProxylessNasTrainer(model,
+ model_optim=optimizer,
+ train_loader=data_provider.train,
+ valid_loader=data_provider.valid,
+ device=device,
+ warmup=True,
+ ckpt_path=args.checkpoint_path,
+ arch_path=args.arch_path)
+ trainer.train()
+ trainer.export(args.arch_path)
+
+The complete example code can be found :githublink:`here `.
+
+**Input arguments of ProxylessNasTrainer**
+
+
+* **model** (*PyTorch model, required*\ ) - The model that users want to tune/search. It has mutables to specify search space.
+* **model_optim** (*PyTorch optimizer, required*\ ) - The optimizer used to train the model.
+* **device** (*device, required*\ ) - The device(s) on which to train/search. The trainer applies data parallelism to the model for users.
+* **train_loader** (*PyTorch data loader, required*\ ) - The data loader for training set.
+* **valid_loader** (*PyTorch data loader, required*\ ) - The data loader for validation set.
+* **label_smoothing** (*float, optional, default = 0.1*\ ) - The degree of label smoothing.
+* **n_epochs** (*int, optional, default = 120*\ ) - The number of epochs to train/search.
+* **init_lr** (*float, optional, default = 0.025*\ ) - The initial learning rate for training the model.
+* **binary_mode** (*'two', 'full', or 'full_v2', optional, default = 'full_v2'*\ ) - The forward/backward mode for the binary weights in mutator. 'full' means forward all the candidate ops, 'two' means only forward two sampled ops, 'full_v2' means recomputing the inactive ops during backward.
+* **arch_init_type** (*'normal' or 'uniform', optional, default = 'normal'*\ ) - The way to init architecture parameters.
+* **arch_init_ratio** (*float, optional, default = 1e-3*\ ) - The ratio to init architecture parameters.
+* **arch_optim_lr** (*float, optional, default = 1e-3*\ ) - The learning rate of the architecture parameters optimizer.
+* **arch_weight_decay** (*float, optional, default = 0*\ ) - Weight decay of the architecture parameters optimizer.
+* **grad_update_arch_param_every** (*int, optional, default = 5*\ ) - Update architecture weights every this number of minibatches.
+* **grad_update_steps** (*int, optional, default = 1*\ ) - During each update of architecture weights, the number of steps to train architecture weights.
+* **warmup** (*bool, optional, default = True*\ ) - Whether to do warmup.
+* **warmup_epochs** (*int, optional, default = 25*\ ) - The number of epochs to do during warmup.
+* **arch_valid_frequency** (*int, optional, default = 1*\ ) - The frequency of printing validation result.
+* **load_ckpt** (*bool, optional, default = False*\ ) - Whether to load checkpoint.
+* **ckpt_path** (*str, optional, default = None*\ ) - checkpoint path, if load_ckpt is True, ckpt_path cannot be None.
+* **arch_path** (*str, optional, default = None*\ ) - The path to store chosen architecture.
+
+Implementation
+--------------
+
+The implementation on NNI is based on the `official implementation `__. The official implementation supports two training approaches, gradient descent and RL based, and different target hardware, including 'mobile', 'cpu', 'gpu8', and 'flops'. Our current implementation on NNI supports the gradient descent training approach, but does not yet support different hardware targets. Complete support is ongoing.
+
+Below we will describe implementation details. Like other one-shot NAS algorithms on NNI, ProxylessNAS is composed of two parts: *search space* and *training approach*. For users to flexibly define their own search space and use built-in ProxylessNAS training approach, we put the specified search space in :githublink:`example code ` using :githublink:`NNI NAS interface `.
+
+
+.. image:: ../../img/proxylessnas.png
+ :target: ../../img/proxylessnas.png
+ :alt:
+
+
+The ProxylessNAS training approach is composed of ProxylessNasMutator and ProxylessNasTrainer. ProxylessNasMutator instantiates a MixedOp for each mutable (i.e., LayerChoice) and manages the architecture weights in the MixedOp. **For DataParallel**\ , architecture weights should be included in the user model. Specifically, in the ProxylessNAS implementation, we add the MixedOp to the corresponding mutable (i.e., LayerChoice) as a member variable. The mutator also exposes two member functions, i.e., ``arch_requires_grad`` and ``arch_disable_grad``\ , for the trainer to control the training of architecture weights.
+
+ProxylessNasMutator also implements the forward logic of the mutables (i.e., LayerChoice).
+
+Reproduce Results
+-----------------
+
+To reproduce the result, we first run the search. We found that although it runs for many epochs, the chosen architecture converges within the first several epochs. This is probably caused by the hyper-parameters or the implementation; we are working on it. The test accuracy of the found architecture is top1: 72.31, top5: 90.26.
diff --git a/docs/en_US/NAS/SPOS.rst b/docs/en_US/NAS/SPOS.rst
new file mode 100644
index 0000000000..86bf901afd
--- /dev/null
+++ b/docs/en_US/NAS/SPOS.rst
@@ -0,0 +1,124 @@
+Single Path One-Shot (SPOS)
+===========================
+
+Introduction
+------------
+
+Proposed in `Single Path One-Shot Neural Architecture Search with Uniform Sampling `__\ , SPOS is a one-shot NAS method that addresses the difficulties of training one-shot NAS models by constructing a simplified supernet trained with a uniform path sampling method, so that all underlying architectures (and their weights) are trained fully and equally. An evolutionary algorithm is then applied to efficiently search for the best-performing architectures without any fine-tuning.
+
+The implementation on NNI is based on the `official repo `__. We implement a trainer that trains the supernet and an evolution tuner that leverages the NNI framework to speed up the evolutionary search phase.
+
+Examples
+--------
+
+Here is a use case, which is the search space in the paper, together with the way to use the FLOPs limit to perform uniform sampling.
+
+:githublink:`Example code `
+
+Requirements
+^^^^^^^^^^^^
+
+NVIDIA DALI >= 0.16 is needed as we use DALI to accelerate the data loading of ImageNet. `Installation guide `__
+
+Download the flops lookup table from `here `__ (maintained by `Megvii `__\ ).
+Put ``op_flops_dict.pkl`` and ``checkpoint-150000.pth.tar`` (if you don't want to retrain the supernet) under ``data`` directory.
+
+Prepare ImageNet in the standard format (follow the script `here `__\ ). Linking it to ``data/imagenet`` will be more convenient.
+
+After preparation, it's expected to have the following code structure:
+
+.. code-block:: bash
+
+ spos
+ ├── architecture_final.json
+ ├── blocks.py
+ ├── config_search.yml
+ ├── data
+ │ ├── imagenet
+ │ │ ├── train
+ │ │ └── val
+ │ └── op_flops_dict.pkl
+ ├── dataloader.py
+ ├── network.py
+ ├── readme.md
+ ├── scratch.py
+ ├── supernet.py
+ ├── tester.py
+ ├── tuner.py
+ └── utils.py
+
+Step 1. Train Supernet
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+ python supernet.py
+
+This will export the checkpoint to the ``checkpoints`` directory for the next step.
+
+NOTE: The data loading used in the official repo is `slightly different from usual `__\ , as they use BGR tensors and intentionally keep the values between 0 and 255 to align with their own DL framework. The option ``--spos-preprocessing`` will simulate the original behavior and enable you to use the pretrained checkpoints.
+
+Step 2. Evolution Search
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Single Path One-Shot leverages an evolutionary algorithm to search for the best architecture. The tester, which is responsible for testing the sampled architecture, recalculates all the batch norm statistics on a subset of training images and evaluates the architecture on the full validation set.
+
+In order to make the tuner aware of the FLOPs limit and able to calculate FLOPs, we created a new tuner called ``EvolutionWithFlops`` in ``tuner.py``\ , which inherits the tuner in the SDK.
+
+To have a search space ready for NNI framework, first run
+
+.. code-block:: bash
+
+ nnictl ss_gen -t "python tester.py"
+
+This will generate a file called ``nni_auto_gen_search_space.json``\ , which is a serialized representation of your search space.
+
+By default, it will use ``checkpoint-150000.pth.tar`` downloaded previously. In case you want to use the checkpoint trained by yourself from the last step, specify ``--checkpoint`` in the command in ``config_search.yml``.
+
+Then search with evolution tuner.
+
+.. code-block:: bash
+
+ nnictl create --config config_search.yml
+
+The final architecture exported from every epoch of evolution can be found in ``checkpoints`` under the working directory of your tuner, which, by default, is ``$HOME/nni-experiments/your_experiment_id/log``.
+
+Step 3. Train from Scratch
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+ python scratch.py
+
+By default, it will use ``architecture_final.json``. This architecture is provided by the official repo (converted into NNI format). You can use any architecture (e.g., the architecture found in step 2) with ``--fixed-arc`` option.
+
+Reference
+---------
+
+PyTorch
+^^^^^^^
+
+.. autoclass:: nni.algorithms.nas.pytorch.spos.SPOSEvolution
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.spos.SPOSSupernetTrainer
+ :members:
+
+.. autoclass:: nni.algorithms.nas.pytorch.spos.SPOSSupernetTrainingMutator
+ :members:
+
+Known Limitations
+-----------------
+
+
+* Block search only. Channel search is not supported yet.
+* Only GPU version is provided here.
+
+Current Reproduction Results
+----------------------------
+
+Reproduction is still ongoing. Due to the gap between the official release and the original paper, we compare our current results with the official repo (our run) and the paper.
+
+
+* The evolution phase is almost aligned with the official repo. Our evolution algorithm shows a converging trend and reaches ~65% accuracy at the end of the search. Nevertheless, this result is not on par with the paper. For details, please refer to `this issue `__.
+* The retrain phase is not aligned. Our retraining code, which uses the architecture released by the authors, reaches 72.14% accuracy, still leaving a gap to the 73.61% of the official release and the 74.3% reported in the original paper.
diff --git a/docs/en_US/NAS/SearchSpaceZoo.rst b/docs/en_US/NAS/SearchSpaceZoo.rst
new file mode 100644
index 0000000000..5f2fc87a15
--- /dev/null
+++ b/docs/en_US/NAS/SearchSpaceZoo.rst
@@ -0,0 +1,281 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+Search Space Zoo
+================
+
+DartsCell
+---------
+
+DartsCell is extracted from :githublink:`CNN model `. A DartsCell is a directed acyclic graph containing an ordered sequence of N nodes, where each node stands for a latent representation (e.g., a feature map in a convolutional network). Directed edges from Node 1 to Node 2 are associated with some operations that transform Node 1, and the result is stored on Node 2. The `candidate operators <#predefined-operations-darts>`__ between nodes are predefined and unchangeable. One edge represents an operation that is chosen from the predefined ones to be applied to the starting node of the edge. One cell contains two input nodes, a single output node, and ``n_node`` other nodes. The input nodes are defined as the cell outputs of the previous two layers. The output of the cell is obtained by applying a reduction operation (e.g., concatenation) to all the intermediate nodes. To make the search space continuous, the categorical choice of a particular operation is relaxed to a softmax over all possible operations. By adjusting the softmax weights on every node, the operation with the highest probability is chosen to be part of the final structure. A CNN model can be formed by stacking several cells together, which builds a search space. Note that, in the DARTS paper, all cells in the model share the same structure.
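+
+The softmax relaxation itself is easy to state in code. The toy module below is not NNI's ``DartsCell``\ ; the class name, candidate operations, and shapes are made up only to illustrate how one edge mixes its candidates with softmax-weighted architecture parameters:
+
+.. code-block:: python
+
+   import torch
+   import torch.nn as nn
+   import torch.nn.functional as F
+
+   class RelaxedEdge(nn.Module):
+       """Toy illustration of the continuous relaxation used on a DARTS edge."""
+       def __init__(self, ops):
+           super().__init__()
+           self.ops = nn.ModuleList(ops)                     # candidate operations
+           self.alpha = nn.Parameter(torch.zeros(len(ops)))  # architecture weights
+
+       def forward(self, x):
+           weights = F.softmax(self.alpha, dim=0)
+           # mixed output: softmax-weighted sum over all candidate operations
+           return sum(w * op(x) for w, op in zip(weights, self.ops))
+
+   edge = RelaxedEdge([nn.Conv2d(16, 16, 3, padding=1),
+                       nn.MaxPool2d(3, stride=1, padding=1)])
+   y = edge(torch.randn(1, 16, 8, 8))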
+
+One structure in the DARTS search space is shown below. Note that NNI merges the last of the four intermediate nodes and the output node.
+
+
+.. image:: ../../img/NAS_Darts_cell.svg
+ :target: ../../img/NAS_Darts_cell.svg
+ :alt:
+
+
+The predefined operators are shown `here <#predefined-operations-darts>`__.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.DartsCell
+ :members:
+
+Example code
+^^^^^^^^^^^^
+
+:githublink:`example code `
+
+.. code-block:: bash
+
+ git clone https://github.com/Microsoft/nni.git
+ cd nni/examples/nas/search_space_zoo
+ # search the best structure
+ python3 darts_example.py
+
+
+Candidate operators
+^^^^^^^^^^^^^^^^^^^
+
+All supported operators for Darts are listed below.
+
+
+*
+ MaxPool / AvgPool
+
+
+ * MaxPool: Call ``torch.nn.MaxPool2d``. This operation applies a 2D max pooling over all input channels. Its parameters ``kernel_size=3`` and ``padding=1`` are fixed. The pooling result will pass through a BatchNorm2d then return as the result.
+ *
+ AvgPool: Call ``torch.nn.AvgPool2d``. This operation applies a 2D average pooling over all input channels. Its parameters ``kernel_size=3`` and ``padding=1`` are fixed. The pooling result will pass through a BatchNorm2d then return as the result.
+
+ MaxPool / AvgPool with ``kernel_size=3`` and ``padding=1`` followed by BatchNorm2d
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.darts_ops.PoolBN
+
+*
+ SkipConnect
+
+ There is no operation between two nodes. Call ``torch.nn.Identity`` to forward what it gets to the output.
+
+*
+ Zero operation
+
+ There is no connection between two nodes.
+
+*
+ DilConv3x3 / DilConv5x5
+
+  DilConv3x3: (Dilated) depthwise separable Conv. It's a 3x3 depthwise convolution with ``C_in`` groups, followed by a 1x1 pointwise convolution. It reduces the number of parameters. The input is first passed through ReLU, then DilConv, and finally BatchNorm2d. **Note that the operation is not a dilated convolution, but we follow the convention in NAS papers to name it DilConv.** 3x3 DilConv has parameters ``kernel_size=3``\ , ``padding=1`` and 5x5 DilConv has parameters ``kernel_size=5``\ , ``padding=4``.
+
+ .. autoclass:: nni.nas.pytorch.search_space_zoo.darts_ops.DilConv
+
+*
+ SepConv3x3 / SepConv5x5
+
+ Composed of two DilConvs with fixed ``kernel_size=3``\ , ``padding=1`` or ``kernel_size=5``\ , ``padding=2`` sequentially.
+
+ .. autoclass:: nni.nas.pytorch.search_space_zoo.darts_ops.SepConv
+
+ENASMicroLayer
+--------------
+
+This layer is extracted from the model designed :githublink:`here `. A model contains several blocks that share the same architecture. A block is made up of some normal layers and reduction layers, ``ENASMicroLayer`` is a unified implementation of the two types of layers. The only difference between the two layers is that reduction layers apply all operations with ``stride=2``.
+
+ENAS Micro employs a DAG with N nodes in one cell, where the nodes represent local computations and the edges represent the flow of information between the N nodes. One cell contains two input nodes and a single output node. The following nodes choose two previous nodes as input and apply two operations from the `predefined ones <#predefined-operations-enas>`__\ , then add the results as the output of this node. For example, Node 4 chooses Node 1 and Node 3 as inputs, applies ``MaxPool`` and ``AvgPool`` on the inputs respectively, and then sums them as the output of Node 4. Nodes that do not serve as input for any other node are viewed as the output of the layer. If there are multiple output nodes, the model will calculate the average of these nodes as the layer output.
+
+The ENAS micro search space is shown below.
+
+
+.. image:: ../../img/NAS_ENAS_micro.svg
+ :target: ../../img/NAS_ENAS_micro.svg
+ :alt:
+
+
+The predefined operators can be seen `here <#predefined-operations-enas>`__.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.ENASMicroLayer
+ :members:
+
+The Reduction Layer is made up of two Conv operations followed by BatchNorm; each of them outputs ``C_out//2`` channels, and the results are concatenated along the channel dimension as the output. The convolutions have ``kernel_size=1`` and ``stride=2``\ , and they perform alternate sampling on the input to reduce the resolution without loss of information. This layer is wrapped in ``ENASMicroLayer``.
+
+Example code
+^^^^^^^^^^^^
+
+:githublink:`example code `
+
+.. code-block:: bash
+
+ git clone https://github.com/Microsoft/nni.git
+ cd nni/examples/nas/search_space_zoo
+ # search the best cell structure
+ python3 enas_micro_example.py
+
+
+Candidate operators
+^^^^^^^^^^^^^^^^^^^
+
+All supported operators for ENAS micro search are listed below.
+
+
+*
+ MaxPool / AvgPool
+
+
+ * MaxPool: Call ``torch.nn.MaxPool2d``. This operation applies a 2D max pooling over all input channels followed by BatchNorm2d. Its parameters are fixed to ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+ * AvgPool: Call ``torch.nn.AvgPool2d``. This operation applies a 2D average pooling over all input channels followed by BatchNorm2d. Its parameters are fixed to ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.enas_ops.Pool
+
+*
+ SepConv
+
+
+ * SepConvBN3x3: ReLU followed by a `DilConv <#DilConv>`__ and BatchNorm. Convolution parameters are ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+ *
+    SepConvBN5x5: Does the same operation as the previous one but with a different kernel size and padding, which are set to 5 and 2 respectively.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.enas_ops.SepConvBN
+
+*
+ SkipConnect
+
+ Call ``torch.nn.Identity`` to connect directly to the next cell.
+
+ENASMacroLayer
+--------------
+
+In macro search, the controller makes two decisions for each layer: i) the `operation <#macro-operations>`__ to perform on the result of the previous layer, and ii) which previous layer to connect to for skip connections. ENAS uses a controller to design the whole model architecture instead of one of its components. The output of the operation is concatenated with the tensor of the layer chosen for the skip connection. NNI provides `predefined operators <#macro-operations>`__ for macro search, which are listed in `Candidate operators <#macro-operations>`__.
+
+Part of one structure in the ENAS macro search space is shown below.
+
+
+.. image:: ../../img/NAS_ENAS_macro.svg
+ :target: ../../img/NAS_ENAS_macro.svg
+ :alt:
+
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.ENASMacroLayer
+ :members:
+
+To describe the whole search space, NNI provides a model, which is built by stacking the layers.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.ENASMacroGeneralModel
+ :members:
+
+Example code
+^^^^^^^^^^^^
+
+:githublink:`example code `
+
+.. code-block:: bash
+
+ git clone https://github.com/Microsoft/nni.git
+ cd nni/examples/nas/search_space_zoo
+ # search the best cell structure
+ python3 enas_macro_example.py
+
+
+Candidate operators
+^^^^^^^^^^^^^^^^^^^
+
+All supported operators for ENAS macro search are listed below.
+
+
+*
+ ConvBranch
+
+  All input first passes through a StdConv, which is made up of a 1x1 Conv followed by BatchNorm2d and ReLU. Then the intermediate result goes through one of the operations listed below. The final result passes through a BatchNorm2d and ReLU as a post-processing step.
+
+
+ * Separable Conv3x3: If ``separable=True``\ , the cell will use `SepConv <#DilConv>`__ instead of normal Conv operation. SepConv's ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+ * Separable Conv5x5: SepConv's ``kernel_size=5``\ , ``stride=1`` and ``padding=2``.
+  * Normal Conv3x3: If ``separable=False``\ , the cell will use a normal Conv operation with ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+ *
+ Normal Conv5x5: Conv's ``kernel_size=5``\ , ``stride=1`` and ``padding=2``.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.enas_ops.ConvBranch
+
+*
+ PoolBranch
+
+  All input first passes through a StdConv, which is made up of a 1x1 Conv followed by BatchNorm2d and ReLU. Then the intermediate result goes through a pooling operation followed by BatchNorm.
+
+
+ * AvgPool: Call ``torch.nn.AvgPool2d``. This operation applies a 2D average pooling over all input channels. Its parameters are fixed to ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+ *
+ MaxPool: Call ``torch.nn.MaxPool2d``. This operation applies a 2D max pooling over all input channels. Its parameters are fixed to ``kernel_size=3``\ , ``stride=1`` and ``padding=1``.
+
+.. autoclass:: nni.nas.pytorch.search_space_zoo.enas_ops.PoolBranch
+
+NAS-Bench-201
+-------------
+
+NAS Bench 201 defines a unified search space, which is algorithm agnostic. The predefined skeleton consists of a stack of cells that share the same architecture. Every cell contains four nodes and a DAG is formed by connecting edges among them, where the node represents the sum of feature maps and the edge stands for an operation transforming a tensor from the source node to the target node. The predefined candidate operators can be found in `Candidate operators <#nas-bench-201-reference>`__.
+
+The search space of NAS Bench 201 is shown below.
+
+
+.. image:: ../../img/NAS_Bench_201.svg
+ :target: ../../img/NAS_Bench_201.svg
+ :alt:
+
+
+.. autoclass:: nni.nas.pytorch.nasbench201.NASBench201Cell
+ :members:
+
+Example code
+^^^^^^^^^^^^
+
+:githublink:`example code `
+
+.. code-block:: bash
+
+ # for structure searching
+ git clone https://github.com/Microsoft/nni.git
+ cd nni/examples/nas/search_space_zoo
+ python3 nas_bench_201.py
+
+
+Candidate operators
+^^^^^^^^^^^^^^^^^^^
+
+All supported operators for NAS Bench 201 are listed below.
+
+
+*
+ AvgPool
+
+ If the number of input channels is not equal to the number of output channels, the input will first pass through a ``ReLUConvBN`` layer with ``kernel_size=1``\ , ``stride=1``\ , ``padding=0``\ , and ``dilation=0``.
+ Call ``torch.nn.AvgPool2d``. This operation applies a 2D average pooling over all input channels followed by BatchNorm2d. Its parameters are fixed to ``kernel_size=3`` and ``padding=1``.
+
+.. autoclass:: nni.nas.pytorch.nasbench201.nasbench201_ops.Pooling
+ :members:
+
+*
+ Conv
+
+
+  * Conv1x1: Consists of a sequence of ReLU, ``nn.Conv2d`` and BatchNorm. The Conv operation's parameters are fixed to ``kernel_size=1``\ , ``padding=0``\ , and ``dilation=1``.
+  * Conv3x3: Consists of a sequence of ReLU, ``nn.Conv2d`` and BatchNorm. The Conv operation's parameters are fixed to ``kernel_size=3``\ , ``padding=1``\ , and ``dilation=1``.
+
+.. autoclass:: nni.nas.pytorch.nasbench201.nasbench201_ops.ReLUConvBN
+ :members:
+
+*
+ SkipConnect
+
+ Call ``torch.nn.Identity`` to connect directly to the next cell.
+
+*
+ Zeroize
+
+ Generate zero tensors indicating there is no connection from the source node to the target node.
+
+.. autoclass:: nni.nas.pytorch.nasbench201.nasbench201_ops.Zero
+ :members:
diff --git a/docs/en_US/NAS/TextNAS.rst b/docs/en_US/NAS/TextNAS.rst
new file mode 100644
index 0000000000..9bf9420f88
--- /dev/null
+++ b/docs/en_US/NAS/TextNAS.rst
@@ -0,0 +1,94 @@
+TextNAS
+=======
+
+Introduction
+------------
+
+This is the implementation of the TextNAS algorithm proposed in the paper `TextNAS: A Neural Architecture Search Space tailored for Text Representation `__. TextNAS is a neural architecture search algorithm tailored for text representation. More specifically, TextNAS is based on a novel search space consisting of operators widely adopted to solve various NLP tasks, and it also supports multi-path ensembles within a single network to balance the width and depth of the architecture.
+
+The search space of TextNAS contains:
+
+.. code-block:: bash
+
+ * 1-D convolutional operator with filter size 1, 3, 5, 7
+ * recurrent operator (bi-directional GRU)
+ * self-attention operator
+ * pooling operator (max/average)
+
+
+Following the ENAS algorithm, TextNAS also utilizes parameter sharing to accelerate the search speed and adopts a reinforcement-learning controller for the architecture sampling and generation. Please refer to the paper for more details of TextNAS.
+
+Preparation
+-----------
+
+Prepare the word vectors and the SST dataset, and organize them in the data directory as shown below:
+
+.. code-block:: bash
+
+ textnas
+ ├── data
+ │ ├── sst
+ │ │ └── trees
+ │ │ ├── dev.txt
+ │ │ ├── test.txt
+ │ │ └── train.txt
+ │ └── glove.840B.300d.txt
+ ├── dataloader.py
+ ├── model.py
+ ├── ops.py
+ ├── README.md
+ ├── search.py
+ └── utils.py
+
+The following links might be helpful for finding and downloading the corresponding datasets:
+
+
+* `GloVe: Global Vectors for Word Representation `__
+
+ * `glove.840B.300d.txt `__
+
+* `Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank `__
+
+ * `trainDevTestTrees_PTB.zip `__
+
+Examples
+--------
+
+Search Space
+^^^^^^^^^^^^
+
+:githublink:`Example code `
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+ # search the best architecture
+ cd examples/nas/textnas
+
+ # view more options for search
+ python3 search.py -h
+
+After each search epoch, 10 sampled architectures will be tested directly. Their performances are expected to be 40% - 42% after 10 epochs.
+
+By default, 20 sampled architectures will be exported into ``checkpoints`` directory for next step.
+
+Retrain
+^^^^^^^
+
+.. code-block:: bash
+
+ # In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+ git clone https://github.com/Microsoft/nni.git
+
+   # enter the code folder
+ cd examples/nas/textnas
+
+ # default to retrain on sst-2
+ sh run_retrain.sh
+
+Reference
+---------
+
+TextNAS directly uses EnasTrainer, please refer to `ENAS <./ENAS.rst>`__ for the trainer APIs.
diff --git a/docs/en_US/NAS/Visualization.rst b/docs/en_US/NAS/Visualization.rst
new file mode 100644
index 0000000000..d1e70f05e0
--- /dev/null
+++ b/docs/en_US/NAS/Visualization.rst
@@ -0,0 +1,86 @@
+NAS Visualization (Experimental)
+================================
+
+Built-in Trainers Support
+-------------------------
+
+Currently, only ENAS and DARTS support visualization. The examples of `ENAS <./ENAS.rst>`__ and `DARTS <./DARTS.rst>`__ have demonstrated how to enable visualization in your code, namely, adding this before ``trainer.train()``\ :
+
+.. code-block:: python
+
+ trainer.enable_visualization()
+
+This will create a directory ``logs/`` in your working folder, in which you will find two files ``graph.json`` and ``log``.
+
+You don't have to wait until your program finishes to launch the NAS UI, but it's important that these two files have already been created. Launch the NAS UI with
+
+.. code-block:: bash
+
+ nnictl webui nas --logdir logs/ --port
+
+Visualize a Customized Trainer
+------------------------------
+
+If you are interested in how to customize a trainer, please read this `doc <./Advanced.rst#extend-the-ability-of-one-shot-trainers>`__.
+
+You should do two modifications to an existing trainer to enable visualization:
+
+
+#. Export your graph before training, with
+
+.. code-block:: python
+
+ vis_graph = self.mutator.graph(inputs)
+ # `inputs` is a dummy input to your model. For example, torch.randn((1, 3, 32, 32)).cuda()
+ # If your model has multiple inputs, it should be a tuple.
+ with open("/path/to/your/logdir/graph.json", "w") as f:
+ json.dump(vis_graph, f)
+
+
+#. Log the choices you've made. You can do it once per epoch, once per mini-batch, or at whatever frequency you'd like.
+
+.. code-block:: python
+
+ def __init__(self):
+ # ...
+ self.status_writer = open("/path/to/your/logdir/log", "w") # create a writer
+
+ def train(self):
+ # ...
+ print(json.dumps(self.mutator.status()), file=self.status_writer, flush=True) # dump a record of status
+
+If you are implementing your customized trainer by inheriting ``Trainer``\ , we have provided ``enable_visualization()`` and ``_write_graph_status()`` for ease of use. All you need to do is call ``trainer.enable_visualization()`` before training starts, and ``trainer._write_graph_status()`` each time you want to do the logging. But remember that both of these APIs are experimental and subject to change in the future.
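+
+For illustration, here is a rough sketch in the same fragment style as the snippets above; it assumes your trainer inherits ``Trainer`` and only shows where the two calls would go:
+
+.. code-block:: python
+
+   def train(self):
+       for epoch in range(self.num_epochs):
+           # ... train the supernet and the controller for one epoch ...
+           self._write_graph_status()  # record the currently selected choices for NAS UI
+
+   # before training starts
+   trainer.enable_visualization()  # writes graph.json into the log directory
+   trainer.train()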
+
+Last but not least, invoke the NAS UI with
+
+.. code-block:: bash
+
+ nnictl webui nas --logdir /path/to/your/logdir
+
+NAS UI Preview
+--------------
+
+
+.. image:: ../../img/nasui-1.png
+ :target: ../../img/nasui-1.png
+ :alt:
+
+
+
+.. image:: ../../img/nasui-2.png
+ :target: ../../img/nasui-2.png
+ :alt:
+
+
+Limitations
+-----------
+
+
+* NAS visualization only works with PyTorch >=1.4. We've tested it on PyTorch 1.3.1 and it doesn't work.
+* We rely on PyTorch support for tensorboard for graph export, which relies on ``torch.jit``. It will not work if your model doesn't support ``jit``.
+* There are known performance issues when loading a moderate-size graph with many op choices (like DARTS search space).
+
+Feedback
+--------
+
+NAS UI is currently experimental. We welcome your feedback. `Here `__ we have listed all the to-do items of NAS UI in the future. Feel free to comment (or `submit a new issue `__\ ) if you have other suggestions.
diff --git a/docs/en_US/NAS/WriteSearchSpace.rst b/docs/en_US/NAS/WriteSearchSpace.rst
new file mode 100644
index 0000000000..0281d19692
--- /dev/null
+++ b/docs/en_US/NAS/WriteSearchSpace.rst
@@ -0,0 +1,70 @@
+.. role:: raw-html(raw)
+   :format: html
+
+Write A Search Space
+====================
+
+Generally, a search space describes candidate architectures from which users want to find the best one. Different search algorithms, whether classic NAS or one-shot NAS, can be applied to the search space. NNI provides APIs to unify the expression of neural architecture search spaces.
+
+A search space can be built on a base model. This is also a common practice when a user wants to apply NAS on an existing model. Take `MNIST on PyTorch `__ as an example. Note that NNI provides the same APIs for expressing search space on PyTorch and TensorFlow.
+
+.. code-block:: python
+
+ from nni.nas.pytorch import mutables
+
+ class Net(nn.Module):
+ def __init__(self):
+ super(Net, self).__init__()
+ self.conv1 = mutables.LayerChoice([
+ nn.Conv2d(1, 32, 3, 1),
+ nn.Conv2d(1, 32, 5, 3)
+ ]) # try 3x3 kernel and 5x5 kernel
+ self.conv2 = nn.Conv2d(32, 64, 3, 1)
+ self.dropout1 = nn.Dropout2d(0.25)
+ self.dropout2 = nn.Dropout2d(0.5)
+ self.fc1 = nn.Linear(9216, 128)
+ self.fc2 = nn.Linear(128, 10)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = F.relu(x)
+ # ... same as original ...
+ return output
+
+The example above adds an option of choosing conv5x5 at conv1. The modification is as simple as declaring a ``LayerChoice`` with the original conv3x3 and a new conv5x5 as its parameter. That's it! You don't have to modify the forward function in any way. You can imagine conv1 as any other module without NAS.
+
+So how about the possibilities of connections? This can be done using ``InputChoice``. To allow for a skip connection on the MNIST example, we add another layer called conv3. In the following example, a possible connection from conv2 is added to the output of conv3.
+
+.. code-block:: python
+
+ from nni.nas.pytorch import mutables
+
+ class Net(nn.Module):
+ def __init__(self):
+ # ... same ...
+ self.conv2 = nn.Conv2d(32, 64, 3, 1)
+ self.conv3 = nn.Conv2d(64, 64, 1, 1)
+ # declaring that there is exactly one candidate to choose from
+ # search strategy will choose one or None
+ self.skipcon = mutables.InputChoice(n_candidates=1)
+ # ... same ...
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = F.relu(x)
+ x = self.conv2(x)
+ x0 = self.skipcon([x]) # choose one or none from [x]
+ x = self.conv3(x)
+ if x0 is not None: # skipconnection is open
+ x += x0
+ x = F.max_pool2d(x, 2)
+ # ... same ...
+ return output
+
+Input choice can be thought of as a callable module that receives a list of tensors and outputs the concatenation/sum/mean of some of them (sum by default), or ``None`` if none is selected. Like layer choices, input choices should be **initialized in ``__init__`` and called in ``forward``**. This is to allow search algorithms to identify these choices and do necessary preparations.
+
+``LayerChoice`` and ``InputChoice`` are both **mutables**. Mutable means "changeable". As opposed to traditional deep learning layers/modules, which have fixed operation types once defined, models with mutables are essentially a family of possible models.
+
+Users can specify a **key** for each mutable. By default, NNI will assign one for you that is globally unique, but in case users want to share choices (for example, there are two ``LayerChoice``\ s with the same candidate operations and you want them to have the same choice, i.e., if first one chooses the i-th op, the second one also chooses the i-th op), they can give them the same key. The key marks the identity for this choice and will be used in the dumped checkpoint. So if you want to increase the readability of your exported architecture, manually assigning keys to each mutable would be a good idea. For advanced usage on mutables (e.g., ``LayerChoice`` and ``InputChoice``\ ), see `Mutables <./NasReference.rst>`__.
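+
+For example, here is a minimal sketch of sharing one choice between two mutables by giving them the same key. It assumes ``LayerChoice`` accepts a ``key`` argument as described above; the class name, layer names, and channel sizes are made up:
+
+.. code-block:: python
+
+   import torch.nn as nn
+   from nni.nas.pytorch import mutables
+
+   class TwinConvNet(nn.Module):
+       def __init__(self):
+           super().__init__()
+           # both choices carry the key "conv_op", so the search algorithm
+           # picks the same candidate index (3x3 or 5x5) for both layers
+           self.conv_a = mutables.LayerChoice([nn.Conv2d(32, 32, 3, padding=1),
+                                               nn.Conv2d(32, 32, 5, padding=2)], key="conv_op")
+           self.conv_b = mutables.LayerChoice([nn.Conv2d(32, 32, 3, padding=1),
+                                               nn.Conv2d(32, 32, 5, padding=2)], key="conv_op")
+
+       def forward(self, x):
+           return self.conv_b(self.conv_a(x))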
+
+With the search space defined, the next step is to search for the best model in it. Please refer to `classic NAS algorithms <./ClassicNas.rst>`__ and `one-shot NAS algorithms <./NasGuide.rst>`__ for how to search from your defined search space.
diff --git a/docs/en_US/Overview.rst b/docs/en_US/Overview.rst
new file mode 100644
index 0000000000..639dae8b71
--- /dev/null
+++ b/docs/en_US/Overview.rst
@@ -0,0 +1,123 @@
+Overview
+========
+
+NNI (Neural Network Intelligence) is a toolkit to help users design and tune machine learning models (e.g., hyperparameters), neural network architectures, or complex system's parameters, in an efficient and automatic way. NNI has several appealing properties: ease-of-use, scalability, flexibility, and efficiency.
+
+
+* **Ease-of-use**\ : NNI can be easily installed through python pip. Only a few lines need to be added to your code in order to use NNI's power. You can use both the command-line tool and the WebUI to work with your experiments.
+* **Scalability**\ : Tuning hyperparameters or the neural architecture often demands a large amount of computational resources, while NNI is designed to fully leverage different computation resources, such as remote machines and training platforms (e.g., OpenPAI, Kubernetes). Hundreds of trials can run in parallel, depending on the capacity of your configured training platforms.
+* **Flexibility**\ : Besides rich built-in algorithms, NNI allows users to customize various hyperparameter tuning algorithms, neural architecture search algorithms, early stopping algorithms, etc. Users can also extend NNI with more training platforms, such as virtual machines, kubernetes service on the cloud. Moreover, NNI can connect to external environments to tune special applications/models on them.
+* **Efficiency**\ : We are intensively working on more efficient model tuning on both the system and algorithm level. For example, we leverage early feedback to speedup the tuning procedure.
+
+The figure below shows the high-level architecture of NNI.
+
+
+.. figure placeholder: the high-level architecture of NNI
+
+Key Concepts
+------------
+
+
+*
+ *Experiment*\ : One task of, for example, finding out the best hyperparameters of a model, finding out the best neural network architecture, etc. It consists of trials and AutoML algorithms.
+
+*
+ *Search Space*\ : The feasible region for tuning the model. For example, the value range of each hyperparameter.
+
+*
+ *Configuration*\ : An instance from the search space, that is, each hyperparameter has a specific value.
+
+*
+ *Trial*\ : An individual attempt at applying a new configuration (e.g., a set of hyperparameter values, a specific neural architecture, etc.). Trial code should be able to run with the provided configuration.
+
+*
+ *Tuner*\ : An AutoML algorithm, which generates a new configuration for the next try. A new trial will run with this configuration.
+
+*
+ *Assessor*\ : Analyze a trial's intermediate results (e.g., periodically evaluated accuracy on test dataset) to tell whether this trial can be early stopped or not.
+
+*
+ *Training Platform*\ : Where trials are executed. Depending on your experiment's configuration, it could be your local machine, or remote servers, or large-scale training platform (e.g., OpenPAI, Kubernetes).
+
+Basically, an experiment runs as follows: the tuner receives the search space and generates configurations. These configurations are submitted to training platforms, such as the local machine, remote machines, or training clusters. Their performance is reported back to the tuner. Then, new configurations are generated and submitted.
+
+For each experiment, the user only needs to define a search space and update a few lines of code, and then leverage NNI's built-in Tuner/Assessor and training platforms to search for the best hyperparameters and/or neural architecture. There are basically 3 steps:
+
+..
+
+ Step 1: `Define search space `__
+
+ Step 2: `Update model codes `__
+
+ Step 3: `Define Experiment `__
+
+
+
+.. figure placeholder: the three steps of running an NNI experiment
+
+For more details about how to run an experiment, please refer to `Get Started `__.
+
+Core Features
+-------------
+
+NNI provides a key capability to run multiple instances in parallel to find the best combination of parameters. This feature can be used in various domains, like finding the best hyperparameters for a deep learning model or finding the best configuration for databases and other complex systems with real data.
+
+NNI also provides algorithm toolkits for machine learning and deep learning, especially neural architecture search (NAS) algorithms, model compression algorithms, and feature engineering algorithms.
+
+Hyperparameter Tuning
+^^^^^^^^^^^^^^^^^^^^^
+
+This is a core and basic feature of NNI; we provide many popular `automatic tuning algorithms `__ (i.e., tuners) and `early stop algorithms `__ (i.e., assessors). You can follow the `Quick Start `__ to tune your model (or system). Basically, you follow the above three steps and then start an NNI experiment.
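+
+As a rough sketch of what the trial code in step 2 looks like (``train_and_evaluate`` and the reported metric are placeholders for your own training routine):
+
+.. code-block:: python
+
+   import nni
+
+   def train_and_evaluate(params):
+       """Hypothetical user-defined training routine returning a validation metric."""
+       ...
+
+   if __name__ == '__main__':
+       params = nni.get_next_parameter()    # receive one configuration from the tuner
+       accuracy = train_and_evaluate(params)
+       nni.report_final_result(accuracy)    # report the metric back to NNI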
+
+General NAS Framework
+^^^^^^^^^^^^^^^^^^^^^
+
+This NAS framework is for users to easily specify candidate neural architectures, for example, one can specify multiple candidate operations (e.g., separable conv, dilated conv) for a single layer, and specify possible skip connections. NNI will find the best candidate automatically. On the other hand, the NAS framework provides a simple interface for another type of user (e.g., NAS algorithm researchers) to implement new NAS algorithms. A detailed description of NAS and its usage can be found `here `__.
+
+NNI has support for many one-shot NAS algorithms such as ENAS and DARTS through NNI trial SDK. To use these algorithms you do not have to start an NNI experiment. Instead, import an algorithm in your trial code and simply run your trial code. If you want to tune the hyperparameters in the algorithms or want to run multiple instances, you can choose a tuner and start an NNI experiment.
+
+Other than one-shot NAS, NAS can also run in a classic mode where each candidate architecture runs as an independent trial job. In this mode, similar to hyperparameter tuning, users have to start an NNI experiment and choose a tuner for NAS.
+
+Model Compression
+^^^^^^^^^^^^^^^^^
+
+NNI provides an easy-to-use model compression framework to compress deep neural networks; the compressed networks typically have a much smaller model size and much faster
+inference speed without losing performance significantly. Model compression on NNI includes pruning algorithms and quantization algorithms. NNI provides many pruning and
+quantization algorithms through the NNI trial SDK. Users can directly use them in their trial code and run the trial code without starting an NNI experiment. Users can also use the NNI model compression framework to customize their own pruning and quantization algorithms.
+
+A detailed description of model compression and its usage can be found `here `__.
+
+Automatic Feature Engineering
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Automatic feature engineering is for users to find the best features for their tasks. A detailed description of automatic feature engineering and its usage can be found `here `__. It is supported through NNI trial SDK, which means you do not have to create an NNI experiment. Instead, simply import a built-in auto-feature-engineering algorithm in your trial code and directly run your trial code.
+
+The auto-feature-engineering algorithms usually have a bunch of hyperparameters themselves. If you want to automatically tune those hyperparameters, you can leverage hyperparameter tuning of NNI, that is, choose a tuning algorithm (i.e., tuner) and start an NNI experiment for it.
+
+Learn More
+----------
+
+
+* `Get started `__
+* `How to adapt your trial code on NNI? `__
+* `What are tuners supported by NNI? `__
+* `How to customize your own tuner? `__
+* `What are assessors supported by NNI? `__
+* `How to customize your own assessor? `__
+* `How to run an experiment on local? `__
+* `How to run an experiment on multiple machines? `__
+* `How to run an experiment on OpenPAI? `__
+* `Examples `__
+* `Neural Architecture Search on NNI `__
+* `Model Compression on NNI `__
+* `Automatic feature engineering on NNI `__
diff --git a/docs/en_US/Release.rst b/docs/en_US/Release.rst
new file mode 100644
index 0000000000..3b4838c9bc
--- /dev/null
+++ b/docs/en_US/Release.rst
@@ -0,0 +1,1123 @@
+.. role:: raw-html(raw)
+ :format: html
+
+
+ChangeLog
+=========
+
+Release 1.9 - 10/22/2020
+========================
+
+Major updates
+-------------
+
+Neural architecture search
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+* Support regularized evolution algorithm for NAS scenario (#2802)
+* Add NASBench201 in search space zoo (#2766)
+
+Model compression
+^^^^^^^^^^^^^^^^^
+
+
+* AMC pruner improvement: support ResNet, support reproduction of the experiments (default parameters in our example code) in the AMC paper (#2876 #2906)
+* Support constraint-aware pruning in some of our pruners to improve model compression efficiency (#2657)
+* Support "tf.keras.Sequential" in model compression for TensorFlow (#2887)
+* Support customized op in the model flops counter (#2795)
+* Support quantizing bias in QAT quantizer (#2914)
+
+Training service
+^^^^^^^^^^^^^^^^
+
+
+* Support configuring python environment using "preCommand" in remote mode (#2875)
+* Support AML training service in Windows (#2882)
+* Support reuse mode for remote training service (#2923)
+
+WebUI & nnictl
+^^^^^^^^^^^^^^
+
+
+* The "Overview" page on WebUI is redesigned with new layout (#2914)
+* Upgraded node, yarn and FabricUI, and enabled Eslint (#2894 #2873 #2744)
+* Add/Remove columns in hyper-parameter chart and trials table in "Trials detail" page (#2900)
+* JSON format beautification utility on WebUI (#2863)
+* Support nnictl command auto-completion (#2857)
+
+UT & IT
+-------
+
+
+* Add integration test for experiment import and export (#2878)
+* Add integration test for user installed builtin tuner (#2859)
+* Add unit test for nnictl (#2912)
+
+Documentation
+-------------
+
+
+* Refactor of the document for model compression (#2919)
+
+Bug fixes
+---------
+
+
+* Fix a bug in the naïve evolution tuner to correctly handle trial failures (#2695)
+* Resolve the warning "WARNING (nni.protocol) IPC pipeline not exists, maybe you are importing tuner/assessor from trial code?" (#2864)
+* Fix search space issue in experiment save/load (#2886)
+* Fix bug in experiment import data (#2878)
+* Fix annotation in remote mode (python 3.8 ast update issue) (#2881)
+* Support boolean type for "choice" hyper-parameter when customizing trial configuration on WebUI (#3003)
+
+Release 1.8 - 8/27/2020
+=======================
+
+Major updates
+-------------
+
+Training service
+^^^^^^^^^^^^^^^^
+
+
+* Access trial log directly on WebUI (local mode only) (#2718)
+* Add OpenPAI trial job detail link (#2703)
+* Support GPU scheduler in reusable environment (#2627) (#2769)
+* Add timeout for ``web_channel`` in ``trial_runner`` (#2710)
+* Show environment error message in AzureML mode (#2724)
+* Add more log information when copying data in OpenPAI mode (#2702)
+
+WebUI, nnictl and nnicli
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+* Improve hyper-parameter parallel coordinates plot (#2691) (#2759)
+* Add pagination for trial job list (#2738) (#2773)
+* Enable panel close when clicking overlay region (#2734)
+* Remove support for Multiphase on WebUI (#2760)
+* Support save and restore experiments (#2750)
+* Add intermediate results in export result (#2706)
+* Add `command `__ to list trial results with highest/lowest metrics (#2747)
+* Improve the user experience of `nnicli `__ with `examples `__ (#2713)
+
+Neural architecture search
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+* `Search space zoo: ENAS and DARTS `__ (#2589)
+* API to query intermediate results in NAS benchmark (#2728)
+
+Model compression
+^^^^^^^^^^^^^^^^^
+
+
+* Support the List/Tuple Construct/Unpack operation for TorchModuleGraph (#2609)
+* Model speedup improvement: Add support of DenseNet and InceptionV3 (#2719)
+* Support multiple successive tuple unpack operations (#2768)
+* `Doc of comparing the performance of supported pruners `__ (#2742)
+* New pruners: `Sensitivity pruner `__ (#2684) and `AMC pruner `__ (#2573) (#2786)
+* TensorFlow v2 support in model compression (#2755)
+
+Backward incompatible changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+* Update the default experiment folder from ``$HOME/nni/experiments`` to ``$HOME/nni-experiments``. If you want to view the experiments created by previous NNI releases, you can move the experiments folders from ``$HOME/nni/experiments`` to ``$HOME/nni-experiments`` manually. (#2686) (#2753)
+* Dropped support for Python 3.5 and scikit-learn 0.20 (#2778) (#2777) (#2783) (#2787) (#2788) (#2790)
+
+Others
+^^^^^^
+
+
+* Upgrade TensorFlow version in Docker image (#2732) (#2735) (#2720)
+
+Examples
+--------
+
+
+* Remove gpuNum in assessor examples (#2641)
+
+Documentation
+-------------
+
+
+* Improve customized tuner documentation (#2628)
+* Fix several typos and grammar mistakes in documentation (#2637 #2638, thanks @tomzx)
+* Improve AzureML training service documentation (#2631)
+* Improve CI of Chinese translation (#2654)
+* Improve OpenPAI training service documentation (#2685)
+* Improve documentation of community sharing (#2640)
+* Add tutorial of Colab support (#2700)
+* Improve documentation structure for model compression (#2676)
+
+Bug fixes
+---------
+
+
+* Fix mkdir error in training service (#2673)
+* Fix bug when using chmod in remote training service (#2689)
+* Fix dependency issue by making ``_graph_utils`` imported inline (#2675)
+* Fix mask issue in ``SimulatedAnnealingPruner`` (#2736)
+* Fix intermediate graph zooming issue (#2738)
+* Fix issue when dict is unordered when querying NAS benchmark (#2728)
+* Fix import issue for gradient selector dataloader iterator (#2690)
+* Fix support of adding tens of machines in remote training service (#2725)
+* Fix several styling issues in WebUI (#2762 #2737)
+* Fix support of unusual types in metrics including NaN and Infinity (#2782)
+* Fix nnictl experiment delete (#2791)
+
+Release 1.7 - 7/8/2020
+======================
+
+Major Features
+--------------
+
+Training Service
+^^^^^^^^^^^^^^^^
+
+
+* Support the AML (Azure Machine Learning) platform as an NNI training service.
+* OpenPAI jobs can be reused. When a trial is completed, the OpenPAI job won't stop; it waits for the next trial. `Refer to the reuse flag in OpenPAI config `__.
+* `Support ignoring files and folders in the code directory with .nniignore when uploading the code directory to the training service `__.
+
+Neural Architecture Search (NAS)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+*
+ `Provide NAS Open Benchmarks (NasBench101, NasBench201, NDS) with friendly APIs `__.
+
+*
+ `Support Classic NAS (i.e., non-weight-sharing mode) on TensorFlow 2.X `__.
+
+Model Compression
+^^^^^^^^^^^^^^^^^
+
+
+* Improve Model Speedup: track more dependencies among layers and automatically resolve mask conflicts; support the speedup of pruned ResNet.
+* Added new pruners, including three auto model pruning algorithms (`NetAdapt Pruner `__\ , `SimulatedAnnealing Pruner `__\ , `AutoCompress Pruner `__\ ) and the `ADMM Pruner `__.
+* Added `model sensitivity analysis tool