diff --git a/examples/trials/auto-gbdt/config.yml b/examples/trials/auto-gbdt/config.yml index 45b00a0994..38bdd5b80f 100644 --- a/examples/trials/auto-gbdt/config.yml +++ b/examples/trials/auto-gbdt/config.yml @@ -1,21 +1,10 @@ -authorName: default -experimentName: example_auto-gbdt +searchSpaceFile: search_space.json +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . - gpuNum: 0 \ No newline at end of file +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/auto-gbdt/config_metis.yml b/examples/trials/auto-gbdt/config_metis.yml index dd78272c9c..0999d8cb52 100644 --- a/examples/trials/auto-gbdt/config_metis.yml +++ b/examples/trials/auto-gbdt/config_metis.yml @@ -1,21 +1,22 @@ -authorName: default -experimentName: example_auto-gbdt-metis +# The search space of Metis tuner is slightly different from TPE and others. +# See Metis tuner' doc for details: https://nni.readthedocs.io/en/stable/Tuner/MetisTuner.html +searchSpace: + num_leaves: + _type: choice + _value: [31, 28, 24, 20] + learning_rate: + _type: choice + _value: [0.01, 0.05, 0.1, 0.2] + bagging_freq: + _type: choice + _value: [1, 2, 4, 8, 10] + +trialCommand: python3 main.py trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space_metis.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: MetisTuner + name: MetisTuner classArgs: - #choice: maximize, minimize optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/auto-gbdt/config_pai.yml b/examples/trials/auto-gbdt/config_pai.yml deleted file mode 100644 index 912971a0fa..0000000000 --- a/examples/trials/auto-gbdt/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_auto-gbdt -trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: minimize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/auto-gbdt/search_space.json b/examples/trials/auto-gbdt/search_space.json index ea09eca9e7..e55aaa6b79 100644 --- a/examples/trials/auto-gbdt/search_space.json +++ b/examples/trials/auto-gbdt/search_space.json @@ -1,6 +1,18 @@ { - "num_leaves":{"_type":"randint","_value":[20, 31]}, - "learning_rate":{"_type":"choice","_value":[0.01, 0.05, 0.1, 0.2]}, - "bagging_fraction":{"_type":"uniform","_value":[0.7, 1.0]}, - "bagging_freq":{"_type":"choice","_value":[1, 2, 4, 8, 10]} -} \ No newline at end of file + "num_leaves": { + "_type": "randint", + "_value": [20, 31] + }, + "learning_rate": { + "_type": "choice", + "_value": [0.01, 0.05, 0.1, 0.2] + }, + "bagging_fraction": { + "_type": "uniform", + "_value": [0.7, 1.0] + }, + "bagging_freq": { + "_type": "choice", + "_value": [1, 2, 4, 8, 10] + } +} diff --git a/examples/trials/auto-gbdt/search_space_metis.json b/examples/trials/auto-gbdt/search_space_metis.json deleted file mode 100644 index 6bfbc32afa..0000000000 --- a/examples/trials/auto-gbdt/search_space_metis.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "num_leaves":{"_type":"choice","_value":[31, 28, 24, 20]}, - "learning_rate":{"_type":"choice","_value":[0.01, 0.05, 0.1, 0.2]}, - "bagging_freq":{"_type":"choice","_value":[1, 2, 4, 8, 10]} -} diff --git a/examples/trials/benchmarking/hyperband/config_hyperband.yml b/examples/trials/benchmarking/hyperband/config_hyperband.yml index a979bad44c..e79e3e0d8e 100644 --- a/examples/trials/benchmarking/hyperband/config_hyperband.yml +++ b/examples/trials/benchmarking/hyperband/config_hyperband.yml @@ -1,27 +1,20 @@ -authorName: default -experimentName: example_mnist_hyperband -trialConcurrency: 2 -maxExecDuration: 100h -maxTrialNum: 10000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false advisor: - #choice: Hyperband, BOHB - builtinAdvisorName: Hyperband + name: Hyperband classArgs: + optimize_mode: maximize + #R: the maximum trial budget (could be the number of mini-batches or epochs) can be # allocated to a trial. Each trial should use trial budget to control how long it runs. R: 60 + #eta: proportion of discarded trials eta: 3 - #choice: maximize, minimize - optimize_mode: maximize + #choice: serial, parallelism exec_mode: serial -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 0 + +searchSpaceFile: search_space.json +trialCommand: python3 main.py +trialConcurrency: 10 +trainingService: + platform: local diff --git a/examples/trials/cifar10_pytorch/config.yml b/examples/trials/cifar10_pytorch/config.yml index a44cfafa2b..b70083916e 100644 --- a/examples/trials/cifar10_pytorch/config.yml +++ b/examples/trials/cifar10_pytorch/config.yml @@ -1,23 +1,14 @@ -authorName: default -experimentName: example_pytorch_cifar10 +searchSpaceFile: search_space.json +trialCommand: python3 main.py +trialGpuNumber: 1 trialConcurrency: 4 -maxExecDuration: 100h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . - gpuNum: 1 -localConfig: - maxTrialNumPerGpu: 2 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + maxTrialNumberPerGpu: 2 + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/cifar10_pytorch/config_pai.yml b/examples/trials/cifar10_pytorch/config_pai.yml deleted file mode 100644 index 58f9bf5b51..0000000000 --- a/examples/trials/cifar10_pytorch/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_pytorch_cifar10 -trialConcurrency: 1 -maxExecDuration: 100h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 main.py - codeDir: . 
- gpuNum: 1 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/cifar10_pytorch/search_space.json b/examples/trials/cifar10_pytorch/search_space.json index 723e49b6b9..562f041183 100644 --- a/examples/trials/cifar10_pytorch/search_space.json +++ b/examples/trials/cifar10_pytorch/search_space.json @@ -1,5 +1,14 @@ { - "lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]}, - "optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]}, - "model":{"_type":"choice", "_value":["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"]} + "lr": { + "_type": "choice", + "_value": [0.1, 0.01, 0.001, 0.0001] + }, + "optimizer": { + "_type": "choice", + "_value": ["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"] + }, + "model": { + "_type": "choice", + "_value": ["vgg", "resnet18", "googlenet", "densenet121", "mobilenet", "dpn92", "senet18"] + } } diff --git a/examples/trials/efficientnet/config.yml b/examples/trials/efficientnet/config.yml new file mode 100644 index 0000000000..0849b74477 --- /dev/null +++ b/examples/trials/efficientnet/config.yml @@ -0,0 +1,15 @@ +searchSpaceFile: search_net.json +trialCodeDirectory: EfficientNet-PyTorch +trialCommand: python3 main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni +trialGpuNumber: 1 +trialConcurrency: 4 +maxTrialNumber: 100 +tuner: + className: tuner.FixedProductTuner + codeDirectory: . + classArgs: + product: 2 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegputrial: diff --git a/examples/trials/efficientnet/config_local.yml b/examples/trials/efficientnet/config_local.yml deleted file mode 100644 index bbb0978be2..0000000000 --- a/examples/trials/efficientnet/config_local.yml +++ /dev/null @@ -1,18 +0,0 @@ -authorName: unknown -experimentName: example_efficient_net -trialConcurrency: 4 -maxExecDuration: 99999d -maxTrialNum: 100 -trainingServicePlatform: local -searchSpacePath: search_net.json -useAnnotation: false -tuner: - codeDir: . - classFileName: tuner.py - className: FixedProductTuner - classArgs: - product: 2 -trial: - codeDir: EfficientNet-PyTorch - command: python main.py /data/imagenet -j 12 -a efficientnet --batch-size 48 --lr 0.048 --wd 1e-5 --epochs 5 --request-from-nni - gpuNum: 1 diff --git a/examples/trials/efficientnet/config_pai.yml b/examples/trials/efficientnet/config_pai.yml deleted file mode 100644 index d9c4d52450..0000000000 --- a/examples/trials/efficientnet/config_pai.yml +++ /dev/null @@ -1,31 +0,0 @@ -authorName: unknown -experimentName: example_efficient_net -trialConcurrency: 8 -maxExecDuration: 48h -maxTrialNum: 100 -trainingServicePlatform: pai -searchSpacePath: search_net.json -useAnnotation: false -tuner: - codeDir: . 
- classFileName: tuner.py - className: FixedProductTuner - classArgs: - product: 2 -trial: - codeDir: EfficientNet-PyTorch - command: sh train_imagenet.sh - cpuNum: 4 - memoryMB: 25000 - shmMB: 25000 - gpuNum: 1 - virtualCluster: nni - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -nniManagerIp: -paiConfig: - userName: - token: - host: diff --git a/examples/trials/efficientnet/tuner.py b/examples/trials/efficientnet/tuner.py index 7e5bc8b60c..1917fdcf11 100644 --- a/examples/trials/efficientnet/tuner.py +++ b/examples/trials/efficientnet/tuner.py @@ -1,4 +1,4 @@ -from nni.gridsearch_tuner.gridsearch_tuner import GridSearchTuner +from nni.algorithms.hpo.gridsearch_tuner import GridSearchTuner class FixedProductTuner(GridSearchTuner): diff --git a/examples/trials/ga_squad/config.yml b/examples/trials/ga_squad/config.yml index e276f0633c..dcf8c5e8ed 100644 --- a/examples/trials/ga_squad/config.yml +++ b/examples/trials/ga_squad/config.yml @@ -1,19 +1,13 @@ -authorName: default -experimentName: example_ga_squad +trialCommand: python3 trial.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 +maxExperimentDuration: 1h +searchSpace: {} # hard-coded in tuner tuner: - codeDir: ../../tuners/ga_customer_tuner - classFileName: customer_tuner.py - className: CustomerTuner + className: customer_tuner.CustomerTuner + codeDirectory: ../../tuners/ga_customer_tuner classArgs: optimize_mode: maximize -trial: - command: python3 trial.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/ga_squad/config_pai.yml b/examples/trials/ga_squad/config_pai.yml deleted file mode 100644 index 756acc2e2d..0000000000 --- a/examples/trials/ga_squad/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_ga_squad -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: false -#Your nni_manager ip -nniManagerIp: 10.10.10.10 -tuner: - codeDir: ../../tuners/ga_customer_tuner - classFileName: customer_tuner.py - className: CustomerTuner - classArgs: - optimize_mode: maximize -trial: - command: chmod +x ./download.sh && ./download.sh && python3 trial.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 32869 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/kaggle-tgs-salt/config.yml b/examples/trials/kaggle-tgs-salt/config.yml index 1a0db8a51f..d385a3fa4d 100644 --- a/examples/trials/kaggle-tgs-salt/config.yml +++ b/examples/trials/kaggle-tgs-salt/config.yml @@ -1,20 +1,11 @@ -authorName: default -experimentName: example_tgs -trialConcurrency: 2 -maxExecDuration: 10h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false useAnnotation: true +trialCommand: python3 train.py +trialGpuNumber: 0 +trialConcurrency: 2 +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 train.py - codeDir: . - gpuNum: 1 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_bohb.yml b/examples/trials/mnist-advisor/config_bohb.yml index ca20ccea43..a7502ed6b9 100644 --- a/examples/trials/mnist-advisor/config_bohb.yml +++ b/examples/trials/mnist-advisor/config_bohb.yml @@ -1,23 +1,18 @@ -authorName: default -experimentName: example_mnist_bohb +# Run following command first to install dependencies of BOHB tuner: +# $ python3 -m pip install nni[BOHB] + +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 10h -maxTrialNum: 1000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 10h +maxTrialNumber: 1000 advisor: - #choice: Hyperband, BOHB - #(BOHB should be installed through nnictl) - builtinAdvisorName: BOHB + name: BOHB classArgs: max_budget: 27 min_budget: 1 eta: 3 optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_hyperband.yml b/examples/trials/mnist-advisor/config_hyperband.yml index fd06a809f5..2cf2de9b32 100644 --- a/examples/trials/mnist-advisor/config_hyperband.yml +++ b/examples/trials/mnist-advisor/config_hyperband.yml @@ -1,27 +1,16 @@ -authorName: default -experimentName: example_mnist_hyperband +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 2 -maxExecDuration: 100h -maxTrialNum: 10000 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxExperimentDuration: 100h +maxTrialNumber: 10000 advisor: - #choice: Hyperband, BOHB - builtinAdvisorName: Hyperband + name: Hyperband classArgs: - #R: the maximum trial budget (could be the number of mini-batches or epochs) can be - # allocated to a trial. Each trial should use trial budget to control how long it runs. 
- R: 100 - #eta: proportion of discarded trials - eta: 3 - #choice: maximize, minimize - optimize_mode: maximize - #choice: serial, parallelism - exec_mode: parallelism -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 + R: 100 # the maximum trial budget (could be the number of mini-batches or epochs) can be + # allocated to a trial. Each trial should use trial budget to control how long it runs. + eta: 3 # proportion of discarded trials + optimize_mode: maximize # maximize or minimize + exec_mode: parallelism # serial or parallelism +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-advisor/config_pai.yml b/examples/trials/mnist-advisor/config_pai.yml deleted file mode 100644 index 3c122b6fb4..0000000000 --- a/examples/trials/mnist-advisor/config_pai.yml +++ /dev/null @@ -1,41 +0,0 @@ -authorName: default -experimentName: example_mnist_hyperband -maxExecDuration: 1h -maxTrialNum: 10000 -trialConcurrency: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -advisor: - #choice: Hyperband, BOHB - #(BOHB should be installed through nnictl) - builtinAdvisorName: Hyperband - classArgs: - #R: the maximum trial budget - R: 100 - #eta: proportion of discarded trials - eta: 3 - #choice: maximize, minimize - optimize_mode: maximize - #choice: serial, parallelism - exec_mode: parallelism -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/mnist-annotation/config.yml b/examples/trials/mnist-annotation/config.yml index b724c7b609..937ec916de 100644 --- a/examples/trials/mnist-annotation/config.yml +++ b/examples/trials/mnist-annotation/config.yml @@ -1,20 +1,12 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false useAnnotation: true +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 1 +maxTrialNumber: 10 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
- gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-annotation/config_gpu.yml b/examples/trials/mnist-annotation/config_gpu.yml deleted file mode 100644 index df8abd3a3b..0000000000 --- a/examples/trials/mnist-annotation/config_gpu.yml +++ /dev/null @@ -1,20 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 4 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 1 diff --git a/examples/trials/mnist-annotation/config_kubeflow.yml b/examples/trials/mnist-annotation/config_kubeflow.yml deleted file mode 100644 index 37ff6f89a5..0000000000 --- a/examples/trials/mnist-annotation/config_kubeflow.yml +++ /dev/null @@ -1,31 +0,0 @@ -authorName: default -experimentName: example_dist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 1 -#choice: local, remote, pai, kubeflow -trainingServicePlatform: kubeflow -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - codeDir: . - worker: - replicas: 1 - command: python3 mnist.py - gpuNum: 0 - cpuNum: 1 - memoryMB: 8192 - image: msranni/nni:latest -kubeflowConfig: - operator: tf-operator - apiVersion: v1alpha2 - storage: nfs - nfs: - server: 10.10.10.10 - path: /var/nfs/general \ No newline at end of file diff --git a/examples/trials/mnist-annotation/config_pai.yml b/examples/trials/mnist-annotation/config_pai.yml deleted file mode 100644 index ad27baae86..0000000000 --- a/examples/trials/mnist-annotation/config_pai.yml +++ /dev/null @@ -1,34 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: /home/user/mnt - containerNFSMountPath: /mnt/data/user - paiStorageConfigName: confignfs-data -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-annotation/config_remote.yml b/examples/trials/mnist-annotation/config_remote.yml deleted file mode 100644 index 359b63044c..0000000000 --- a/examples/trials/mnist-annotation/config_remote.yml +++ /dev/null @@ -1,33 +0,0 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: remote -#choice: true, false -useAnnotation: true -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 -#machineList can be empty if the platform is local -machineList: - - ip: 10.1.1.1 - username: bob - passwd: bob123 - #port can be skip if using default ssh port 22 - #port: 22 - - ip: 10.1.1.2 - username: bob - passwd: bob123 - - ip: 10.1.1.3 - username: bob - passwd: bob123 diff --git a/examples/trials/mnist-batch-tune-keras/config.yml b/examples/trials/mnist-batch-tune-keras/config.yml index 92a87a4f7c..fae372a7aa 100644 --- a/examples/trials/mnist-batch-tune-keras/config.yml +++ b/examples/trials/mnist-batch-tune-keras/config.yml @@ -1,18 +1,10 @@ -authorName: default -experimentName: example_mnist-keras +searchSpaceFile: search_space.json +trialCommand: python3 mnist-keras.py +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: BatchTuner -trial: - command: python3 mnist-keras.py - codeDir: . - gpuNum: 0 + name: BatchTuner +trainingService: # For other platforms, check mnist-pytorch example + platform: local diff --git a/examples/trials/mnist-batch-tune-keras/config_pai.yml b/examples/trials/mnist-batch-tune-keras/config_pai.yml deleted file mode 100644 index f3c7586e54..0000000000 --- a/examples/trials/mnist-batch-tune-keras/config_pai.yml +++ /dev/null @@ -1,32 +0,0 @@ -authorName: default -experimentName: example_mnist-keras -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: BatchTuner -trial: - command: python3 mnist-keras.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 diff --git a/examples/trials/mnist-distributed/config_kubeflow.yml b/examples/trials/mnist-distributed-tfv1/config_kubeflow.yml similarity index 100% rename from examples/trials/mnist-distributed/config_kubeflow.yml rename to examples/trials/mnist-distributed-tfv1/config_kubeflow.yml diff --git a/examples/trials/mnist-distributed/dist_mnist.py b/examples/trials/mnist-distributed-tfv1/dist_mnist.py similarity index 100% rename from examples/trials/mnist-distributed/dist_mnist.py rename to examples/trials/mnist-distributed-tfv1/dist_mnist.py diff --git a/examples/trials/mnist-distributed/search_space.json b/examples/trials/mnist-distributed-tfv1/search_space.json similarity index 100% rename from examples/trials/mnist-distributed/search_space.json rename to examples/trials/mnist-distributed-tfv1/search_space.json diff --git a/examples/trials/mnist-nested-search-space/config.yml b/examples/trials/mnist-nested-search-space/config.yml index 7c1715b97c..2cff01c655 100644 --- a/examples/trials/mnist-nested-search-space/config.yml +++ b/examples/trials/mnist-nested-search-space/config.yml @@ -1,20 +1,14 @@ -authorName: default -experimentName: mnist-nested-search-space +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 trialConcurrency: 2 -maxExecDuration: 1h -maxTrialNum: 100 -#choice: local, remote -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 100 +maxExperimentDuration: 1h tuner: - #choice: TPE, Random, Anneal, Evolution - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. 
Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-nested-search-space/search_space.json b/examples/trials/mnist-nested-search-space/search_space.json index 4f35ddb354..185bc00253 100644 --- a/examples/trials/mnist-nested-search-space/search_space.json +++ b/examples/trials/mnist-nested-search-space/search_space.json @@ -1,114 +1,114 @@ { - "layer0": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer1": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer2": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - }, - "layer3": { - "_type": "choice", - "_value": [{ - "_name": "Empty" - }, - { - "_name": "Conv", - "kernel_size": { - "_type": "choice", - "_value": [1, 2, 3, 5] - } - }, - { - "_name": "Max_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - }, - { - "_name": "Avg_pool", - "pooling_size": { - "_type": "choice", - "_value": [2, 3, 5] - } - } - ] - } -} \ No newline at end of file + "layer0": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer1": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer2": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + }, + "layer3": { + "_type": "choice", + "_value": [{ + "_name": "Empty" + }, + { + "_name": "Conv", + "kernel_size": { + "_type": "choice", + "_value": [1, 2, 3, 5] + } + }, + { + "_name": "Max_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + }, + { + "_name": "Avg_pool", + "pooling_size": { + "_type": "choice", + "_value": [2, 3, 5] + } + } + ] + } +} diff --git 
a/examples/trials/mnist-pbt-tuner-pytorch/config.yml b/examples/trials/mnist-pbt-tuner-pytorch/config.yml index 508278e69f..8d648c8893 100644 --- a/examples/trials/mnist-pbt-tuner-pytorch/config.yml +++ b/examples/trials/mnist-pbt-tuner-pytorch/config.yml @@ -1,22 +1,14 @@ -authorName: default -experimentName: example_mnist_pbt_tuner_pytorch +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 1 trialConcurrency: 3 -maxExecDuration: 2h -maxTrialNum: 100 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 100 +maxExperimentDuration: 2h tuner: -# codeDir: ~/nni/src/sdk/pynni/nni/pbt_tuner -# classFileName: pbt_tuner.py -# className: PBTTuner - builtinTunerName: PBTTuner + name: PBTTuner classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 1 +trainingService: # For other platforms, check mnist-pytorch example + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-pytorch/.gitignore b/examples/trials/mnist-pytorch/.gitignore new file mode 100644 index 0000000000..1269488f7f --- /dev/null +++ b/examples/trials/mnist-pytorch/.gitignore @@ -0,0 +1 @@ +data diff --git a/examples/trials/mnist-pytorch/config.yml b/examples/trials/mnist-pytorch/config.yml index 00a95216aa..7fd35c0e9a 100644 --- a/examples/trials/mnist-pytorch/config.yml +++ b/examples/trials/mnist-pytorch/config.yml @@ -1,21 +1,14 @@ -authorName: default -experimentName: example_mnist_pytorch +# This is the minimal config file for an NNI experiment. +# Use "nnictl create --config config.yml" to launch this experiment. +# Afterwards, you can check "config_detailed.yml" for more explanation. + +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py # NOTE: change "python3" to "python" if you are using Windows +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/mnist-pytorch/config_aml.yml b/examples/trials/mnist-pytorch/config_aml.yml index 8a5618606f..3c87d38123 100644 --- a/examples/trials/mnist-pytorch/config_aml.yml +++ b/examples/trials/mnist-pytorch/config_aml.yml @@ -1,25 +1,15 @@ -authorName: default -experimentName: example_mnist_pytorch +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -trainingServicePlatform: aml -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
-  image: msranni/nni
-amlConfig:
-  subscriptionId: ${replace_to_your_subscriptionId}
-  resourceGroup: ${replace_to_your_resourceGroup}
-  workspaceName: ${replace_to_your_workspaceName}
-  computeTarget: ${replace_to_your_computeTarget}
+trainingService:
+  platform: aml
+  dockerImage: msranni/nni
+  subscriptionId: ${your subscription ID}
+  resourceGroup: ${your resource group}
+  workspaceName: ${your workspace name}
+  computeTarget: ${your compute target}
diff --git a/examples/trials/mnist-pytorch/config_detailed.yml b/examples/trials/mnist-pytorch/config_detailed.yml
new file mode 100644
index 0000000000..69dbf7ec1d
--- /dev/null
+++ b/examples/trials/mnist-pytorch/config_detailed.yml
@@ -0,0 +1,44 @@
+# This example shows more configurable fields compared to the minimal "config.yml".
+# You can use "nnictl create --config config_detailed.yml" to launch this experiment.
+# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiments.
+
+name: MNIST  # An optional name to help you distinguish experiments.
+
+# Hyper-parameter search space can either be configured here or in a separate file.
+# "config.yml" shows how to specify a separate search space file.
+# The common schema of search space is documented here:
+# https://nni.readthedocs.io/en/stable/Tutorial/SearchSpaceSpec.html
+searchSpace:
+  batch_size:
+    _type: choice
+    _value: [16, 32, 64, 128]
+  hidden_size:
+    _type: choice
+    _value: [128, 256, 512, 1024]
+  lr:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+  momentum:
+    _type: uniform
+    _value: [0, 1]
+
+trialCommand: python3 mnist.py  # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows.
+trialCodeDirectory: .  # The path of trial code. By default it's ".", which means the same directory as this config file.
+trialGpuNumber: 1  # How many GPUs each trial should use. CUDA is required when it's greater than zero.
+
+trialConcurrency: 4  # Run 4 trials concurrently.
+maxTrialNumber: 10  # Generate at most 10 trials.
+maxExperimentDuration: 1h  # Stop generating trials after 1 hour.
+
+tuner:  # Configure the tuning algorithm.
+  name: TPE  # Supported algorithms: TPE, Random, Anneal, Evolution, GridSearch, GPTuner, PBTTuner, etc.
+             # Full list: https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html
+  classArgs:  # Algorithm-specific arguments. See the tuner's doc for details.
+    optimize_mode: maximize  # "minimize" or "maximize"
+
+# Configure the training platform.
+# Supported platforms: local, remote, openpai, aml, kubeflow, kubernetes, adl.
+trainingService:
+  platform: local
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with graphical interface (e.g.
Windows 10, Ubuntu desktop) + # Reason and details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-pytorch/config_hybrid.yml b/examples/trials/mnist-pytorch/config_hybrid.yml new file mode 100644 index 0000000000..1ae3a2cc3f --- /dev/null +++ b/examples/trials/mnist-pytorch/config_hybrid.yml @@ -0,0 +1,23 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 5 +maxTrialNumber: 20 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +# For local, remote, openpai, and aml, NNI can use multiple training services at one time +trainingService: + - platform: local + - platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa + - platform: aml + dockerImage: msranni/nni + subscriptionId: ${your subscription ID} + resourceGroup: ${your resource group} + workspaceName: ${your workspace name} + computeTarget: ${your compute target} diff --git a/examples/trials/mnist-pytorch/config_openpai.yml b/examples/trials/mnist-pytorch/config_openpai.yml new file mode 100644 index 0000000000..01f90ecfae --- /dev/null +++ b/examples/trials/mnist-pytorch/config_openpai.yml @@ -0,0 +1,20 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 1 +maxTrialNumber: 10 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +trainingService: + platform: openpai + host: http://123.123.123.123 + username: ${your user name} + token: ${your token} + dockerImage: msranni/nni + trialCpuNumber: 1 + trialMemorySize: 8GB + storageConfigName: ${your storage config name} + localStorageMountPoint: ${NFS mount point on local machine} + containerStorageMountPoint: ${NFS mount point inside Docker container} diff --git a/examples/trials/mnist-pytorch/config_pai.yml b/examples/trials/mnist-pytorch/config_pai.yml deleted file mode 100644 index f821fa3ba2..0000000000 --- a/examples/trials/mnist-pytorch/config_pai.yml +++ /dev/null @@ -1,35 +0,0 @@ -authorName: default -experimentName: example_mnist_pytorch -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: pai -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . 
- gpuNum: 0 - cpuNum: 1 - memoryMB: 8196 - #The docker image to run nni job on pai - image: msranni/nni:latest - nniManagerNFSMountPath: {replace_to_your_nfs_mount_path} - containerNFSMountPath: {replace_to_your_container_mount_path} - paiStorageConfigName: {replace_to_your_storage_config_name} -paiConfig: - #The username to login pai - userName: username - #The token to login pai - token: token - #The host of restful server of pai - host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-pytorch/config_remote.yml b/examples/trials/mnist-pytorch/config_remote.yml new file mode 100644 index 0000000000..42a8546848 --- /dev/null +++ b/examples/trials/mnist-pytorch/config_remote.yml @@ -0,0 +1,24 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 4 +maxTrialNumber: 20 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +trainingService: + platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa # We recommend public key over password, it's more secure and convenient. + # You can specify more than one SSH servers: + - host: 123.123.123.123 + port: 10022 + user: nniuser + password: 12345 + pythonPath: /usr/bin # Other examples: + # /opt/python3.9/bin + # C:/Python39 + # C:/Users/USERNAME/.conda/envs/ENVNAME;C:/Users/USERNAME/.conda/envs/ENVNAME/Scripts;C:/Users/USERNAME/.conda/envs/ENVNAME/Library/bin diff --git a/examples/trials/mnist-pytorch/config_tensorboard.yml b/examples/trials/mnist-pytorch/config_tensorboard.yml index c067cd16c4..9c8839bca2 100644 --- a/examples/trials/mnist-pytorch/config_tensorboard.yml +++ b/examples/trials/mnist-pytorch/config_tensorboard.yml @@ -1,21 +1,11 @@ -authorName: default -experimentName: example_mnist_pytorch +searchSpaceFile: search_space.json +trialCommand: python3 mnist_tensorboard.py # NOTE: change "python3" to "python" if you are using Windows +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +maxTrialNumber: 10 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist_tensorboard.py - codeDir: . - gpuNum: 0 +trainingService: + platform: local diff --git a/examples/trials/mnist-pytorch/config_v2.yml b/examples/trials/mnist-pytorch/config_v2.yml deleted file mode 100644 index 370f22a7ea..0000000000 --- a/examples/trials/mnist-pytorch/config_v2.yml +++ /dev/null @@ -1,23 +0,0 @@ -searchSpace: - momentum: - _type: uniform - _value: [0, 1] - hidden_size: - _type: choice - _value: [128, 256, 512, 1024] - batch_size: - _type: choice - _value: [16, 32, 64, 128] - lr: - _type: choice - _value: [0.0001, 0.001, 0.01, 0.1] -trainingService: - platform: local -trialCodeDirectory: . 
-trialCommand: python3 mnist.py -trialConcurrency: 1 -trialGpuNumber: 0 -tuner: - name: TPE - classArgs: - optimize_mode: maximize diff --git a/examples/trials/mnist-pytorch/config_windows.yml b/examples/trials/mnist-pytorch/config_windows.yml deleted file mode 100644 index ae27d4517b..0000000000 --- a/examples/trials/mnist-pytorch/config_windows.yml +++ /dev/null @@ -1,21 +0,0 @@ -authorName: default -experimentName: example_mnist_pytorch -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: local -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false -tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE - classArgs: - #choice: maximize, minimize - optimize_mode: maximize -trial: - command: python mnist.py - codeDir: . - gpuNum: 0 diff --git a/examples/trials/mnist-sharedstorage/config_nfs.yml b/examples/trials/mnist-sharedstorage/config_nfs.yml index 0a2872c920..2b85f10224 100644 --- a/examples/trials/mnist-sharedstorage/config_nfs.yml +++ b/examples/trials/mnist-sharedstorage/config_nfs.yml @@ -32,4 +32,4 @@ sharedStorage: # usermount means you have already mount this storage on localMountPoint # nnimount means nni will try to mount this storage on localMountPoint # nomount means storage will not mount in local machine, will support partial storages in the future - localMounted: nnimount \ No newline at end of file + localMounted: nnimount diff --git a/examples/trials/mnist-tfv2/config.yml b/examples/trials/mnist-tfv2/config.yml index 06e9af6be3..7fd35c0e9a 100644 --- a/examples/trials/mnist-tfv2/config.yml +++ b/examples/trials/mnist-tfv2/config.yml @@ -1,17 +1,14 @@ -authorName: NNI Example -experimentName: MNIST TF v2.x +# This is the minimal config file for an NNI experiment. +# Use "nnictl create --config config.yml" to launch this experiment. +# Afterwards, you can check "config_detailed.yml" for more explanation. + +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py # NOTE: change "python3" to "python" if you are using Windows +trialGpuNumber: 0 trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -trainingServicePlatform: local # choices: local, remote, pai -searchSpacePath: search_space.json -useAnnotation: false tuner: - builtinTunerName: TPE # choices: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, - # GPTuner, SMAC (SMAC should be installed through nnictl) - classArgs: - optimize_mode: maximize # choices: maximize, minimize -trial: - command: python3 mnist.py - codeDir: . 
-  gpuNum: 0
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  platform: local
diff --git a/examples/trials/mnist-tfv2/config_assessor.yml b/examples/trials/mnist-tfv2/config_assessor.yml
index be9ec740ab..1a138d8e17 100644
--- a/examples/trials/mnist-tfv2/config_assessor.yml
+++ b/examples/trials/mnist-tfv2/config_assessor.yml
@@ -1,27 +1,16 @@
-authorName: NNI Example
-experimentName: MNIST TF v2.x with assessor
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 50
-#choice: local, remote
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 2
+maxTrialNumber: 50
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-assessor:
-  #choice: Medianstop, Curvefitting
-  builtinAssessorName: Curvefitting
+assessor:  # Specify the early-stopping algorithm
+  name: Curvefitting
   classArgs:
     epoch_num: 20
     threshold: 0.9
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
+trainingService:
+  platform: local
diff --git a/examples/trials/mnist-tfv2/config_detailed.yml b/examples/trials/mnist-tfv2/config_detailed.yml
new file mode 100644
index 0000000000..77ce535bcc
--- /dev/null
+++ b/examples/trials/mnist-tfv2/config_detailed.yml
@@ -0,0 +1,48 @@
+# This example shows more configurable fields compared to the minimal "config.yml".
+# You can use "nnictl create --config config_detailed.yml" to launch this experiment.
+# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiments.
+
+name: MNIST  # An optional name to help you distinguish experiments.
+
+# Hyper-parameter search space can either be configured here or in a separate file.
+# "config.yml" shows how to specify a separate search space file.
+# The common schema of search space is documented here:
+# https://nni.readthedocs.io/en/stable/Tutorial/SearchSpaceSpec.html
+searchSpace:
+  dropout_rate:
+    _type: uniform
+    _value: [0.5, 0.9]
+  conv_size:
+    _type: choice
+    _value: [2, 3, 5, 7]
+  hidden_size:
+    _type: choice
+    _value: [128, 512, 1024]
+  batch_size:
+    _type: choice
+    _value: [16, 32]
+  learning_rate:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+
+trialCommand: python3 mnist.py  # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows.
+trialCodeDirectory: .  # The path of trial code. By default it's ".", which means the same directory as this config file.
+trialGpuNumber: 1  # How many GPUs each trial should use. CUDA is required when it's greater than zero.
+
+trialConcurrency: 4  # Run 4 trials concurrently.
+maxTrialNumber: 10  # Generate at most 10 trials.
+maxExperimentDuration: 1h  # Stop generating trials after 1 hour.
+
+tuner:  # Configure the tuning algorithm.
+  name: TPE  # Supported algorithms: TPE, Random, Anneal, Evolution, GridSearch, GPTuner, PBTTuner, etc.
+             # Full list: https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html
+  classArgs:  # Algorithm-specific arguments. See the tuner's doc for details.
+    optimize_mode: maximize  # "minimize" or "maximize"
+
+# Configure the training platform.
+# Supported platforms: local, remote, openpai, aml, kubeflow, kubernetes, adl.
+# You can find config templates for some platforms in this directory, and others in the mnist-pytorch example.
+trainingService: + platform: local + useActiveGpu: false # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop) + # Reason and details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu diff --git a/examples/trials/mnist-tfv2/config_hybrid.yml b/examples/trials/mnist-tfv2/config_hybrid.yml new file mode 100644 index 0000000000..1ae3a2cc3f --- /dev/null +++ b/examples/trials/mnist-tfv2/config_hybrid.yml @@ -0,0 +1,23 @@ +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 5 +maxTrialNumber: 20 +tuner: + name: TPE + classArgs: + optimize_mode: maximize +# For local, remote, openpai, and aml, NNI can use multiple training services at one time +trainingService: + - platform: local + - platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa + - platform: aml + dockerImage: msranni/nni + subscriptionId: ${your subscription ID} + resourceGroup: ${your resource group} + workspaceName: ${your workspace name} + computeTarget: ${your compute target} diff --git a/examples/trials/mnist-tfv2/config_remote.yml b/examples/trials/mnist-tfv2/config_remote.yml index 09fb8634fe..42a8546848 100644 --- a/examples/trials/mnist-tfv2/config_remote.yml +++ b/examples/trials/mnist-tfv2/config_remote.yml @@ -1,32 +1,24 @@ -authorName: default -experimentName: example_mnist -trialConcurrency: 1 -maxExecDuration: 1h -maxTrialNum: 10 -#choice: local, remote, pai -trainingServicePlatform: remote -searchSpacePath: search_space.json -#choice: true, false -useAnnotation: false +searchSpaceFile: search_space.json +trialCommand: python3 mnist.py +trialGpuNumber: 0 +trialConcurrency: 4 +maxTrialNumber: 20 tuner: - #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner - #SMAC (SMAC should be installed through nnictl) - builtinTunerName: TPE + name: TPE classArgs: - #choice: maximize, minimize optimize_mode: maximize -trial: - command: python3 mnist.py - codeDir: . - gpuNum: 0 -#machineList can be empty if the platform is local -machineList: - - ip: ${replace_to_your_remote_machine_ip} - username: ${replace_to_your_remote_machine_username} - sshKeyPath: ${replace_to_your_remote_machine_sshKeyPath} - # Below are examples of specifying python environment. - # pythonPath: /opt/python3.7/bin - # pythonPath: C:/Python37 - # Below is an example of specifying python environment for windows anaconda user. Multiple paths separated by ';'. - # pythonPath: C:/Users/yourname/.conda/envs/myenv;C:/Users/yourname/.conda/envs/myenv/Scripts;C:/Users/yourname/.conda/envs/myenv/Library/bin - pythonPath: ${replace_to_python_environment_path_in_your_remote_machine} +trainingService: + platform: remote + machineList: + - host: ${your server's IP or domain name} + user: ${your user name} + ssh_key_file: ~/.ssh/id_rsa # We recommend public key over password, it's more secure and convenient. 
+    # You can specify more than one SSH servers:
+    - host: 123.123.123.123
+      port: 10022
+      user: nniuser
+      password: 12345
+      pythonPath: /usr/bin  # Other examples:
+                            # /opt/python3.9/bin
+                            # C:/Python39
+                            # C:/Users/USERNAME/.conda/envs/ENVNAME;C:/Users/USERNAME/.conda/envs/ENVNAME/Scripts;C:/Users/USERNAME/.conda/envs/ENVNAME/Library/bin
diff --git a/examples/trials/mnist-tfv2/config_v2.yml b/examples/trials/mnist-tfv2/config_v2.yml
deleted file mode 100644
index 64ba5c7ec8..0000000000
--- a/examples/trials/mnist-tfv2/config_v2.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-searchSpace:
-  dropout_rate:
-    _type: uniform
-    _value: [0.5, 0.9]
-  conv_size:
-    _type: choice
-    _value: [2, 3, 5, 7]
-  hidden_size:
-    _type: choice
-    _value: [128, 512, 1024]
-  batch_size:
-    _type: choice
-    _value: [16, 32]
-  learning_rate:
-    _type: choice
-    _value: [0.0001, 0.001, 0.01, 0.1]
-trainingService:
-  platform: local
-trialCodeDirectory: .
-trialCommand: python3 mnist.py
-trialConcurrency: 1
-trialGpuNumber: 0
-tuner:
-  name: TPE
-  classArgs:
-    optimize_mode: maximize
diff --git a/examples/trials/mnist-tfv2/config_windows.yml b/examples/trials/mnist-tfv2/config_windows.yml
deleted file mode 100644
index f1c12aa135..0000000000
--- a/examples/trials/mnist-tfv2/config_windows.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-authorName: NNI Example
-experimentName: MNIST TF v2.x
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python mnist.py
-  codeDir: .
-  gpuNum: 0
diff --git a/examples/trials/network_morphism/FashionMNIST/config.yml b/examples/trials/network_morphism/FashionMNIST/config.yml
index f88f7dc57a..70a6d720a9 100644
--- a/examples/trials/network_morphism/FashionMNIST/config.yml
+++ b/examples/trials/network_morphism/FashionMNIST/config.yml
@@ -1,29 +1,18 @@
-authorName: default
-experimentName: example_FashionMNIST-network-morphism
+trialCommand: python3 FashionMNIST_keras.py
+trialGpuNumber: 1
 trialConcurrency: 4
-maxExecDuration: 48h
-maxTrialNum: 200
-#choice: local, remote, pai
-trainingServicePlatform: local
-#searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+maxExperimentDuration: 48h
+maxTrialNumber: 200
+searchSpace: {}  # search space of NetworkMorphism is provided via classArgs
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: NetworkMorphism
+  name: NetworkMorphism
   classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    #for now, this tuner only supports cv domain
-    task: cv
-    #input image width
-    input_width: 28
-    #input image channel
-    input_channel: 1
-    #number of classes
-    n_output_node: 10
-trial:
-  command: python3 FashionMNIST_keras.py
-  codeDir: .
-  gpuNum: 1
+    optimize_mode: maximize  # maximize or minimize
+    task: cv  # for now, this tuner only supports cv domain
+    input_width: 28  # input image width
+    input_channel: 1  # input image channel
+    n_output_node: 10  # number of classes
+trainingService:
+  platform: local
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop)
+  # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
diff --git a/examples/trials/network_morphism/FashionMNIST/config_pai.yml b/examples/trials/network_morphism/FashionMNIST/config_pai.yml
deleted file mode 100644
index 935180163c..0000000000
--- a/examples/trials/network_morphism/FashionMNIST/config_pai.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-authorName: default
-experimentName: example_FashionMNIST-network-morphism
-trialConcurrency: 1
-maxExecDuration: 24h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: NetworkMorphism
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    # for now, this tuner only supports cv domain
-    task: cv
-    #input image width
-    input_width: 28
-    #input image channel
-    input_channel: 1
-    #number of classes
-    n_output_node: 10
-trial:
-  command: python3 FashionMNIST_keras.py
-  codeDir: .
-  gpuNum: 1
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
diff --git a/examples/trials/network_morphism/cifar10/config.yml b/examples/trials/network_morphism/cifar10/config.yml
index 35a96bb41c..0e6cc00b6f 100644
--- a/examples/trials/network_morphism/cifar10/config.yml
+++ b/examples/trials/network_morphism/cifar10/config.yml
@@ -1,29 +1,18 @@
-authorName: default
-experimentName: example_cifar10-network-morphism
+trialCommand: python3 cifar10_keras.py
+trialGpuNumber: 1
 trialConcurrency: 4
-maxExecDuration: 48h
-maxTrialNum: 200
-#choice: local, remote, pai
-trainingServicePlatform: local
-#searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+maxExperimentDuration: 48h
+maxTrialNumber: 200
+searchSpace: {}  # search space of NetworkMorphism is provided via classArgs
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: NetworkMorphism
+  name: NetworkMorphism
   classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    #for now, this tuner only supports cv domain
-    task: cv
-    #input image width
-    input_width: 32
-    #input image channel
-    input_channel: 3
-    #number of classes
-    n_output_node: 10
-trial:
-  command: python3 cifar10_keras.py
-  codeDir: .
-  gpuNum: 1
+    optimize_mode: maximize  # maximize or minimize
+    task: cv  # for now, this tuner only supports cv domain
+    input_width: 32  # input image width
+    input_channel: 3  # input image channel
+    n_output_node: 10  # number of classes
+trainingService:
+  platform: local
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with graphical interface (e.g. Windows 10, Ubuntu desktop)
+  # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
diff --git a/examples/trials/network_morphism/cifar10/config_pai.yml b/examples/trials/network_morphism/cifar10/config_pai.yml
deleted file mode 100644
index a377cc004a..0000000000
--- a/examples/trials/network_morphism/cifar10/config_pai.yml
+++ /dev/null
@@ -1,42 +0,0 @@
-authorName: default
-experimentName: example_cifar10-network-morphism
-trialConcurrency: 1
-maxExecDuration: 24h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: NetworkMorphism
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    # for now, this tuner only supports cv domain
-    task: cv
-    #input image width
-    input_width: 32
-    #input image channel
-    input_channel: 3
-    #number of classes
-    n_output_node: 10
-trial:
-  command: python3 cifar10_keras.py
-  codeDir: .
-  gpuNum: 1
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
diff --git a/examples/trials/sklearn/classification/config.yml b/examples/trials/sklearn/classification/config.yml
index 2071ee0385..a1e6bec0f6 100644
--- a/examples/trials/sklearn/classification/config.yml
+++ b/examples/trials/sklearn/classification/config.yml
@@ -1,20 +1,11 @@
-authorName: default
-experimentName: example_sklearn-classification
+searchSpaceFile: search_space.json
+trialCommand: python3 main.py
 trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 100
-#choice: local, remote
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+maxTrialNumber: 100
+maxExperimentDuration: 1h
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 0
\ No newline at end of file
+trainingService: # For other platforms, check mnist-pytorch example
+  platform: local
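Note: `trialCommand: python3 main.py` in the sklearn examples above refers to the existing trial script shipped with each example; the sketch below is only an illustration of the trial-side NNI API that pairs with these configs. The hyperparameter names ('C', 'kernel') and the SVC model are assumptions for demonstration, not taken from this diff.

# Illustrative trial sketch -- NOT the actual examples/trials/sklearn/classification/main.py.
import nni
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

params = nni.get_next_parameter() or {}        # one sample drawn from search_space.json
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = SVC(C=params.get('C', 1.0), kernel=params.get('kernel', 'rbf'))
model.fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
nni.report_final_result(accuracy)              # maximized, per optimize_mode: maximize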
diff --git a/examples/trials/sklearn/classification/config_pai.yml b/examples/trials/sklearn/classification/config_pai.yml
deleted file mode 100644
index 764cfbedae..0000000000
--- a/examples/trials/sklearn/classification/config_pai.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-authorName: default
-experimentName: example_sklearn
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 100
-#choice: local, remote, pai
-trainingServicePlatform: pai
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner,MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
diff --git a/examples/trials/sklearn/regression/config.yml b/examples/trials/sklearn/regression/config.yml
index 35f0c5d617..c3fcf52c3e 100644
--- a/examples/trials/sklearn/regression/config.yml
+++ b/examples/trials/sklearn/regression/config.yml
@@ -1,20 +1,11 @@
-authorName: default
-experimentName: example_sklearn-regression
+searchSpaceFile: search_space.json
+trialCommand: python3 main.py
 trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 30
-#choice: local, remote
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
+maxTrialNumber: 30
+maxExperimentDuration: 1h
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  builtinTunerName: TPE
+  name: TPE
   classArgs:
-    #choice: maximize, minimize
     optimize_mode: maximize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 0
\ No newline at end of file
+trainingService: # For other platforms, check mnist-pytorch example
+  platform: local
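For readers migrating their own v1 YAML files, the top-level key renames that recur throughout the example configs in this diff are collected below as a plain Python dict. This is a reading aid only, not an NNI API; the actual v1-to-v2 conversion logic lives in nni/experiment/config/convert.py, which is updated later in this diff.

# Reading aid only -- v1 -> v2 key renames visible in the YAML hunks above
# (nested v1 keys written with dots).
V1_TO_V2_KEYS = {
    'maxExecDuration': 'maxExperimentDuration',
    'maxTrialNum': 'maxTrialNumber',
    'searchSpacePath': 'searchSpaceFile',
    'trainingServicePlatform': 'trainingService.platform',
    'tuner.builtinTunerName': 'tuner.name',
    'trial.command': 'trialCommand',
    'trial.codeDir': 'trialCodeDirectory',
    'trial.gpuNum': 'trialGpuNumber',
}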
diff --git a/examples/trials/sklearn/regression/config_pai.yml b/examples/trials/sklearn/regression/config_pai.yml
deleted file mode 100644
index 89c6d49c42..0000000000
--- a/examples/trials/sklearn/regression/config_pai.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-authorName: default
-experimentName: example_sklearn
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 100
-#choice: local, remote, pai
-trainingServicePlatform: pai
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
diff --git a/nni/experiment/config/base.py b/nni/experiment/config/base.py
index ac7b6024c1..d1fc92a26f 100644
--- a/nni/experiment/config/base.py
+++ b/nni/experiment/config/base.py
@@ -124,7 +124,7 @@ def validate(self) -> None:
             type_name = str(field.type).replace('typing.', '')
             optional = any([
                 type_name.startswith('Optional['),
-                type_name.startswith('Union[') and 'NoneType' in type_name,
+                type_name.startswith('Union[') and 'None' in type_name,
                 type_name == 'Any'
             ])
             if value is None:
diff --git a/nni/experiment/config/common.py b/nni/experiment/config/common.py
index 3937c16bbf..c7cd64a7da 100644
--- a/nni/experiment/config/common.py
+++ b/nni/experiment/config/common.py
@@ -29,6 +29,8 @@ def validate(self):
         super().validate()
         _validate_algo(self)
 
+    _canonical_rules = {'code_directory': util.canonical_path}
+
 @dataclass(init=False)
 class AlgorithmConfig(_AlgorithmConfig):
     name: str
@@ -37,7 +39,7 @@ class AlgorithmConfig(_AlgorithmConfig):
 
 @dataclass(init=False)
 class CustomAlgorithmConfig(_AlgorithmConfig):
     class_name: str
-    class_directory: Optional[PathLike] = '.'
+    code_directory: Optional[PathLike] = '.'
     class_args: Optional[Dict[str, Any]] = None
 
@@ -67,7 +69,7 @@ class ExperimentConfig(ConfigBase):
     debug: bool = False
     log_level: Optional[str] = None
     experiment_working_directory: PathLike = '~/nni-experiments'
-    tuner_gpu_indices: Optional[Union[List[int], str]] = None
+    tuner_gpu_indices: Union[List[int], str, int, None] = None
     tuner: Optional[_AlgorithmConfig] = None
     assessor: Optional[_AlgorithmConfig] = None
     advisor: Optional[_AlgorithmConfig] = None
@@ -137,7 +139,7 @@ def _validation_rules(self):
         'trial_code_directory': util.canonical_path,
         'max_experiment_duration': lambda value: f'{util.parse_time(value)}s' if value is not None else None,
         'experiment_working_directory': util.canonical_path,
-        'tuner_gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value,
+        'tuner_gpu_indices': util.canonical_gpu_indices,
         'tuner': lambda config: None if config is None or config.name == '_none_' else config.canonical(),
         'assessor': lambda config: None if config is None or config.name == '_none_' else config.canonical(),
         'advisor': lambda config: None if config is None or config.name == '_none_' else config.canonical(),
diff --git a/nni/experiment/config/convert.py b/nni/experiment/config/convert.py
index 06db0670f7..0b4b332d08 100644
--- a/nni/experiment/config/convert.py
+++ b/nni/experiment/config/convert.py
@@ -249,13 +249,13 @@ def convert_algo(algo_type, v1, v2):
         v2_algo = AlgorithmConfig(name=builtin_name, class_args=class_args)
 
     else:
-        class_directory = util.canonical_path(v1_algo.pop('codeDir'))
+        code_directory = util.canonical_path(v1_algo.pop('codeDir'))
         class_file_name = v1_algo.pop('classFileName')
         assert class_file_name.endswith('.py')
         class_name = class_file_name[:-3] + '.' + v1_algo.pop('className')
 
         v2_algo = CustomAlgorithmConfig(
             class_name=class_name,
-            class_directory=class_directory,
+            code_directory=code_directory,
             class_args=class_args
         )
diff --git a/nni/experiment/config/local.py b/nni/experiment/config/local.py
index 01654c7231..90b92093fd 100644
--- a/nni/experiment/config/local.py
+++ b/nni/experiment/config/local.py
@@ -5,6 +5,7 @@
 from typing import List, Optional, Union
 
 from .common import TrainingServiceConfig
+from . import util
 
 __all__ = ['LocalConfig']
 
@@ -13,10 +14,10 @@ class LocalConfig(TrainingServiceConfig):
     platform: str = 'local'
     use_active_gpu: Optional[bool] = None
     max_trial_number_per_gpu: int = 1
-    gpu_indices: Optional[Union[List[int], str]] = None
+    gpu_indices: Union[List[int], str, int, None] = None
 
     _canonical_rules = {
-        'gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value
+        'gpu_indices': util.canonical_gpu_indices
     }
 
     _validation_rules = {
diff --git a/nni/experiment/config/remote.py b/nni/experiment/config/remote.py
index d2ee34eff5..29c47fcaa8 100644
--- a/nni/experiment/config/remote.py
+++ b/nni/experiment/config/remote.py
@@ -22,12 +22,12 @@ class RemoteMachineConfig(ConfigBase):
     ssh_passphrase: Optional[str] = None
     use_active_gpu: bool = False
     max_trial_number_per_gpu: int = 1
-    gpu_indices: Optional[Union[List[int], str]] = None
+    gpu_indices: Union[List[int], str, int, None] = None
     python_path: Optional[str] = None
 
     _canonical_rules = {
         'ssh_key_file': util.canonical_path,
-        'gpu_indices': lambda value: [int(idx) for idx in value.split(',')] if isinstance(value, str) else value,
+        'gpu_indices': util.canonical_gpu_indices
     }
 
     _validation_rules = {
diff --git a/nni/experiment/config/util.py b/nni/experiment/config/util.py
index fa81aedeaa..62a56f6b00 100644
--- a/nni/experiment/config/util.py
+++ b/nni/experiment/config/util.py
@@ -92,3 +92,10 @@ def _parse_unit(string, target_unit, all_units):
             value = float(number) * factor
             return math.ceil(value / all_units[target_unit])
     raise ValueError(f'Unsupported unit in "{string}"')
+
+def canonical_gpu_indices(indices: Union[List[int], str, int, None]) -> Optional[List[int]]:
+    if isinstance(indices, str):
+        return [int(idx) for idx in indices.split(',')]
+    if isinstance(indices, int):
+        return [indices]
+    return indices
diff --git a/nni/tools/nnictl/launcher.py b/nni/tools/nnictl/launcher.py
index 16e88307e5..9144bc0267 100644
--- a/nni/tools/nnictl/launcher.py
+++ b/nni/tools/nnictl/launcher.py
@@ -403,9 +403,13 @@ def launch_experiment(args, experiment_config, mode, experiment_id, config_versi
         if not os.path.isdir(path):
             os.makedirs(path)
         path = tempfile.mkdtemp(dir=path)
-        nas_mode = experiment_config['trial'].get('nasMode', 'classic_mode')
-        code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode)
-        experiment_config['trial']['codeDir'] = code_dir
+        if config_version == 1:
+            nas_mode = experiment_config['trial'].get('nasMode', 'classic_mode')
+            code_dir = expand_annotations(experiment_config['trial']['codeDir'], path, nas_mode=nas_mode)
+            experiment_config['trial']['codeDir'] = code_dir
+        else:
+            code_dir = expand_annotations(experiment_config['trialCodeDirectory'], path)
+            experiment_config['trialCodeDirectory'] = code_dir
         search_space = generate_search_space(code_dir)
         experiment_config['searchSpace'] = search_space
         assert search_space, ERROR_INFO % 'Generated search space is empty'
diff --git a/nni/tools/package_utils/__init__.py b/nni/tools/package_utils/__init__.py
index 9a86cbe75e..7f8f441965 100644
--- a/nni/tools/package_utils/__init__.py
+++ b/nni/tools/package_utils/__init__.py
@@ -187,7 +187,7 @@ def create_customized_class_instance(class_params):
 
     Returns customized class instance.
     """
-    code_dir = class_params.get('classDirectory')
+    code_dir = class_params.get('codeDirectory')
     qualified_class_name = class_params.get('className')
     class_args = class_params.get('classArgs')
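The new canonical_gpu_indices helper above replaces the duplicated string-splitting lambdas in common.py, local.py and remote.py, and additionally accepts a bare int. A minimal usage sketch, assuming this branch of nni is importable (the helper is internal, not a public API):

# Minimal usage sketch of the helper defined in nni/experiment/config/util.py above.
from nni.experiment.config.util import canonical_gpu_indices

print(canonical_gpu_indices('1,3'))    # [1, 3]  -- comma-separated string
print(canonical_gpu_indices(0))        # [0]     -- bare int, newly accepted
print(canonical_gpu_indices([0, 2]))   # [0, 2]  -- list passes through unchanged
print(canonical_gpu_indices(None))     # None    -- unset stays unset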