Skip to content

Commit

Permalink
Support save and open experiments (microsoft#2750)
Browse files Browse the repository at this point in the history
(cherry picked from commit d5072a2)
  • Loading branch information
SparkSnail authored and LovPe committed Aug 14, 2020
1 parent 85d6b90 commit bb2ed15
Show file tree
Hide file tree
Showing 6 changed files with 263 additions and 19 deletions.
59 changes: 56 additions & 3 deletions docs/en_US/Tutorial/Nnictl.md
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,6 @@ Debug mode will disable version check function in Trialkeeper.
|--all| False| |delete all of experiments|
<a name="export"></a>
* __nnictl experiment export__
* Description
Expand Down Expand Up @@ -531,6 +528,62 @@ Debug mode will disable version check function in Trialkeeper.
nnictl experiment import [experiment_id] -f experiment_data.json
```

* __nnictl experiment save__
* Description

Save nni experiment metadata and code data.

* Usage

```bash
nnictl experiment save [OPTIONS]
```

* Options

|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| True| |The id of the experiment you want to save|
|--path, -p| False| |the folder path to store nni experiment data, default current working directory|
|--saveCodeDir, -s| False| |save codeDir data of the experiment, default False|

* Examples

> save an experiment

```bash
nnictl experiment save [experiment_id] --saveCodeDir
```

* __nnictl experiment load__
* Description

Load an nni experiment.

* Usage

```bash
nnictl experiment load [OPTIONS]
```

* Options

|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|--path, -p| True| |the file path of nni package|
|--codeDir, -c| True| |the path of codeDir for the loaded experiment; code stored in the experiment package will also be copied to this path|
|--logDir, -l| False| |the path of logDir for loaded experiment|

* Examples

> load an experiment

```bash
nnictl experiment load --path [path] --codeDir [codeDir]
```



<a name="platform"></a>
### Manage platform information

Expand Down
12 changes: 12 additions & 0 deletions tools/nni_cmd/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
import os
import sys
import json
import tempfile
import socket
import string
import random
import ruamel.yaml as yaml
import psutil
from colorama import Fore
Expand Down Expand Up @@ -83,3 +86,12 @@ def check_tensorboard_version():
print_error('import tensorboard error!')
exit(1)

def generate_temp_dir():
    '''Create and return a new, uniquely named folder under <system temp dir>/nni.

    Returns
    -------
    str
        Path of the freshly created directory. The caller owns it and is
        responsible for removing it when done.
    '''
    temp_root = os.path.join(tempfile.gettempdir(), 'nni')
    os.makedirs(temp_root, exist_ok=True)
    # mkdtemp chooses the name and creates the directory in a single atomic step,
    # so two concurrent callers can never end up with the same folder (the
    # previous "check os.path.exists, then makedirs" loop had that race window).
    # The directory is also created accessible only to the current user.
    return tempfile.mkdtemp(prefix='', dir=temp_root)
8 changes: 4 additions & 4 deletions tools/nni_cmd/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def __init__(self):
self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment')
self.experiments = self.read_file()

def add_experiment(self, expId, port, time, file_name, platform, experiment_name):
def add_experiment(self, expId, port, startTime, file_name, platform, experiment_name, endTime='N/A', status='INITIALIZED'):
'''set {key:value} paris to self.experiment'''
self.experiments[expId] = {}
self.experiments[expId]['port'] = port
self.experiments[expId]['startTime'] = time
self.experiments[expId]['endTime'] = 'N/A'
self.experiments[expId]['status'] = 'INITIALIZED'
self.experiments[expId]['startTime'] = startTime
self.experiments[expId]['endTime'] = endTime
self.experiments[expId]['status'] = status
self.experiments[expId]['fileName'] = file_name
self.experiments[expId]['platform'] = platform
self.experiments[expId]['experimentName'] = experiment_name
Expand Down
2 changes: 2 additions & 0 deletions tools/nni_cmd/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl')

NNI_HOME_DIR = os.path.join(os.path.expanduser('~'), 'nni-experiments')

ERROR_INFO = 'ERROR: '
NORMAL_INFO = 'INFO: '
WARNING_INFO = 'WARNING: '
Expand Down
35 changes: 25 additions & 10 deletions tools/nni_cmd/nnictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\
log_trial, experiment_clean, platform_clean, experiment_list, \
monitor_experiment, export_trials_data, trial_codegen, webui_url, \
get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas
get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas, \
save_experiment, load_experiment
from .package_management import package_install, package_uninstall, package_show, package_list
from .constants import DEFAULT_REST_PORT
from .tensorboard_utils import start_tensorboard, stop_tensorboard
Expand Down Expand Up @@ -129,15 +130,6 @@ def parse_args():
parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment')
parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments')
parser_experiment_clean.set_defaults(func=experiment_clean)

#parse experiment command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_experiment
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)

#import tuning data
parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data')
parser_import_data.add_argument('id', nargs='?', help='the id of experiment')
Expand All @@ -149,6 +141,29 @@ def parse_args():
parser_trial_export.add_argument('--type', '-t', choices=['json', 'csv'], required=True, dest='type', help='target file type')
parser_trial_export.add_argument('--filename', '-f', required=True, dest='path', help='target file path')
parser_trial_export.set_defaults(func=export_trials_data)
#save an NNI experiment
parser_save_experiment = parser_experiment_subparsers.add_parser('save', help='save an experiment')
parser_save_experiment.add_argument('id', nargs='?', help='the id of experiment')
parser_save_experiment.add_argument('--path', '-p', required=False, help='the folder path to store nni experiment data, \
default current working directory')
parser_save_experiment.add_argument('--saveCodeDir', '-s', action='store_true', default=False, help='save codeDir data \
of the experiment')
parser_save_experiment.set_defaults(func=save_experiment)
#load an NNI experiment
parser_load_experiment = parser_experiment_subparsers.add_parser('load', help='load an experiment')
parser_load_experiment.add_argument('--path', '-p', required=True, help='the path of nni package file')
parser_load_experiment.add_argument('--codeDir', '-c', required=True, help='the path of codeDir for loaded experiment, \
this path will also put the code in the loaded experiment package')
parser_load_experiment.add_argument('--logDir', '-l', required=False, help='the path of logDir for loaded experiment')
parser_load_experiment.set_defaults(func=load_experiment)

#parse platform command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_platform
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)

#TODO:finish webui function
#parse board command
Expand Down
166 changes: 164 additions & 2 deletions tools/nni_cmd/nnictl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url
from .config_utils import Config, Experiments
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir
from .command_utils import check_output_command, kill_command
from .ssh_utils import create_ssh_sftp_client, remove_remote_directory

Expand Down Expand Up @@ -736,3 +736,165 @@ def search_space_auto_gen(args):
print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path))
else:
print_normal('Generate search space done: \'{}\'.'.format(file_path))

def save_experiment(args):
    '''Save a stopped experiment into a zip package named nni_experiment_<id>.zip.

    The package contains:
      * experiment/          -- a copy of the experiment's log directory
      * nnictl/.experiment   -- the experiment's registry entry (plus its id) as json
      * nnictl/<fileName>/   -- the experiment's nnictl config folder
      * code/                -- the trial codeDir, only when --saveCodeDir is set

    Parameters
    ----------
    args : argparse.Namespace
        Uses ``args.id`` (experiment id), ``args.path`` (optional output folder,
        defaults to the current working directory) and ``args.saveCodeDir``.

    Exits the process with status 1 if the id is missing or unknown, the
    experiment is not STOPPED, its log directory does not exist, or the
    metadata file cannot be written.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if args.id is None:
        print_error('Please set experiment id.')
        exit(1)
    if args.id not in experiment_dict:
        print_error('Cannot find experiment {0}.'.format(args.id))
        exit(1)
    if experiment_dict[args.id].get('status') != 'STOPPED':
        print_error('Can only save stopped experiment!')
        exit(1)
    print_normal('Saving...')
    config_file_name = experiment_dict[args.id]['fileName']
    nni_config = Config(config_file_name)
    logDir = os.path.join(NNI_HOME_DIR, args.id)
    if nni_config.get_config('logDir'):
        logDir = os.path.join(nni_config.get_config('logDir'), args.id)
    # Validate before creating the temp folder, so a failed check leaves nothing behind.
    if not os.path.exists(logDir):
        print_error('logDir: %s does not exist!' % logDir)
        exit(1)

    temp_root_dir = generate_temp_dir()
    try:
        # Step1. Copy logDir to temp folder
        temp_experiment_dir = os.path.join(temp_root_dir, 'experiment')
        shutil.copytree(logDir, temp_experiment_dir)

        # Step2. Copy nnictl metadata to temp folder
        temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl')
        os.makedirs(temp_nnictl_dir, exist_ok=True)
        metadata_path = os.path.join(temp_nnictl_dir, '.experiment')
        # Work on a copy so the in-memory experiment registry is not modified.
        experiment_metadata = dict(experiment_dict[args.id])
        experiment_metadata['id'] = args.id
        try:
            with open(metadata_path, 'w') as file:
                json.dump(experiment_metadata, file)
        except IOError:
            print_error('Write file to %s failed!' % metadata_path)
            exit(1)
        nnictl_config_dir = os.path.join(NNICTL_HOME_DIR, config_file_name)
        shutil.copytree(nnictl_config_dir, os.path.join(temp_nnictl_dir, config_file_name))

        # Step3. Copy code dir
        if args.saveCodeDir:
            temp_code_dir = os.path.join(temp_root_dir, 'code')
            shutil.copytree(nni_config.get_config('experimentConfig')['trial']['codeDir'], temp_code_dir)

        # Step4. Archive folder
        zip_package_name = 'nni_experiment_%s' % args.id
        if args.path:
            # Expand '~' the same way load_experiment does for its --path argument.
            output_dir = os.path.expanduser(args.path)
            os.makedirs(output_dir, exist_ok=True)
            zip_package_name = os.path.join(output_dir, zip_package_name)
        shutil.make_archive(zip_package_name, 'zip', temp_root_dir)
        print_normal('Save to %s.zip success!' % zip_package_name)
    finally:
        # Step5. Cleanup temp data. Runs on success, on exit(1) (SystemExit) and on
        # unexpected errors; best-effort so it never masks the original failure.
        shutil.rmtree(temp_root_dir, ignore_errors=True)

def load_experiment(args):
    '''Load an experiment from a package produced by ``nnictl experiment save``.

    The package is unpacked into a temporary folder and validated. Its nnictl
    config folder is then copied under NNICTL_HOME_DIR, its experiment data is
    copied to <logDir>/<experiment id>, any packaged code is copied into codeDir
    (existing files are never overwritten), and the experiment is registered.

    Parameters
    ----------
    args : argparse.Namespace
        Uses ``args.path`` (package file), ``args.codeDir`` (existing folder that
        becomes the experiment's trial codeDir) and ``args.logDir`` (optional
        existing folder; falls back to the packaged config's logDir, then to
        NNI_HOME_DIR).

    Exits the process with status 1 when a path does not exist, the package is
    unreadable or incomplete, or the experiment id is already registered.
    '''
    def is_plain_name(name):
        '''Return True if name is one non-empty path component (no separators, not "." or "..").'''
        return isinstance(name, str) and name not in ('', '.', '..') and os.path.basename(name) == name

    # Validate all user supplied paths on their expanded form, before any temp data exists.
    package_path = os.path.expanduser(args.path)
    if not os.path.exists(package_path):
        print_error('file path %s does not exist!' % args.path)
        exit(1)
    codeDir = os.path.expanduser(args.codeDir)
    if not os.path.exists(codeDir):
        print_error('Invalid: codeDir path does not exist!')
        exit(1)
    logDir = os.path.expanduser(args.logDir) if args.logDir else None
    if logDir and not os.path.exists(logDir):
        print_error('Invalid: logDir path does not exist!')
        exit(1)

    temp_root_dir = generate_temp_dir()
    try:
        try:
            shutil.unpack_archive(package_path, temp_root_dir)
        except shutil.ReadError as err:
            print_error('Invalid archive file: %s' % err)
            exit(1)
        print_normal('Loading...')

        # Step1. Validation
        experiment_temp_dir = os.path.join(temp_root_dir, 'experiment')
        if not os.path.exists(os.path.join(experiment_temp_dir, 'db')):
            print_error('Invalid archive file: db file does not exist!')
            exit(1)
        nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl')
        metadata_path = os.path.join(nnictl_temp_dir, '.experiment')
        if not os.path.exists(metadata_path):
            print_error('Invalid archive file: nnictl metadata file does not exist!')
            exit(1)
        try:
            with open(metadata_path, 'r') as file:
                experiment_metadata = json.load(file)
        except ValueError as err:
            print_error('Invalid nnictl metadata file: %s' % err)
            exit(1)
        if not isinstance(experiment_metadata, dict):
            print_error('Invalid nnictl metadata file: content is not a json object!')
            exit(1)
        experiment_id = experiment_metadata.get('id')
        config_file_name = experiment_metadata.get('fileName')
        # Both values are joined into paths that get removed and overwritten below.
        # An empty value or one containing '..' or a separator would point rmtree
        # at NNICTL_HOME_DIR, the logDir itself, or somewhere outside them.
        if not is_plain_name(experiment_id) or not is_plain_name(config_file_name):
            print_error('Invalid nnictl metadata file: id and fileName must be plain names!')
            exit(1)
        experiment_config = Experiments()
        experiment_dict = experiment_config.get_all_experiments()
        if experiment_id in experiment_dict:
            print_error('Invalid: experiment id already exist!')
            exit(1)
        src_path = os.path.join(nnictl_temp_dir, config_file_name)
        if not os.path.exists(src_path):
            print_error('Invalid: experiment metadata does not exist!')
            exit(1)

        # Step2. Copy nnictl metadata
        dest_path = os.path.join(NNICTL_HOME_DIR, config_file_name)
        if os.path.exists(dest_path):
            shutil.rmtree(dest_path)
        shutil.copytree(src_path, dest_path)

        # Step3. Copy experiment data
        nni_config = Config(config_file_name)
        nnictl_exp_config = nni_config.get_config('experimentConfig')
        if logDir:
            nnictl_exp_config['logDir'] = logDir
        elif nnictl_exp_config.get('logDir'):
            logDir = nnictl_exp_config['logDir']
        else:
            logDir = NNI_HOME_DIR
        dest_path = os.path.join(logDir, experiment_id)
        if os.path.exists(dest_path):
            shutil.rmtree(dest_path)
        # Copy straight from the unpacked 'experiment' folder; renaming it inside
        # the temp dir first is not needed to get <logDir>/<experiment id>.
        shutil.copytree(experiment_temp_dir, dest_path)

        # Step4. Copy code dir
        if not os.path.isabs(codeDir):
            codeDir = os.path.join(os.getcwd(), codeDir)
            print_normal('Expand codeDir to %s' % codeDir)
        nnictl_exp_config['trial']['codeDir'] = codeDir
        archive_code_dir = os.path.join(temp_root_dir, 'code')
        if os.path.exists(archive_code_dir):
            for file_name in os.listdir(archive_code_dir):
                src_path = os.path.join(archive_code_dir, file_name)
                target_path = os.path.join(codeDir, file_name)
                # Never overwrite what is already in the user's codeDir.
                if os.path.exists(target_path):
                    print_error('Copy %s failed, %s exist!' % (file_name, target_path))
                    continue
                if os.path.isdir(src_path):
                    shutil.copytree(src_path, target_path)
                else:
                    shutil.copy(src_path, target_path)

        # Step5. Create experiment metadata
        nni_config.set_config('experimentConfig', nnictl_exp_config)
        experiment_config.add_experiment(experiment_id,
                                         experiment_metadata.get('port'),
                                         experiment_metadata.get('startTime'),
                                         config_file_name,
                                         experiment_metadata.get('platform'),
                                         experiment_metadata.get('experimentName'),
                                         experiment_metadata.get('endTime'),
                                         experiment_metadata.get('status'))
        print_normal('Load experiment %s succsss!' % experiment_id)
    finally:
        # Step6. Cleanup temp data. Runs on success, on exit(1) (SystemExit) and on
        # unexpected errors; best-effort so it never masks the original failure.
        shutil.rmtree(temp_root_dir, ignore_errors=True)

0 comments on commit bb2ed15

Please sign in to comment.