Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Fix nnictl bugs and add new feature #75

Merged
merged 14 commits into from
Sep 19, 2018
12 changes: 9 additions & 3 deletions tools/nnicmd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick
from .url_utils import cluster_metadata_url, experiment_url
from .config_utils import Config
from .common_utils import get_yml_content, get_json_content, print_error, print_normal
from .common_utils import get_yml_content, get_json_content, print_error, print_normal, detect_process
from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO
from .webui_utils import start_web_ui, check_web_ui

Expand All @@ -40,7 +40,7 @@ def start_rest_server(port, platform, mode, experiment_id=None):
print_normal('Checking experiment...')
nni_config = Config()
rest_port = nni_config.get_config('restServerPort')
if rest_port and check_rest_server_quick(rest_port):
if rest_port and check_rest_server_quick(rest_port)[0]:
print_error('There is an experiment running, please stop it first...')
print_normal('You can use \'nnictl stop\' command to stop an experiment!')
exit(0)
Expand Down Expand Up @@ -122,6 +122,12 @@ def set_experiment(experiment_config, mode, port):
def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=None):
'''follow steps to start rest server and start experiment'''
nni_config = Config()
#Check if there is an experiment running
origin_rest_pid = nni_config.get_config('restServerPid')
if origin_rest_pid and detect_process(origin_rest_pid):
print_error('There is an experiment running, please stop it first...')
print_normal('You can use \'nnictl stop\' command to stop an experiment!')
exit(0)
# start rest server
rest_process = start_rest_server(REST_PORT, experiment_config['trainingServicePlatform'], mode, experiment_id)
nni_config.set_config('restServerPid', rest_process.pid)
Expand All @@ -144,7 +150,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No

# check rest server
print_normal('Checking restful server...')
if check_rest_server(REST_PORT):
if check_rest_server(REST_PORT)[0]:
print_normal('Restful server start success!')
else:
print_error('Restful server start failed!')
Expand Down
4 changes: 3 additions & 1 deletion tools/nnicmd/nnictl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def nni_help_info(*args):

def parse_args():
'''Definite the arguments users need to follow and input'''
parser = argparse.ArgumentParser(prog='nni ctl', description='use nni control')
parser = argparse.ArgumentParser(prog='nnictl', description='use nnictl command to control nni experiments')
parser.set_defaults(func=nni_help_info)

# create subparsers for args with sub values
Expand Down Expand Up @@ -95,6 +95,8 @@ def parse_args():
parser_experiment_subparsers = parser_experiment.add_subparsers()
parser_experiment_show = parser_experiment_subparsers.add_parser('show', help='show the information of experiment')
parser_experiment_show.set_defaults(func=list_experiment)
parser_experiment_status = parser_experiment_subparsers.add_parser('status', help='show the status of experiment')
parser_experiment_status.set_defaults(func=experiment_status)

#parse config command
parser_config = subparsers.add_parser('config', help='get config information')
Expand Down
20 changes: 15 additions & 5 deletions tools/nnicmd/nnictl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def check_rest(args):
'''check if restful server is running'''
nni_config = Config()
rest_port = nni_config.get_config('restServerPort')
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
print_normal('Restful server is running...')
else:
print_normal('Restful server is not running...')
Expand All @@ -62,7 +62,7 @@ def stop_experiment(args):
print_normal('Experiment is not running...')
stop_web_ui()
return
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recommend indexing the result with [0]; you could add a function that returns only the first value :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example, something like get_rest_server_return_code().

response = rest_delete(experiment_url(rest_port), 20)
if not response or response.status_code != 200:
print_error('Stop experiment failed!')
Expand All @@ -82,7 +82,7 @@ def trial_ls(args):
if not detect_process(rest_pid):
print_error('Experiment is not running...')
return
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
response = rest_get(trial_jobs_url(rest_port), 20)
if response and response.status_code == 200:
content = json.loads(response.text)
Expand All @@ -102,7 +102,7 @@ def trial_kill(args):
if not detect_process(rest_pid):
print_error('Experiment is not running...')
return
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
response = rest_delete(trial_job_id_url(rest_port, args.trialid), 20)
if response and response.status_code == 200:
print(response.text)
Expand All @@ -119,7 +119,7 @@ def list_experiment(args):
if not detect_process(rest_pid):
print_error('Experiment is not running...')
return
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
response = rest_get(experiment_url(rest_port), 20)
if response and response.status_code == 200:
content = convert_time_stamp_to_date(json.loads(response.text))
Expand All @@ -129,6 +129,16 @@ def list_experiment(args):
else:
print_error('Restful server is not running...')

def experiment_status(args):
    '''Print the status reported by the restful server.

    Reads the 'restServerPort' entry from the nni Config and probes the
    server once with check_rest_server_quick. When the probe succeeds, the
    response text is parsed as JSON and printed with sorted keys and a
    4-space indent; otherwise a "not running" notice is printed.

    `args` is not read by this function.
    '''
    rest_port = Config().get_config('restServerPort')
    is_running, status_response = check_rest_server_quick(rest_port)
    if is_running:
        # Round-trip through json so the output is stable and readable.
        status = json.loads(status_response.text)
        print(json.dumps(status, indent=4, sort_keys=True, separators=(',', ':')))
        return
    print_normal('Restful server is not running...')

def get_log_content(file_name, cmds):
'''use cmds to read config content'''
if os.path.exists(file_name):
Expand Down
10 changes: 5 additions & 5 deletions tools/nnicmd/rest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@ def check_rest_server(rest_port):
response = rest_get(check_status_url(rest_port), 20)
if response:
if response.status_code == 200:
return True
return True, response
else:
return False
return False, response
else:
time.sleep(3)
return False
return False, response

def check_rest_server_quick(rest_port):
'''Check if restful server is ready, only check once'''
response = rest_get(check_status_url(rest_port), 5)
if response and response.status_code == 200:
return True
return False
return True, response
return False, None
2 changes: 1 addition & 1 deletion tools/nnicmd/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def update_experiment_profile(key, value):
'''call restful server to update experiment profile'''
nni_config = Config()
rest_port = nni_config.get_config('restServerPort')
if check_rest_server_quick(rest_port):
if check_rest_server_quick(rest_port)[0]:
response = rest_get(experiment_url(rest_port), 20)
if response and response.status_code == 200:
experiment_profile = json.loads(response.text)
Expand Down