From de7b268524d6d497370b8df2dbcf05e6ce7039b0 Mon Sep 17 00:00:00 2001 From: Lijiaoa <61399850+Lijiaoa@users.noreply.github.com> Date: Fri, 5 Feb 2021 17:11:35 +0800 Subject: [PATCH 1/4] Support show trial details by clicking the point in trial results graph (#3352) Co-authored-by: Lijiao --- ts/webui/src/App.tsx | 30 ++++++++++++-- ts/webui/src/components/Overview.tsx | 41 +++---------------- ts/webui/src/components/TrialsDetail.tsx | 7 +++- .../overview/table/SuccessTable.tsx | 25 ++++------- .../trial-detail/DefaultMetricPoint.tsx | 15 ++++--- 5 files changed, 56 insertions(+), 62 deletions(-) diff --git a/ts/webui/src/App.tsx b/ts/webui/src/App.tsx index aa89d4af69..00068888b0 100644 --- a/ts/webui/src/App.tsx +++ b/ts/webui/src/App.tsx @@ -25,6 +25,7 @@ interface AppState { expWarningMessage: string; bestTrialEntries: string; // for overview page: best trial entreis isUpdate: boolean; + expandRowIDs: Set; } export const AppContext = React.createContext({ @@ -35,6 +36,7 @@ export const AppContext = React.createContext({ metricGraphMode: 'max', bestTrialEntries: '10', maxDurationUnit: 'm', + expandRowIDs: new Set(['']), // eslint-disable-next-line @typescript-eslint/no-empty-function changeColumn: (_val: string[]): void => {}, // eslint-disable-next-line @typescript-eslint/no-empty-function @@ -44,7 +46,9 @@ export const AppContext = React.createContext({ // eslint-disable-next-line @typescript-eslint/no-empty-function changeEntries: (_val: string): void => {}, // eslint-disable-next-line @typescript-eslint/no-empty-function - updateOverviewPage: () => {} + updateOverviewPage: () => {}, + // eslint-disable-next-line @typescript-eslint/no-empty-function + changeExpandRowIDs: (_val: string, _type?: string): void => {} }); class App extends React.Component<{}, AppState> { @@ -62,7 +66,8 @@ class App extends React.Component<{}, AppState> { isillegalFinal: false, expWarningMessage: '', bestTrialEntries: '10', - isUpdate: true + isUpdate: true, + expandRowIDs: new Set() }; } @@ -94,6 +99,20 @@ class App extends React.Component<{}, AppState> { this.setState({ columnList: columnList }); }; + changeExpandRowIDs = (id: string, type?: string): void => { + const currentExpandRowIDs = this.state.expandRowIDs; + + if (!currentExpandRowIDs.has(id)) { + currentExpandRowIDs.add(id); + } else { + if (!(type !== undefined && type === 'chart')) { + currentExpandRowIDs.delete(id); + } + } + + this.setState({ expandRowIDs: currentExpandRowIDs }); + }; + changeMetricGraphMode = (val: 'max' | 'min'): void => { this.setState({ metricGraphMode: val }); }; @@ -132,7 +151,8 @@ class App extends React.Component<{}, AppState> { isillegalFinal, expWarningMessage, bestTrialEntries, - maxDurationUnit + maxDurationUnit, + expandRowIDs } = this.state; if (experimentUpdateBroadcast === 0 || trialsUpdateBroadcast === 0) { return null; // TODO: render a loading page @@ -186,7 +206,9 @@ class App extends React.Component<{}, AppState> { changeMetricGraphMode: this.changeMetricGraphMode, bestTrialEntries, changeEntries: this.changeEntries, - updateOverviewPage: this.updateOverviewPage + updateOverviewPage: this.updateOverviewPage, + expandRowIDs, + changeExpandRowIDs: this.changeExpandRowIDs }} > {this.props.children} diff --git a/ts/webui/src/components/Overview.tsx b/ts/webui/src/components/Overview.tsx index 0fe01f9ed8..58ce24a3e7 100644 --- a/ts/webui/src/components/Overview.tsx +++ b/ts/webui/src/components/Overview.tsx @@ -70,8 +70,10 @@ class Overview extends React.Component<{}, OverviewState> { metricGraphMode, bestTrialEntries, maxDurationUnit, + expandRowIDs, updateOverviewPage, - changeMaxDurationUnit + changeMaxDurationUnit, + changeExpandRowIDs } = value; const maxActive = metricGraphMode === 'max' ? 'active' : ''; const minActive = metricGraphMode === 'min' ? 'active' : ''; @@ -169,10 +171,13 @@ class Overview extends React.Component<{}, OverviewState> { trialIds={bestTrials.map(trial => trial.info.trialJobId)} chartHeight={300} hasBestCurve={false} + changeExpandRowIDs={changeExpandRowIDs} /> trial.info.trialJobId)} updateOverviewPage={updateOverviewPage} + expandRowIDs={expandRowIDs} + changeExpandRowIDs={changeExpandRowIDs} /> @@ -198,40 +203,6 @@ class Overview extends React.Component<{}, OverviewState> { } return bestTrials; } - - private generateAccuracyGraph(bestTrials: Trial[]): object { - const xSequence = bestTrials.map(trial => trial.sequenceId); - const ySequence = bestTrials.map(trial => trial.accuracy); - - return { - // support max show 0.0000000 - grid: { - x: 60, - y: 40 - }, - tooltip: { - trigger: 'item' - }, - xAxis: { - name: 'Trial', - type: 'category', - data: xSequence - }, - yAxis: { - name: 'Default metric', - type: 'value', - scale: true, - data: ySequence - }, - series: [ - { - symbolSize: 6, - type: 'scatter', - data: ySequence - } - ] - }; - } } export default Overview; diff --git a/ts/webui/src/components/TrialsDetail.tsx b/ts/webui/src/components/TrialsDetail.tsx index fdb3426b4f..4d8d119d72 100644 --- a/ts/webui/src/components/TrialsDetail.tsx +++ b/ts/webui/src/components/TrialsDetail.tsx @@ -52,7 +52,12 @@ class TrialsDetail extends React.Component<{}, TrialDetailState> { {/* doesn't work*/} - + {/* */} diff --git a/ts/webui/src/components/overview/table/SuccessTable.tsx b/ts/webui/src/components/overview/table/SuccessTable.tsx index fa425d2fc4..a3ad438857 100644 --- a/ts/webui/src/components/overview/table/SuccessTable.tsx +++ b/ts/webui/src/components/overview/table/SuccessTable.tsx @@ -24,14 +24,14 @@ import '../../../static/style/openRow.scss'; interface SuccessTableProps { trialIds: string[]; - // eslint-disable-next-line @typescript-eslint/no-unused-vars updateOverviewPage: () => void; + expandRowIDs: Set; + changeExpandRowIDs: Function; } interface SuccessTableState { columns: IColumn[]; source: Array; - expandRowIdList: Set; sortInfo: SortInfo; } @@ -41,8 +41,7 @@ class SuccessTable extends React.Component this.state = { columns: this.columns, source: TRIALS.table(this.props.trialIds), - sortInfo: { field: '', isDescend: false }, - expandRowIdList: new Set() // store expanded row's trial id + sortInfo: { field: '', isDescend: false } }; } @@ -57,6 +56,7 @@ class SuccessTable extends React.Component const { columns, source, sortInfo } = this.state; const keepSortedSource = copyAndSort(source, sortInfo.field, sortInfo.isDescend); const isNoneData = source.length === 0 ? true : false; + return (
@@ -117,7 +117,7 @@ class SuccessTable extends React.Component styles={{ root: { transition: 'all 0.2s', - transform: `rotate(${this.state.expandRowIdList.has(item.id) ? 90 : 0}deg)` + transform: `rotate(${this.props.expandRowIDs.has(item.id) ? 90 : 0}deg)` } }} className='cursor' @@ -206,14 +206,14 @@ class SuccessTable extends React.Component }; private onRenderRow: IDetailsListProps['onRenderRow'] = props => { - const { expandRowIdList } = this.state; + const { expandRowIDs } = this.props; if (props) { return (
- {Array.from(expandRowIdList).map( + {Array.from(expandRowIDs).map( item => item === props.item.id && )}
@@ -223,15 +223,8 @@ class SuccessTable extends React.Component }; private expandTrialId = (_event: any, id: string): void => { - const { expandRowIdList } = this.state; - const { updateOverviewPage } = this.props; - const copyExpandList = expandRowIdList; - if (copyExpandList.has(id)) { - copyExpandList.delete(id); - } else { - copyExpandList.add(id); - } - this.setState(() => ({ expandRowIdList: copyExpandList })); + const { updateOverviewPage, changeExpandRowIDs } = this.props; + changeExpandRowIDs(id); updateOverviewPage(); }; } diff --git a/ts/webui/src/components/trial-detail/DefaultMetricPoint.tsx b/ts/webui/src/components/trial-detail/DefaultMetricPoint.tsx index 49cbba577a..2cae68a05a 100644 --- a/ts/webui/src/components/trial-detail/DefaultMetricPoint.tsx +++ b/ts/webui/src/components/trial-detail/DefaultMetricPoint.tsx @@ -26,6 +26,7 @@ interface DefaultPointProps { trialIds: string[]; chartHeight: number; hasBestCurve: boolean; + changeExpandRowIDs: Function; } interface DefaultPointState { @@ -57,7 +58,13 @@ class DefaultPoint extends React.Component } }; - generateGraphConfig(maxSequenceId: number): any { + pointClick = (params: any): void => { + if (window.location.pathname === '/oview') { + this.props.changeExpandRowIDs(params.data[2], 'chart'); + } + }; + + generateGraphConfig(_maxSequenceId: number): any { const { startY, endY } = this.state; return { grid: { @@ -67,10 +74,6 @@ class DefaultPoint extends React.Component trigger: 'item', enterable: true, confine: true, // confirm always show tooltip box rather than hidden by background - position: (point: number[], data: TooltipForAccuracy): number[] => [ - data.data[0] < maxSequenceId ? point[0] : point[0] - 300, - 80 - ], formatter: (data: TooltipForAccuracy): React.ReactNode => { return ( '
' + @@ -150,7 +153,7 @@ class DefaultPoint extends React.Component const { hasBestCurve, chartHeight } = this.props; const graph = this.generateGraph(); const accNodata = graph === EmptyGraph ? 'No data' : ''; - const onEvents = { dataZoom: this.metricDataZoom }; + const onEvents = { dataZoom: this.metricDataZoom, click: this.pointClick }; return (
From b698806284292b89c35cbe4cc1dbd4ad2a088f22 Mon Sep 17 00:00:00 2001 From: J-shang <33053116+J-shang@users.noreply.github.com> Date: Fri, 5 Feb 2021 17:20:10 +0800 Subject: [PATCH 2/4] migrate nnicli (#3334) --- docs/en_US/nnicli_ref.rst | 27 -- docs/en_US/sdk_reference.rst | 3 +- nni/experiment/__init__.py | 3 +- nni/experiment/data.py | 135 +++++++ nni/experiment/experiment.py | 229 +++++++++++- nni/experiment/nni_client.py | 509 -------------------------- test/config/integration_tests.yml | 11 - test/config/integration_tests_tf2.yml | 11 - test/config/pr_tests.yml | 11 - test/nni_test/nnitest/validators.py | 4 +- 10 files changed, 354 insertions(+), 589 deletions(-) delete mode 100644 docs/en_US/nnicli_ref.rst create mode 100644 nni/experiment/data.py delete mode 100644 nni/experiment/nni_client.py diff --git a/docs/en_US/nnicli_ref.rst b/docs/en_US/nnicli_ref.rst deleted file mode 100644 index 3b87aaa3b7..0000000000 --- a/docs/en_US/nnicli_ref.rst +++ /dev/null @@ -1,27 +0,0 @@ -NNI Client -========== - -NNI client is a python API of ``nnictl``, which implements the most commonly used commands. Users can use this API to control their experiments, collect experiment results and conduct advanced analyses based on experiment results in python code directly instead of using command line. Here is an example: - -.. code-block:: bash - - from nni.experiment import LegacyExperiment - - # create an experiment instance - exp = LegacyExperiment() - - # start an experiment, then connect the instance to this experiment - # you can also use `resume_experiment`, `view_experiment` or `connect_experiment` - # only one of them should be called in one instance - exp.start_experiment('nni/examples/trials/mnist-pytorch/config.yml', port=9090) - - # update the experiment's concurrency - exp.update_concurrency(3) - - # get some information about the experiment - print(exp.get_experiment_status()) - print(exp.get_job_statistics()) - print(exp.list_trial_jobs()) - - # stop the experiment, then disconnect the instance from the experiment. - exp.stop_experiment() diff --git a/docs/en_US/sdk_reference.rst b/docs/en_US/sdk_reference.rst index 91f7b0b23f..3c4eb9fb57 100644 --- a/docs/en_US/sdk_reference.rst +++ b/docs/en_US/sdk_reference.rst @@ -8,5 +8,4 @@ Python API Reference Auto Tune NAS - Compression Utilities - NNI Client \ No newline at end of file + Compression Utilities \ No newline at end of file diff --git a/nni/experiment/__init__.py b/nni/experiment/__init__.py index 0311372337..d59aec4e71 100644 --- a/nni/experiment/__init__.py +++ b/nni/experiment/__init__.py @@ -3,5 +3,4 @@ from .config import * from .experiment import Experiment - -from .nni_client import * +from .data import * diff --git a/nni/experiment/data.py b/nni/experiment/data.py new file mode 100644 index 0000000000..d58f4671f6 --- /dev/null +++ b/nni/experiment/data.py @@ -0,0 +1,135 @@ +from dataclasses import dataclass +import json +from typing import List + + +@dataclass +class TrialResult: + """ + TrialResult stores the result information of a trial job. + + Attributes + ---------- + parameter: dict + Hyper parameters for this trial. + value: serializable object, usually a number, or a dict with key "default" and other extra keys + Final result. + trialJobId: str + Trial job id. + """ + parameter: dict + value: dict + trialJobId: str + + def __init__(self, parameter: dict, value: str, trialJobId: str): + self.parameter = parameter + self.value = json.loads(value) + self.trialJobId = trialJobId + + +@dataclass +class TrialMetricData: + """ + TrialMetricData stores the metric data of a trial job. + A trial job may have both intermediate metric and final metric. + + Attributes + ---------- + timestamp: int + Time stamp. + trialJobId: str + Trial job id. + parameterId: int + Parameter id. + type: str + Metric type, `PERIODICAL` for intermediate result and `FINAL` for final result. + sequence: int + Sequence number in this trial. + data: serializable object, usually a number, or a dict with key "default" and other extra keys + Metric data. + """ + timestamp: int + trialJobId: str + parameterId: int + type: str + sequence: int + data: dict + + def __init__(self, timestamp: int, trialJobId: str, parameterId: int, type: str, sequence: int, data: str): # pylint: disable=W0622 + self.timestamp = timestamp + self.trialJobId = trialJobId + self.parameterId = parameterId + self.type = type + self.sequence = sequence + self.data = json.loads(json.loads(data)) + + +@dataclass +class TrialHyperParameters: + """ + TrialHyperParameters stores the hyper parameters of a trial job. + + Attributes + ---------- + parameter_id: int + Parameter id. + parameter_source: str + Parameter source. + parameters: dict + Hyper parameters. + parameter_index: int + Parameter index. + """ + parameter_id: int + parameter_source: str + parameters: dict + parameter_index: int + + +@dataclass +class TrialJob: + """ + TrialJob stores the information of a trial job. + + Attributes + ---------- + trialJobId: str + Trial job id. + status: str + Job status. + hyperParameters: list of `nni.experiment.TrialHyperParameters` + See `nni.experiment.TrialHyperParameters`. + logPath: str + Log path. + startTime: int + Job start time (timestamp). + endTime: int + Job end time (timestamp). + finalMetricData: list of `nni.experiment.TrialMetricData` + See `nni.experiment.TrialMetricData`. + stderrPath: str + Stderr log path. + sequenceId: int + Sequence Id. + """ + trialJobId: str + status: str + hyperParameters: List[TrialHyperParameters] + logPath: str + startTime: int + endTime: int + finalMetricData: List[TrialMetricData] + stderrPath: str + sequenceId: int + + def __init__(self, trialJobId: str, status: str, logPath: str, startTime: int, sequenceId: int, + endTime: int = -1, stderrPath: str = '', hyperParameters: List = [], finalMetricData: List = []): + self.trialJobId = trialJobId + self.status = status + self.hyperParameters = [TrialHyperParameters(**json.loads(e)) for e in hyperParameters] + self.logPath = logPath + self.startTime = startTime + self.endTime = endTime + self.finalMetricData = [TrialMetricData(**e) for e in finalMetricData] + self.stderrPath = stderrPath + self.sequenceId = sequenceId diff --git a/nni/experiment/experiment.py b/nni/experiment/experiment.py index 21e9169c93..c8e9e16fa9 100644 --- a/nni/experiment/experiment.py +++ b/nni/experiment/experiment.py @@ -5,7 +5,7 @@ from subprocess import Popen from threading import Thread import time -from typing import Optional, Union, List, overload +from typing import Optional, Union, List, overload, Any import colorama import psutil @@ -15,6 +15,7 @@ from nni.tuner import Tuner from .config import ExperimentConfig +from .data import TrialJob, TrialMetricData, TrialResult from . import launcher from . import management from .pipe import Pipe @@ -76,24 +77,37 @@ def __init__(self, tuner: Tuner, training_service: Union[str, List[str]]) -> Non """ ... - def __init__(self, tuner: Tuner, config=None, training_service=None): - self.config: ExperimentConfig + @overload + def __init__(self) -> None: + """ + Prepare an empty experiment, for `connect_experiment`. + + Use `Experiment.connect_experiment` to manage experiment. + + """ + ... + + def __init__(self, tuner=None, config=None, training_service=None): + self.config: Optional[ExperimentConfig] = None self.id: Optional[str] = None self.port: Optional[int] = None - self.tuner: Tuner = tuner + self.tuner: Optional[Tuner] = None self._proc: Optional[Popen] = None self._pipe: Optional[Pipe] = None self._dispatcher: Optional[MsgDispatcher] = None self._dispatcher_thread: Optional[Thread] = None - if isinstance(config, (str, list)): - config, training_service = None, config + if isinstance(tuner, Tuner): + self.tuner = tuner + if isinstance(config, (str, list)): + config, training_service = None, config - if config is None: - self.config = ExperimentConfig(training_service) + if config is None: + self.config = ExperimentConfig(training_service) + else: + self.config = config else: - self.config = config - + _logger.warning('Tuner not set, wait for connect...') def start(self, port: int = 8080, debug: bool = False) -> None: """ @@ -143,7 +157,6 @@ def start(self, port: int = 8080, debug: bool = False) -> None: def _create_dispatcher(self): # overrided by retiarii, temporary solution return MsgDispatcher(self.tuner, None) - def stop(self) -> None: """ Stop background experiment. @@ -169,7 +182,6 @@ def stop(self) -> None: self._dispatcher_thread = None _logger.info('Experiment stopped') - def run(self, port: int = 8080, debug: bool = False) -> bool: """ Run the experiment. @@ -192,9 +204,198 @@ def run(self, port: int = 8080, debug: bool = False) -> bool: finally: self.stop() + def connect_experiment(self, port: int): + """ + Connect to an existing experiment. - def get_status(self) -> str: + Parameters + ---------- + port + The port of web UI. + """ + self.port = port + self.get_status() + + def _experiment_rest_get(self, port: int, api: str) -> Any: if self.port is None: raise RuntimeError('Experiment is not running') - resp = rest.get(self.port, '/check-status') + return rest.get(self.port, api) + + def _experiment_rest_put(self, port: int, api: str, data: Any): + if self.port is None: + raise RuntimeError('Experiment is not running') + rest.put(self.port, api, data) + + def get_status(self) -> str: + """ + Return experiment status as a str. + + Returns + ------- + str + Experiment status. + """ + resp = self._experiment_rest_get(self.port, '/check-status') return resp['status'] + + def get_trial_job(self, trial_job_id: str): + """ + Return a trial job. + + Parameters + ---------- + trial_job_id: str + Trial job id. + + Returns + ---------- + TrialJob + A `TrialJob` instance corresponding to `trial_job_id`. + """ + resp = self._experiment_rest_get(self.port, '/trial-jobs/{}'.format(trial_job_id)) + return TrialJob(**resp) + + def list_trial_jobs(self): + """ + Return information for all trial jobs as a list. + + Returns + ---------- + list + List of `TrialJob`. + """ + resp = self._experiment_rest_get(self.port, '/trial-jobs') + return [TrialJob(**trial_job) for trial_job in resp] + + def get_job_statistics(self): + """ + Return trial job statistics information as a dict. + + Returns + ---------- + dict + Job statistics information. + """ + resp = self._experiment_rest_get(self.port, '/job-statistics') + return resp + + def get_job_metrics(self, trial_job_id=None): + """ + Return trial job metrics. + + Parameters + ---------- + trial_job_id: str + trial job id. if this parameter is None, all trail jobs' metrics will be returned. + + Returns + ---------- + dict + Each key is a trialJobId, the corresponding value is a list of `TrialMetricData`. + """ + api = '/metric-data/{}'.format(trial_job_id) if trial_job_id else '/metric-data' + resp = self._experiment_rest_get(self.port, api) + metric_dict = {} + for metric in resp: + trial_id = metric["trialJobId"] + if trial_id not in metric_dict: + metric_dict[trial_id] = [TrialMetricData(**metric)] + else: + metric_dict[trial_id].append(TrialMetricData(**metric)) + return metric_dict + + def get_experiment_profile(self): + """ + Return experiment profile as a dict. + + Returns + ---------- + dict + The profile of the experiment. + """ + resp = self._experiment_rest_get(self.port, '/experiment') + return resp + + def export_data(self): + """ + Return exported information for all trial jobs. + + Returns + ---------- + list + List of `TrialResult`. + """ + resp = self._experiment_rest_get(self.port, '/export-data') + return [TrialResult(**trial_result) for trial_result in resp] + + def _get_query_type(self, key: str): + if key == 'trial_concurrency': + return '?update_type=TRIAL_CONCURRENCY' + if key == 'max_experiment_duration': + return '?update_type=MAX_EXEC_DURATION' + if key == 'search_space': + return '?update_type=SEARCH_SPACE' + if key == 'max_trial_number': + return '?update_type=MAX_TRIAL_NUM' + + def _update_experiment_profile(self, key: str, value: Any): + """ + Update an experiment's profile + + Parameters + ---------- + key: str + One of `['trial_concurrency', 'max_experiment_duration', 'search_space', 'max_trial_number']`. + value: Any + New value of the key. + """ + api = '/experiment{}'.format(self._get_query_type(key)) + experiment_profile = self.get_experiment_profile() + experiment_profile['params'][key] = value + self._experiment_rest_put(self.port, api, experiment_profile) + + def update_trial_concurrency(self, value: int): + """ + Update an experiment's trial_concurrency + + Parameters + ---------- + value: int + New trial_concurrency value. + """ + self._update_experiment_profile('trial_concurrency', value) + + def update_max_experiment_duration(self, value: str): + """ + Update an experiment's max_experiment_duration + + Parameters + ---------- + value: str + Strings like '1m' for one minute or '2h' for two hours. + SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days. + """ + self._update_experiment_profile('max_experiment_duration', value) + + def update_search_space(self, value: dict): + """ + Update the experiment's search_space. + TODO: support searchspace file. + + Parameters + ---------- + value: dict + New search_space. + """ + self._update_experiment_profile('search_space', value) + + def update_max_trial_number(self, value): + """ + Update an experiment's max_trial_number + + Parameters + ---------- + value: int + New max_trial_number value. + """ + self._update_experiment_profile('max_trial_number', value) diff --git a/nni/experiment/nni_client.py b/nni/experiment/nni_client.py deleted file mode 100644 index e3dd8b1673..0000000000 --- a/nni/experiment/nni_client.py +++ /dev/null @@ -1,509 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -""" A python wrapper for nni rest api - -Example: - -from nni.experiment import Experiment - -exp = Experiment() -exp.start_experiment('../../../../examples/trials/mnist-pytorch/config.yml') - -exp.update_concurrency(3) - -print(exp.get_experiment_status()) -print(exp.get_job_statistics()) -print(exp.list_trial_jobs()) - -exp.stop_experiment() - -""" - -import sys -import os -import subprocess -import re -import json -import requests - -__all__ = [ - 'LegacyExperiment', - 'TrialResult', - 'TrialMetricData', - 'TrialHyperParameters', - 'TrialJob' -] - -EXPERIMENT_PATH = 'experiment' -STATUS_PATH = 'check-status' -JOB_STATISTICS_PATH = 'job-statistics' -TRIAL_JOBS_PATH = 'trial-jobs' -METRICS_PATH = 'metric-data' -EXPORT_DATA_PATH = 'export-data' -API_ROOT_PATH = 'api/v1/nni' - -def _nni_rest_get(endpoint, api_path, response_type='json'): - _check_endpoint(endpoint) - uri = '{}/{}/{}'.format(endpoint.strip('/'), API_ROOT_PATH, api_path) - res = requests.get(uri) - if _http_succeed(res.status_code): - if response_type == 'json': - return res.json() - elif response_type == 'text': - return res.text - else: - raise RuntimeError('Incorrect response_type') - else: - return None - -def _http_succeed(status_code): - return status_code // 100 == 2 - -def _create_process(cmd): - if sys.platform == 'win32': - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP) - else: - process = subprocess.Popen(cmd, stdout=subprocess.PIPE) - - while process.poll() is None: - output = process.stdout.readline() - if output: - print(output.decode('utf-8').strip()) - return process.returncode - -def _check_endpoint(endpoint): - if endpoint is None: - raise RuntimeError("This instance hasn't been connect to an experiment.") - -class TrialResult: - """ - TrialResult stores the result information of a trial job. - - Parameters - ---------- - json_obj: dict - Json object that stores the result information. - - Attributes - ---------- - parameter: dict - Hyper parameters for this trial. - value: serializable object, usually a number, or a dict with key "default" and other extra keys - Final result. - trialJobId: str - Trial job id. - """ - def __init__(self, json_obj): - self.parameter = None - self.value = None - self.trialJobId = None - for key in json_obj.keys(): - setattr(self, key, json_obj[key]) - self.value = json.loads(self.value) - - def __repr__(self): - return "TrialResult(parameter: {} value: {} trialJobId: {})".format(self.parameter, self.value, self.trialJobId) - -class TrialMetricData: - """ - TrialMetricData stores the metric data of a trial job. - A trial job may have both intermediate metric and final metric. - - Parameters - ---------- - json_obj: dict - Json object that stores the metric data. - - Attributes - ---------- - timestamp: int - Time stamp. - trialJobId: str - Trial job id. - parameterId: int - Parameter id. - type: str - Metric type, `PERIODICAL` for intermediate result and `FINAL` for final result. - sequence: int - Sequence number in this trial. - data: serializable object, usually a number, or a dict with key "default" and other extra keys - Metric data. - """ - def __init__(self, json_obj): - self.timestamp = None - self.trialJobId = None - self.parameterId = None - self.type = None - self.sequence = None - self.data = None - for key in json_obj.keys(): - setattr(self, key, json_obj[key]) - self.data = json.loads(json.loads(self.data)) - - def __repr__(self): - return "TrialMetricData(timestamp: {} trialJobId: {} parameterId: {} type: {} sequence: {} data: {})" \ - .format(self.timestamp, self.trialJobId, self.parameterId, self.type, self.sequence, self.data) - -class TrialHyperParameters: - """ - TrialHyperParameters stores the hyper parameters of a trial job. - - Parameters - ---------- - json_obj: dict - Json object that stores the hyper parameters. - - Attributes - ---------- - parameter_id: int - Parameter id. - parameter_source: str - Parameter source. - parameters: dict - Hyper parameters. - parameter_index: int - Parameter index. - """ - def __init__(self, json_obj): - self.parameter_id = None - self.parameter_source = None - self.parameters = None - self.parameter_index = None - for key in json_obj.keys(): - if hasattr(self, key): - setattr(self, key, json_obj[key]) - - def __repr__(self): - return "TrialHyperParameters(parameter_id: {} parameter_source: {} parameters: {} parameter_index: {})" \ - .format(self.parameter_id, self.parameter_source, self.parameters, self.parameter_index) - -class TrialJob: - """ - TrialJob stores the information of a trial job. - - Parameters - ---------- - json_obj: dict - json object that stores the hyper parameters - - Attributes - ---------- - trialJobId: str - Trial job id. - status: str - Job status. - hyperParameters: list of `nni.experiment.TrialHyperParameters` - See `nni.experiment.TrialHyperParameters`. - logPath: str - Log path. - startTime: int - Job start time (timestamp). - endTime: int - Job end time (timestamp). - finalMetricData: list of `nni.experiment.TrialMetricData` - See `nni.experiment.TrialMetricData`. - parameter_index: int - Parameter index. - """ - def __init__(self, json_obj): - self.trialJobId = None - self.status = None - self.hyperParameters = None - self.logPath = None - self.startTime = None - self.endTime = None - self.finalMetricData = None - self.stderrPath = None - for key in json_obj.keys(): - setattr(self, key, json_obj[key]) - if self.hyperParameters: - self.hyperParameters = [TrialHyperParameters(json.loads(e)) for e in self.hyperParameters] - if self.finalMetricData: - self.finalMetricData = [TrialMetricData(e) for e in self.finalMetricData] - - def __repr__(self): - return ("TrialJob(trialJobId: {} status: {} hyperParameters: {} logPath: {} startTime: {} " - "endTime: {} finalMetricData: {} stderrPath: {})") \ - .format(self.trialJobId, self.status, self.hyperParameters, self.logPath, - self.startTime, self.endTime, self.finalMetricData, self.stderrPath) - -class LegacyExperiment: - def __init__(self): - self._endpoint = None - self._exp_id = None - self._port = None - - @property - def endpoint(self): - return self._endpoint - - @property - def exp_id(self): - return self._exp_id - - @property - def port(self): - return self._port - - def _exec_command(self, cmd, port=None): - if self._endpoint is not None: - raise RuntimeError('This instance has been connected to an experiment.') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to establish experiment, please check your config.') - else: - if port: - self._port = port - else: - self._port = 8080 - self._endpoint = 'http://localhost:{}'.format(self._port) - self._exp_id = self.get_experiment_profile()['id'] - - def start_experiment(self, config_file, port=None, debug=False): - """ - Start an experiment with specified configuration file and connect to it. - - Parameters - ---------- - config_file: str - Path to the config file. - port: int - The port of restful server, bigger than 1024. - debug: boolean - Set debug mode. - """ - cmd = 'nnictl create --config {}'.format(config_file).split(' ') - if port: - cmd += '--port {}'.format(port).split(' ') - if debug: - cmd += ['--debug'] - self._exec_command(cmd, port) - - def resume_experiment(self, exp_id, port=None, debug=False): - """ - Resume a stopped experiment with specified experiment id - - Parameters - ---------- - exp_id: str - Experiment id. - port: int - The port of restful server, bigger than 1024. - debug: boolean - Set debug mode. - """ - cmd = 'nnictl resume {}'.format(exp_id).split(' ') - if port: - cmd += '--port {}'.format(port).split(' ') - if debug: - cmd += ['--debug'] - self._exec_command(cmd, port) - - def view_experiment(self, exp_id, port=None): - """ - View a stopped experiment with specified experiment id. - - Parameters - ---------- - exp_id: str - Experiment id. - port: int - The port of restful server, bigger than 1024. - """ - cmd = 'nnictl view {}'.format(exp_id).split(' ') - if port: - cmd += '--port {}'.format(port).split(' ') - self._exec_command(cmd, port) - - def connect_experiment(self, endpoint): - """ - Connect to an existing experiment. - - Parameters - ---------- - endpoint: str - The endpoint of nni rest server, i.e, the url of Web UI. Should be a format like `http://ip:port`. - """ - if self._endpoint is not None: - raise RuntimeError('This instance has been connected to an experiment.') - self._endpoint = endpoint - try: - self._exp_id = self.get_experiment_profile()['id'] - except TypeError: - raise RuntimeError('Invalid experiment endpoint.') - self._port = int(re.search(r':[0-9]+', self._endpoint).group().replace(':', '')) - - def stop_experiment(self): - """Stop the experiment. - """ - _check_endpoint(self._endpoint) - cmd = 'nnictl stop {}'.format(self._exp_id).split(' ') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to stop experiment.') - self._endpoint = None - self._exp_id = None - self._port = None - - def update_searchspace(self, filename): - """ - Update the experiment's search space. - - Parameters - ---------- - filename: str - Path to the searchspace file. - """ - _check_endpoint(self._endpoint) - cmd = 'nnictl update searchspace {} --filename {}'.format(self._exp_id, filename).split(' ') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to update searchspace.') - - def update_concurrency(self, value): - """ - Update an experiment's concurrency - - Parameters - ---------- - value: int - New concurrency value. - """ - _check_endpoint(self._endpoint) - cmd = 'nnictl update concurrency {} --value {}'.format(self._exp_id, value).split(' ') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to update concurrency.') - - def update_duration(self, value): - """ - Update an experiment's duration - - Parameters - ---------- - value: str - Strings like '1m' for one minute or '2h' for two hours. - SUFFIX may be 's' for seconds, 'm' for minutes, 'h' for hours or 'd' for days. - """ - _check_endpoint(self._endpoint) - cmd = 'nnictl update duration {} --value {}'.format(self._exp_id, value).split(' ') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to update duration.') - - def update_trailnum(self, value): - """ - Update an experiment's maxtrialnum - - Parameters - ---------- - value: int - New trailnum value. - """ - _check_endpoint(self._endpoint) - cmd = 'nnictl update trialnum {} --value {}'.format(self._exp_id, value).split(' ') - if _create_process(cmd) != 0: - raise RuntimeError('Failed to update trailnum.') - - def get_experiment_status(self): - """ - Return experiment status as a dict. - - Returns - ---------- - dict - Experiment status. - """ - _check_endpoint(self._endpoint) - return _nni_rest_get(self._endpoint, STATUS_PATH) - - def get_trial_job(self, trial_job_id): - """ - Return a trial job. - - Parameters - ---------- - trial_job_id: str - Trial job id. - - Returns - ---------- - nnicli.TrialJob - A `nnicli.TrialJob` instance corresponding to `trial_job_id`. - """ - _check_endpoint(self._endpoint) - assert trial_job_id is not None - trial_job = _nni_rest_get(self._endpoint, os.path.join(TRIAL_JOBS_PATH, trial_job_id)) - return TrialJob(trial_job) - - def list_trial_jobs(self): - """ - Return information for all trial jobs as a list. - - Returns - ---------- - list - List of `nnicli.TrialJob`. - """ - _check_endpoint(self._endpoint) - trial_jobs = _nni_rest_get(self._endpoint, TRIAL_JOBS_PATH) - return [TrialJob(e) for e in trial_jobs] - - def get_job_statistics(self): - """ - Return trial job statistics information as a dict. - - Returns - ---------- - list - Job statistics information. - """ - _check_endpoint(self._endpoint) - return _nni_rest_get(self._endpoint, JOB_STATISTICS_PATH) - - def get_job_metrics(self, trial_job_id=None): - """ - Return trial job metrics. - - Parameters - ---------- - trial_job_id: str - trial job id. if this parameter is None, all trail jobs' metrics will be returned. - - Returns - ---------- - dict - Each key is a trialJobId, the corresponding value is a list of `nnicli.TrialMetricData`. - """ - _check_endpoint(self._endpoint) - api_path = METRICS_PATH if trial_job_id is None else os.path.join(METRICS_PATH, trial_job_id) - output = {} - trail_metrics = _nni_rest_get(self._endpoint, api_path) - for metric in trail_metrics: - trial_id = metric["trialJobId"] - if trial_id not in output: - output[trial_id] = [TrialMetricData(metric)] - else: - output[trial_id].append(TrialMetricData(metric)) - return output - - def export_data(self): - """ - Return exported information for all trial jobs. - - Returns - ---------- - list - List of `nnicli.TrialResult`. - """ - _check_endpoint(self._endpoint) - trial_results = _nni_rest_get(self._endpoint, EXPORT_DATA_PATH) - return [TrialResult(e) for e in trial_results] - - def get_experiment_profile(self): - """ - Return experiment profile as a dict. - - Returns - ---------- - dict - The profile of the experiment. - """ - _check_endpoint(self._endpoint) - return _nni_rest_get(self._endpoint, EXPERIMENT_PATH) diff --git a/test/config/integration_tests.yml b/test/config/integration_tests.yml index c833597d37..e14be4d7d9 100644 --- a/test/config/integration_tests.yml +++ b/test/config/integration_tests.yml @@ -142,17 +142,6 @@ testCases: kwargs: import_data_file_path: config/nnictl_experiment/test_import.json -- name: nnicli - configFile: test/config/examples/sklearn-regression.yml - config: - maxTrialNum: 4 - trialConcurrency: 4 - launchCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")' - stopCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' - validator: - class: NnicliValidator - platform: linux darwin - - name: foreground configFile: test/config/examples/sklearn-regression.yml launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45 diff --git a/test/config/integration_tests_tf2.yml b/test/config/integration_tests_tf2.yml index a9f170944c..92f0b32d30 100644 --- a/test/config/integration_tests_tf2.yml +++ b/test/config/integration_tests_tf2.yml @@ -109,17 +109,6 @@ testCases: validator: class: ExportValidator -- name: nnicli - configFile: test/config/examples/sklearn-regression.yml - config: - maxTrialNum: 4 - trialConcurrency: 4 - launchCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")' - stopCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' - validator: - class: NnicliValidator - platform: linux darwin - - name: foreground configFile: test/config/examples/sklearn-regression.yml launchCommand: python3 nni_test/nnitest/foreground.py --config $configFile --timeout 45 diff --git a/test/config/pr_tests.yml b/test/config/pr_tests.yml index 3d0563974a..62a313bf4a 100644 --- a/test/config/pr_tests.yml +++ b/test/config/pr_tests.yml @@ -42,17 +42,6 @@ testCases: kwargs: expected_result_file: expected_metrics_dict.json -- name: nnicli - configFile: test/config/examples/sklearn-regression.yml - config: - maxTrialNum: 4 - trialConcurrency: 4 - launchCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")' - stopCommand: python3 -c 'from nni.experiment import LegacyExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' - validator: - class: NnicliValidator - platform: linux darwin - - name: multi-thread configFile: test/config/multi_thread/config.yml diff --git a/test/nni_test/nnitest/validators.py b/test/nni_test/nnitest/validators.py index df92bcb962..f4a880857c 100644 --- a/test/nni_test/nnitest/validators.py +++ b/test/nni_test/nnitest/validators.py @@ -6,7 +6,7 @@ import subprocess import json import requests -from nni.experiment import LegacyExperiment as Experiment +from nni.experiment import Experiment from nni.tools.nnictl.updater import load_search_space from utils import METRICS_URL, GET_IMPORTED_DATA_URL @@ -93,7 +93,7 @@ class NnicliValidator(ITValidator): def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): print(rest_endpoint) exp = Experiment() - exp.connect_experiment(rest_endpoint) + exp.connect_experiment(int(rest_endpoint.split(':')[-1])) print(exp.get_job_statistics()) print(exp.get_experiment_status()) print(exp.list_trial_jobs()) From e2c6739745764043fcbf9ced2b609c7e07f541e8 Mon Sep 17 00:00:00 2001 From: SparkSnail Date: Mon, 8 Feb 2021 10:07:36 +0800 Subject: [PATCH 3/4] support local windows in hybrid mode (#3353) --- .../environments/localEnvironmentService.ts | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts index e40fd43764..15f31d27b1 100644 --- a/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/localEnvironmentService.ts @@ -12,8 +12,8 @@ import { getLogger, Logger } from '../../../common/log'; import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey'; import { EnvironmentInformation, EnvironmentService } from '../environment'; import { TrialConfig } from '../../common/trialConfig'; -import { getExperimentRootDir, isAlive } from '../../../common/utils'; -import { execMkdir, runScript, execCopydir } from '../../common/util'; +import { getExperimentRootDir, isAlive, getNewLine } from '../../../common/utils'; +import { execMkdir, runScript, getScriptName, execCopydir } from '../../common/util'; @component.Singleton export class LocalEnvironmentService extends EnvironmentService { @@ -53,8 +53,8 @@ export class LocalEnvironmentService extends EnvironmentService { public async refreshEnvironmentsStatus(environments: EnvironmentInformation[]): Promise { environments.forEach(async (environment) => { - const jobpidPath: string = `${environment.runnerWorkingFolder}/pid`; - const runnerReturnCodeFilePath: string = `${environment.runnerWorkingFolder}/code`; + const jobpidPath: string = `${path.join(environment.runnerWorkingFolder, 'pid')}`; + const runnerReturnCodeFilePath: string = `${path.join(environment.runnerWorkingFolder, 'code')}`; /* eslint-disable require-atomic-updates */ try { // check if pid file exist @@ -87,6 +87,32 @@ export class LocalEnvironmentService extends EnvironmentService { } }); } + + private getScript(environment: EnvironmentInformation): string[] { + const script: string[] = []; + if (process.platform === 'win32') { + script.push(`cd $env:${this.experimentRootDir}`); + script.push(`New-Item -ItemType "directory" -Path ${path.join(this.experimentRootDir, 'envs', environment.id)} -Force`); + environment.command = `cd envs\\${environment.id} && python -m nni.tools.trial_tool.trial_runner`; + script.push( + `cmd.exe /c ${environment.command} --job_pid_file ${path.join(environment.runnerWorkingFolder, 'pid')} 2>&1 | Out-File "${path.join(environment.runnerWorkingFolder, 'trial_runner.log')}" -encoding utf8`, + `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`, + `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`, + `Write $LASTEXITCODE " " $NOW_DATE | Out-File "${path.join(environment.runnerWorkingFolder, 'code')}" -NoNewline -encoding utf8`); + } else { + script.push(`cd ${this.experimentRootDir}`); + script.push(`eval ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid 1>${environment.runnerWorkingFolder}/trialrunner_stdout 2>${environment.runnerWorkingFolder}/trialrunner_stderr"`); + if (process.platform === 'darwin') { + // https://superuser.com/questions/599072/how-to-get-bash-execution-time-in-milliseconds-under-mac-os-x + // Considering the worst case, write 999 to avoid negative duration + script.push(`echo $? \`date +%s999\` >'${environment.runnerWorkingFolder}/code'`); + } else { + script.push(`echo $? \`date +%s%3N\` >'${environment.runnerWorkingFolder}/code'`); + } + } + + return script; + } public async startEnvironment(environment: EnvironmentInformation): Promise { if (this.localTrialConfig === undefined) { @@ -99,14 +125,13 @@ export class LocalEnvironmentService extends EnvironmentService { environment.runnerWorkingFolder = path.join(localEnvCodeFolder, environment.id); await execMkdir(environment.runnerWorkingFolder); await execCopydir(localTempFolder, localEnvCodeFolder); - environment.command = `cd ${this.experimentRootDir} && \ -${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \ -1>${environment.runnerWorkingFolder}/trialrunner_stdout 2>${environment.runnerWorkingFolder}/trialrunner_stderr \ -&& echo $? \`date +%s%3N\` >${environment.runnerWorkingFolder}/code`; - await fs.promises.writeFile(path.join(localEnvCodeFolder, 'nni_run.sh'), - environment.command, { encoding: 'utf8', mode: 0o777 }), + environment.command = this.getScript(environment).join(getNewLine()); + const scriptName: string = getScriptName('run'); + await fs.promises.writeFile(path.join(localEnvCodeFolder, scriptName), + environment.command, { encoding: 'utf8', mode: 0o777 }); + // Execute command in local machine - runScript(path.join(localEnvCodeFolder, 'nni_run.sh')); + runScript(path.join(localEnvCodeFolder, scriptName)); environment.trackingUrl = `${environment.runnerWorkingFolder}`; } @@ -115,7 +140,7 @@ ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \ return Promise.resolve(); } - const jobpidPath: string = `${environment.runnerWorkingFolder}/pid`; + const jobpidPath: string = `${path.join(environment.runnerWorkingFolder, 'pid')}`; const pid: string = await fs.promises.readFile(jobpidPath, 'utf8'); tkill(Number(pid), 'SIGKILL'); } From 2cdba2963629b0978adcebacb431908668ecf449 Mon Sep 17 00:00:00 2001 From: Lijiaoa <61399850+Lijiaoa@users.noreply.github.com> Date: Mon, 8 Feb 2021 10:58:05 +0800 Subject: [PATCH 4/4] del duplicate css file (#3362) --- ts/webui/src/components/trial-detail/TableList.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ts/webui/src/components/trial-detail/TableList.tsx b/ts/webui/src/components/trial-detail/TableList.tsx index 6c612124ef..8279590a62 100644 --- a/ts/webui/src/components/trial-detail/TableList.tsx +++ b/ts/webui/src/components/trial-detail/TableList.tsx @@ -22,12 +22,8 @@ import '../../static/style/tableStatus.css'; import '../../static/style/logPath.scss'; import '../../static/style/table.scss'; import '../../static/style/button.scss'; -import '../../static/style/logPath.scss'; import '../../static/style/openRow.scss'; import '../../static/style/pagination.scss'; -import '../../static/style/search.scss'; -import '../../static/style/table.scss'; -import '../../static/style/tableStatus.css'; import '../../static/style/overview/overviewTitle.scss'; import { blocked, copy, LineChart, tableListIcon } from '../buttons/Icon'; import ChangeColumnComponent from '../modals/ChangeColumnComponent';