Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

[Retiarii] end2end #3122

Merged
merged 61 commits into from
Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
80ca92e
init commit of pytorch code converter
QuanluZhang Nov 1, 2020
d08d0a1
update
QuanluZhang Nov 3, 2020
fde8565
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Nov 3, 2020
a01ecfe
refactor graph ir
QuanluZhang Nov 4, 2020
2639080
update
QuanluZhang Nov 5, 2020
de9e801
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Nov 5, 2020
8874051
resolve merge conflict
QuanluZhang Nov 5, 2020
0c54c4e
update
QuanluZhang Nov 5, 2020
4b9f0d6
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Nov 5, 2020
48cf595
pass mnist example without code converter
QuanluZhang Nov 6, 2020
169e6b2
end to end
QuanluZhang Nov 10, 2020
a79c3be
fix bugs in pytorch code generation
QuanluZhang Nov 12, 2020
ae33a0a
fix code generating
QuanluZhang Nov 13, 2020
f2ad754
support apply mutators
QuanluZhang Nov 13, 2020
4640932
implement topo_sort
QuanluZhang Nov 13, 2020
c1d8ba9
generated the correct model
QuanluZhang Nov 14, 2020
bcc3cf1
end2end passed, strategy generates only one trial
QuanluZhang Nov 14, 2020
b1a3228
pass end2end, multiple trials
QuanluZhang Nov 15, 2020
7a642a1
update
QuanluZhang Nov 15, 2020
92bdd7a
merge input_names to input_node, passed test
QuanluZhang Nov 19, 2020
2729846
remove convert_name from code gen, add the logic in graph gen
QuanluZhang Nov 19, 2020
c0ed256
update
QuanluZhang Nov 19, 2020
d9e9260
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Nov 19, 2020
ec19bec
fix conflict
QuanluZhang Nov 19, 2020
1b557a5
refactor io node
QuanluZhang Nov 20, 2020
c8ffed8
first draft
liuzhe-lz Nov 20, 2020
f2dd604
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Nov 20, 2020
13de09e
init commit, support layerchoice, inputchoice
QuanluZhang Nov 20, 2020
4873cde
support layerchoice and inputchoice
QuanluZhang Nov 24, 2020
bfbec3d
second ver
liuzhe-lz Nov 25, 2020
e8648eb
refactor logging
liuzhe-lz Nov 26, 2020
bacd496
fix cluster metadata
liuzhe-lz Nov 26, 2020
4307ee8
clean up
liuzhe-lz Nov 26, 2020
b177b79
use foreground in example
liuzhe-lz Nov 26, 2020
e38278d
Merge branch 'master' into exp
liuzhe-lz Nov 26, 2020
97d370c
add missing file
liuzhe-lz Nov 26, 2020
8c55d21
fix pylint
liuzhe-lz Nov 27, 2020
7f96326
update ts timestamp to match python format
liuzhe-lz Nov 27, 2020
b052411
try to fix ts version difference
liuzhe-lz Nov 27, 2020
697d8e6
generate darts code
QuanluZhang Nov 29, 2020
2c14267
Merge pull request #6 from liuzhe-lz/exp
QuanluZhang Nov 29, 2020
849d033
new launching approach
QuanluZhang Dec 1, 2020
07658b3
minor
QuanluZhang Dec 1, 2020
2dc6c5d
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Dec 1, 2020
c67a6ea
support instantiated trainer
QuanluZhang Dec 1, 2020
5deec9c
remove comments
QuanluZhang Dec 1, 2020
0f0db1f
refactor strategy
QuanluZhang Dec 1, 2020
67a161b
refactor user code, support with statement
QuanluZhang Dec 2, 2020
ef2fe7e
minor
QuanluZhang Dec 2, 2020
563a9c0
new experiment config for NAS, support tpe strategy for NAS
QuanluZhang Dec 5, 2020
32b9bec
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Dec 5, 2020
923ae26
refactor of code converter
QuanluZhang Dec 6, 2020
6e079fb
update code gen to shorten variable name for improving readability
QuanluZhang Dec 6, 2020
dd313e3
handle module list
QuanluZhang Dec 6, 2020
1ae325d
merge aten::slice
QuanluZhang Dec 6, 2020
4435973
deal with aten::append differently, as it has no output
QuanluZhang Dec 7, 2020
e4b94af
refactor
QuanluZhang Dec 10, 2020
e1a6b7b
resolve comments
QuanluZhang Dec 10, 2020
e6d3874
remove files
QuanluZhang Dec 11, 2020
9434511
minor
QuanluZhang Dec 11, 2020
0d12f34
Merge branch 'dev-retiarii' of https://github.com/microsoft/nni into …
QuanluZhang Dec 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions examples/trials/mnist-tfv2/launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# FIXME: For demonstration only. It should not be here

from pathlib import Path

from nni.experiment import Experiment
from nni.algorithms.hpo.hyperopt_tuner.hyperopt_tuner import HyperoptTuner

# Demonstration launcher: build a tuner, describe the search space, fill in
# the experiment config, then start the experiment.

# Hyperopt-based tuner, constructed with the algorithm name 'tpe'.
tuner = HyperoptTuner('tpe')

# Each entry maps a hyper-parameter name to a sampling rule:
# "_type" names the rule and "_value" holds its arguments.
search_space = {
"dropout_rate": { "_type": "uniform", "_value": [0.5, 0.9] },
"conv_size": { "_type": "choice", "_value": [2, 3, 5, 7] },
# NOTE(review): 124 may be a typo for 128 -- confirm with the author.
"hidden_size": { "_type": "choice", "_value": [124, 512, 1024] },
"batch_size": { "_type": "choice", "_value": [16, 32] },
"learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] }
}

# The second argument is presumably the training service name ('local');
# verify against the Experiment constructor.
experiment = Experiment(tuner, 'local')
experiment.config.experiment_name = 'test'
experiment.config.trial_concurrency = 2
experiment.config.max_trial_number = 5
experiment.config.search_space = search_space
experiment.config.trial_command = 'python3 mnist.py'
# Trial code lives in the directory that contains this script.
experiment.config.trial_code_directory = Path(__file__).parent

# 8081 is presumably the port the experiment listens on -- TODO confirm.
experiment.run(8081, debug=True)
3 changes: 3 additions & 0 deletions nni/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

__version__ = '999.0.0-developing'

from .runtime.log import init_logger
init_logger()

from .runtime.env_vars import dispatcher_env_vars
from .utils import ClassArgsValidator

Expand Down
3 changes: 3 additions & 0 deletions nni/experiment/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .config import *
from .experiment import Experiment, RetiariiExperiment

from .nni_client import *
3 changes: 3 additions & 0 deletions nni/experiment/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .base import ExperimentConfig, RetiariiExpConfig

from .local import LocalExperimentConfig
115 changes: 115 additions & 0 deletions nni/experiment/config/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import dataclasses
import json
from pathlib import Path
from typing import Any, Dict, Optional, Union


@dataclasses.dataclass(init=False)
class ExperimentConfig:
    """Experiment settings shared by all training services.

    The dataclass-generated ``__init__`` is disabled (``init=False``) and
    replaced by the keyword-based constructor below. Required fields that the
    caller does not supply are filled with the sentinel values in
    ``_placeholder``; ``validate()`` later rejects any field that still holds
    its sentinel.

    Subclasses are expected to give ``_training_service`` a default value;
    ``create_template()`` uses that default to pick the subclass.
    """

    experiment_name: str
    search_space: Any
    max_execution_seconds: Optional[int] = None
    max_trial_number: Optional[int] = None
    trial_concurrency: int
    trial_command: str
    trial_code_directory: Union[Path, str]
    trial_gpu_number: int = 0
    extra_config: Optional[Dict[str, str]] = None

    _training_service: str

    # these values will be used to create template object,
    # and the user should overwrite them later.
    _placeholder = {
        'experiment_name': '_unset_',
        'search_space': '_unset_',
        'trial_concurrency': -1,
        'trial_command': '_unset_',
        'trial_code_directory': '_unset_'
    }

    # simple validation functions
    # complex validation logic with special error message should go to `validate()` method instead
    _value_range = {
        'max_execution_seconds': lambda x: x is None or x > 0,
        'max_trial_number': lambda x: x is None or x > 0,
        'trial_concurrency': lambda x: x > 0,
        'trial_gpu_number': lambda x: x >= 0
    }

    def __init__(self, **kwargs):
        """Populate every dataclass field from ``kwargs``, defaults or placeholders.

        For each field the value is taken, in order of preference, from
        ``kwargs``, from the field's declared default, or from
        ``_placeholder``. Keyword arguments that do not name a field are
        ignored.

        Raises:
            KeyError: a field has no supplied value, no default and no
                placeholder (this is the case for ``_training_service`` on
                this base class itself).
        """
        for field in dataclasses.fields(self):
            if field.name in kwargs:
                setattr(self, field.name, kwargs[field.name])
            # MISSING is a sentinel: test identity so the default object's own
            # ``__ne__`` (which may raise or return a non-bool) is never invoked.
            elif field.default is not dataclasses.MISSING:
                setattr(self, field.name, field.default)
            else:
                setattr(self, field.name, type(self)._placeholder[field.name])

    def validate(self) -> None:
        """Check that required fields are set and simple value ranges hold.

        Raises:
            ValueError: a field still equals its placeholder, a value fails
                its ``_value_range`` predicate, or ``trial_code_directory``
                is not an existing directory.
        """
        # check existence
        for key, placeholder_value in type(self)._placeholder.items():
            if getattr(self, key) == placeholder_value:
                raise ValueError(f'Field "{key}" is not set')

        # TODO: check type

        # check value
        for key, condition in type(self)._value_range.items():
            value = getattr(self, key)
            if not condition(value):
                raise ValueError(f'Field "{key}" ({repr(value)}) out of range')

        # check special fields
        if not Path(self.trial_code_directory).is_dir():
            raise ValueError(f'Trial code directory "{self.trial_code_directory}" does not exist or is not directory')

    def experiment_config_json(self) -> Dict[str, Any]:
        """Return the experiment configuration as a JSON-ready dict.

        Unset (falsy) limits are replaced by large fallback values, and the
        search space is embedded as a JSON string. Entries of ``extra_config``
        are merged last, so they override the common keys on collision.
        """
        # this only contains the common part for most (if not all) training services
        # subclasses should override it to provide exclusive fields
        return {
            'authorName': '_',
            'experimentName': self.experiment_name,
            'trialConcurrency': self.trial_concurrency,
            'maxExecDuration': self.max_execution_seconds or (999 * 24 * 3600),
            'maxTrialNum': self.max_trial_number or 99999,
            'searchSpace': json.dumps(self.search_space),
            'trainingServicePlatform': self._training_service,
            'tuner': {'builtinTunerName': '_user_created_'},
            **(self.extra_config or {})
        }

    def cluster_metadata_json(self) -> Any:
        """Return training-service specific cluster metadata.

        Raises:
            NotImplementedError: always, on this base class.
        """
        # the cluster metadata format is a total mess
        # leave it to each subclass until we refactor the nni manager
        raise NotImplementedError()

    @staticmethod
    def create_template(training_service: str) -> 'ExperimentConfig':
        """Create a placeholder-filled config for ``training_service``.

        Only direct subclasses of ``ExperimentConfig`` are examined; the first
        one whose ``_training_service`` field default equals
        ``training_service`` is instantiated with no arguments.

        Raises:
            ValueError: no subclass declares that training service.
        """
        for cls in ExperimentConfig.__subclasses__():
            for field in dataclasses.fields(cls):
                if field.name == '_training_service' and field.default == training_service:
                    return cls()
        raise ValueError(f'Unrecognized training service {training_service}')


class RetiariiExpConfig(ExperimentConfig):
    """Config variant whose templates are pre-filled for Retiarii trials."""

    @staticmethod
    def create_template(training_service: str) -> 'ExperimentConfig':
        """Create a template for ``training_service`` with Retiarii defaults.

        The subclass lookup is delegated to
        ``ExperimentConfig.create_template`` so both entry points agree on
        which services exist and how an unknown one is reported. The search
        space, trial command and trial code directory of the resulting object
        are then overwritten with fixed values.

        Raises:
            ValueError: no ``ExperimentConfig`` subclass declares
                ``training_service``.
        """
        config_obj = ExperimentConfig.create_template(training_service)
        config_obj.search_space = {}
        config_obj.trial_command = 'python3 -m nni.retiarii.trial_entry'
        # FIXME: expose this field to users
        config_obj.trial_code_directory = '../..'
        return config_obj
40 changes: 40 additions & 0 deletions nni/experiment/config/local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict

from .base import ExperimentConfig


@dataclass(init=False)
class LocalExperimentConfig(ExperimentConfig):
    """Experiment configuration whose training service is ``'local'``."""

    # NOTE: declared, but neither JSON builder below emits it.
    use_active_gpu: bool = False

    _training_service: str = 'local'

    def experiment_config_json(self) -> Dict[str, Any]:
        """Extend the common config with a ``clusterMetaData`` key/value list.

        The list holds the resolved trial code directory (``codeDir``)
        followed by the trial command (``command``).
        """
        config = super().experiment_config_json()
        code_dir = str(Path(self.trial_code_directory).resolve())
        entries = (('codeDir', code_dir), ('command', self.trial_command))
        config['clusterMetaData'] = [
            {'key': name, 'value': value} for name, value in entries
        ]
        return config

    def cluster_metadata_json(self) -> Any:
        """Return the trial command and resolved code directory under ``trial_config``."""
        trial_config = {'command': self.trial_command}
        trial_config['codeDir'] = str(Path(self.trial_code_directory).resolve())
        return {'trial_config': trial_config}
Loading