Rename heterogeneous to hybrid (#3252)

microsoft · Jan 5, 2021 · c702241 · c702241
1 parent b4f0d32
commit c702241
Show file tree

Hide file tree

Showing 10 changed files with 47 additions and 47 deletions.
diff --git a/docs/en_US/TrainingService/HeterogeneousMode.rst b/docs/en_US/TrainingService/HybridMode.rst
@@ -1,12 +1,12 @@
-**Run an Experiment on Heterogeneous Mode**
+**Run an Experiment on Hybrid Mode**
 ===========================================
 
-Run NNI on heterogeneous mode means that NNI will run trials jobs in multiple kinds of training platforms. For example, NNI could submit trial jobs to remote machine and AML simultaneously.
+Running NNI in hybrid mode means that NNI will run trial jobs on multiple kinds of training platforms. For example, NNI could submit trial jobs to remote machines and AML simultaneously.
 
 Setup environment
 -----------------
 
-NNI has supported `local <./LocalMode.rst>`__\ , `remote <./RemoteMachineMode.rst>`__\ , `PAI <./PaiMode.rst>`__\ , and `AML <./AMLMode.rst>`__ for heterogeneous training service. Before starting an experiment using these mode, users should setup the corresponding environment for the platforms. More details about the environment setup could be found in the corresponding docs.
+NNI supports `local <./LocalMode.rst>`__\ , `remote <./RemoteMachineMode.rst>`__\ , `PAI <./PaiMode.rst>`__\ , and `AML <./AMLMode.rst>`__ for the hybrid training service. Before starting an experiment using these modes, users should set up the corresponding environment for each platform. More details about the environment setup can be found in the corresponding docs.
 
 Run an experiment
 -----------------
@@ -20,7 +20,7 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
     trialConcurrency: 2
     maxExecDuration: 1h
     maxTrialNum: 10
-    trainingServicePlatform: heterogeneous
+    trainingServicePlatform: hybrid
     searchSpacePath: search_space.json
     #choice: true, false
     useAnnotation: false
@@ -33,7 +33,7 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
       command: python3 mnist.py
       codeDir: .
       gpuNum: 1
-    heterogeneousConfig:
+    hybridConfig:
       trainingServicePlatforms:
         - local
         - remote
@@ -44,11 +44,11 @@ Use ``examples/trials/mnist-tfv1`` as an example. The NNI config YAML file's con
         username: bob
         passwd: bob123
 
-Configurations for heterogeneous mode:
+Configurations for hybrid mode:
 
-heterogeneousConfig:
+hybridConfig:
 
-* trainingServicePlatforms. required key. This field specify the platforms used in heterogeneous mode, the values using yaml list format. NNI support setting ``local``, ``remote``, ``aml``, ``pai`` in this field.
+* trainingServicePlatforms. required key. This field specifies the platforms used in hybrid mode; the values use YAML list format. NNI supports setting ``local``, ``remote``, ``aml``, ``pai`` in this field.
 
 
-.. Note:: If setting a platform in trainingServicePlatforms mode, users should also set the corresponding configuration for the platform. For example, if set ``remote`` as one of the platform, should also set ``machineList`` and ``remoteConfig`` configuration.
+.. Note:: When a platform is listed in trainingServicePlatforms, users should also set the corresponding configuration for that platform. For example, if ``remote`` is one of the platforms, ``machineList`` and ``remoteConfig`` should also be set. The local platform in hybrid mode does not support Windows for now.
diff --git a/docs/en_US/training_services.rst b/docs/en_US/training_services.rst
@@ -11,4 +11,4 @@ Introduction to NNI Training Services
     FrameworkController<./TrainingService/FrameworkControllerMode>
     DLTS<./TrainingService/DLTSMode>
     AML<./TrainingService/AMLMode>
-    Heterogeneous<./TrainingService/HeterogeneousMode>
+    Hybrid<./TrainingService/HybridMode>
diff --git a/examples/trials/mnist-tfv1/config_heterogeneous.yml b/examples/trials/mnist-tfv1/config_heterogeneous.yml
@@ -3,7 +3,7 @@ experimentName: example_mnist
 trialConcurrency: 3
 maxExecDuration: 1h
 maxTrialNum: 10
-trainingServicePlatform: heterogeneous
+trainingServicePlatform: hybrid
 searchSpacePath: search_space.json
 #choice: true, false
 useAnnotation: false
@@ -18,7 +18,7 @@ trial:
   command: python3 mnist.py
   codeDir: .
   gpuNum: 0
-heterogeneousConfig:
+hybridConfig:
   trainingServicePlatforms:
     - local
     - remote

diff --git a/nni/runtime/platform/__init__.py b/nni/runtime/platform/__init__.py
@@ -9,7 +9,7 @@
     from .standalone import *
 elif trial_env_vars.NNI_PLATFORM == 'unittest':
     from .test import *
-elif trial_env_vars.NNI_PLATFORM in ('local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'):
+elif trial_env_vars.NNI_PLATFORM in ('local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'):
     from .local import *
 else:
     raise RuntimeError('Unknown platform %s' % trial_env_vars.NNI_PLATFORM)
diff --git a/nni/tools/nnictl/config_schema.py b/nni/tools/nnictl/config_schema.py
@@ -124,7 +124,7 @@ def validate(self, data):
     Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$', error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')),
     Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999),
     'trainingServicePlatform': setChoice(
-        'trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'),
+        'trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'),
     Optional('searchSpacePath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'searchSpacePath'),
     Optional('multiPhase'): setType('multiPhase', bool),
     Optional('multiThread'): setType('multiThread', bool),
@@ -262,7 +262,7 @@ def validate(self, data):
     }
 }
 
-heterogeneous_trial_schema = {
+hybrid_trial_schema = {
     'trial': {
         'codeDir': setPathCheck('codeDir'),
         Optional('nniManagerNFSMountPath'): setPathCheck('nniManagerNFSMountPath'),
@@ -279,8 +279,8 @@ def validate(self, data):
     }
 }
 
-heterogeneous_config_schema = {
-    'heterogeneousConfig': {
+hybrid_config_schema = {
+    'hybridConfig': {
         'trainingServicePlatforms': ['local', 'remote', 'pai', 'aml']
     }
 }
@@ -461,7 +461,7 @@ def validate(self, data):
     'frameworkcontroller': Schema({**common_schema, **frameworkcontroller_trial_schema, **frameworkcontroller_config_schema}),
     'aml': Schema({**common_schema, **aml_trial_schema, **aml_config_schema}),
     'dlts': Schema({**common_schema, **dlts_trial_schema, **dlts_config_schema}),
-    'heterogeneous': Schema({**common_schema, **heterogeneous_trial_schema, **heterogeneous_config_schema, **machine_list_schema,
+    'hybrid': Schema({**common_schema, **hybrid_trial_schema, **hybrid_config_schema, **machine_list_schema,
                              **pai_config_schema, **aml_config_schema, **remote_config_schema}),
 }
 
@@ -479,7 +479,7 @@ def validate_extras(self, experiment_config):
         self.validate_pai_trial_conifg(experiment_config)
         self.validate_kubeflow_operators(experiment_config)
         self.validate_eth0_device(experiment_config)
-        self.validate_heterogeneous_platforms(experiment_config)
+        self.validate_hybrid_platforms(experiment_config)
 
     def validate_tuner_adivosr_assessor(self, experiment_config):
         if experiment_config.get('advisor'):
@@ -590,15 +590,15 @@ def validate_eth0_device(self, experiment_config):
                 and 'eth0' not in netifaces.interfaces():
             raise SchemaError('This machine does not contain eth0 network device, please set nniManagerIp in config file!')
 
-    def validate_heterogeneous_platforms(self, experiment_config):
+    def validate_hybrid_platforms(self, experiment_config):
         required_config_name_map = {
             'remote': 'machineList',
             'aml': 'amlConfig',
             'pai': 'paiConfig'
         }
-        if experiment_config.get('trainingServicePlatform') == 'heterogeneous':
-            for platform in experiment_config['heterogeneousConfig']['trainingServicePlatforms']:
+        if experiment_config.get('trainingServicePlatform') == 'hybrid':
+            for platform in experiment_config['hybridConfig']['trainingServicePlatforms']:
                 config_name = required_config_name_map.get(platform)
                 if config_name and not experiment_config.get(config_name):
-                    raise SchemaError('Need to set {0} for {1} in heterogeneous mode!'.format(config_name, platform))
+                    raise SchemaError('Need to set {0} for {1} in hybrid mode!'.format(config_name, platform))
 
diff --git a/nni/tools/nnictl/launcher.py b/nni/tools/nnictl/launcher.py
@@ -300,23 +300,23 @@ def set_aml_config(experiment_config, port, config_file_name):
     #set trial_config
     return set_trial_config(experiment_config, port, config_file_name), err_message
 
-def set_heterogeneous_config(experiment_config, port, config_file_name):
-    '''set heterogeneous configuration'''
-    heterogeneous_config_data = dict()
-    heterogeneous_config_data['heterogeneous_config'] = experiment_config['heterogeneousConfig']
-    platform_list = experiment_config['heterogeneousConfig']['trainingServicePlatforms']
+def set_hybrid_config(experiment_config, port, config_file_name):
+    '''set hybrid configuration'''
+    hybrid_config_data = dict()
+    hybrid_config_data['hybrid_config'] = experiment_config['hybridConfig']
+    platform_list = experiment_config['hybridConfig']['trainingServicePlatforms']
     for platform in platform_list:
         if platform == 'aml':
-            heterogeneous_config_data['aml_config'] = experiment_config['amlConfig']
+            hybrid_config_data['aml_config'] = experiment_config['amlConfig']
         elif platform ==  'remote':
             if experiment_config.get('remoteConfig'):
-                heterogeneous_config_data['remote_config'] = experiment_config['remoteConfig']
-            heterogeneous_config_data['machine_list'] = experiment_config['machineList']
+                hybrid_config_data['remote_config'] = experiment_config['remoteConfig']
+            hybrid_config_data['machine_list'] = experiment_config['machineList']
         elif platform == 'local' and experiment_config.get('localConfig'):
-            heterogeneous_config_data['local_config'] = experiment_config['localConfig']
+            hybrid_config_data['local_config'] = experiment_config['localConfig']
         elif platform == 'pai':
-            heterogeneous_config_data['pai_config'] = experiment_config['paiConfig']
-    response = rest_put(cluster_metadata_url(port), json.dumps(heterogeneous_config_data), REST_TIME_OUT)
+            hybrid_config_data['pai_config'] = experiment_config['paiConfig']
+    response = rest_put(cluster_metadata_url(port), json.dumps(hybrid_config_data), REST_TIME_OUT)
     err_message = None
     if not response or not response.status_code == 200:
         if response is not None:
@@ -412,10 +412,10 @@ def set_experiment(experiment_config, mode, port, config_file_name):
             {'key': 'aml_config', 'value': experiment_config['amlConfig']})
         request_data['clusterMetaData'].append(
             {'key': 'trial_config', 'value': experiment_config['trial']})
-    elif experiment_config['trainingServicePlatform'] == 'heterogeneous':
+    elif experiment_config['trainingServicePlatform'] == 'hybrid':
         request_data['clusterMetaData'].append(
-            {'key': 'heterogeneous_config', 'value': experiment_config['heterogeneousConfig']})
-        platform_list = experiment_config['heterogeneousConfig']['trainingServicePlatforms']
+            {'key': 'hybrid_config', 'value': experiment_config['hybridConfig']})
+        platform_list = experiment_config['hybridConfig']['trainingServicePlatforms']
         request_dict = {
             'aml': {'key': 'aml_config', 'value': experiment_config.get('amlConfig')},
             'remote': {'key': 'machine_list', 'value': experiment_config.get('machineList')},
@@ -460,8 +460,8 @@ def set_platform_config(platform, experiment_config, port, config_file_name, res
         config_result, err_msg = set_dlts_config(experiment_config, port, config_file_name)
     elif platform == 'aml':
         config_result, err_msg = set_aml_config(experiment_config, port, config_file_name)
-    elif platform == 'heterogeneous':
-        config_result, err_msg = set_heterogeneous_config(experiment_config, port, config_file_name)
+    elif platform == 'hybrid':
+        config_result, err_msg = set_hybrid_config(experiment_config, port, config_file_name)
     else:
         raise Exception(ERROR_INFO % 'Unsupported platform!')
         exit(1)

diff --git a/ts/nni_manager/main.ts b/ts/nni_manager/main.ts
@@ -37,7 +37,7 @@ function initStartupInfo(
 }
 
 async function initContainer(foreground: boolean, platformMode: string, logFileName?: string): Promise<void> {
-    const routerPlatformMode = ['remote', 'pai', 'aml', 'heterogeneous'];
+    const routerPlatformMode = ['remote', 'pai', 'aml', 'hybrid'];
     if (routerPlatformMode.includes(platformMode)) {
         Container.bind(TrainingService)
             .to(RouterTrainingService)
@@ -97,7 +97,7 @@ async function initContainer(foreground: boolean, platformMode: string, logFileN
 
 function usage(): void {
     console.info('usage: node main.js --port <port> --mode \
-    <local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/heterogeneous> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
+    <local/remote/pai/kubeflow/frameworkcontroller/paiYarn/aml/adl/hybrid> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
 }
 
 const strPort: string = parseArg(['--port', '-p']);
@@ -117,7 +117,7 @@ const foreground: boolean = foregroundArg.toLowerCase() === 'true' ? true : fals
 const port: number = parseInt(strPort, 10);
 
 const mode: string = parseArg(['--mode', '-m']);
-if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'heterogeneous'].includes(mode)) {
+if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller', 'paiYarn', 'dlts', 'aml', 'adl', 'hybrid'].includes(mode)) {
     console.log(`FATAL: unknown mode: ${mode}`);
     usage();
     process.exit(1);

diff --git a/ts/nni_manager/rest_server/restValidationSchemas.ts b/ts/nni_manager/rest_server/restValidationSchemas.ts
@@ -183,7 +183,7 @@ export namespace ValidationSchemas {
                 maxTrialNumPerGpu: joi.number(),
                 useActiveGpu: joi.boolean()
             }),
-            heterogeneous_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
+            hybrid_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
                 trainingServicePlatforms: joi.array(),
             }),
             nni_manager_ip: joi.object({ // eslint-disable-line @typescript-eslint/camelcase

diff --git a/ts/nni_manager/training_service/common/trialConfigMetadataKey.ts b/ts/nni_manager/training_service/common/trialConfigMetadataKey.ts
@@ -11,7 +11,7 @@ export enum TrialConfigMetadataKey {
     LOCAL_CONFIG = 'local_config',
     TRIAL_CONFIG = 'trial_config',
     REMOTE_CONFIG = 'remote_config',
-    HETEROGENEOUS_CONFIG = 'heterogeneous_config',
+    HYBRID_CONFIG = 'hybrid_config',
     EXPERIMENT_ID = 'experimentId',
     MULTI_PHASE = 'multiPhase',
     RANDOM_SCHEDULER = 'random_scheduler',
@@ -24,7 +24,7 @@ export enum TrialConfigMetadataKey {
     AML_CLUSTER_CONFIG = 'aml_config',
     VERSION_CHECK = 'version_check',
     LOG_COLLECTION = 'log_collection',
-    // Used to set platform for heterogeneous in reuse mode, 
+    // Used to set platform for hybrid in reuse mode, 
     // temproarily change and will refactor config schema in the future
     PLATFORM_LIST = 'platform_list'
 }
diff --git a/ts/nni_manager/training_service/reusable/routerTrainingService.ts b/ts/nni_manager/training_service/reusable/routerTrainingService.ts
@@ -95,8 +95,8 @@ class RouterTrainingService implements TrainingService {
 
     public async setClusterMetadata(key: string, value: string): Promise<void> {
         if (this.internalTrainingService === undefined) {
-            // Need to refactor configuration, remove heterogeneous_config field in the future
-            if (key === TrialConfigMetadataKey.HETEROGENEOUS_CONFIG){
+            // Need to refactor configuration, remove hybrid_config field in the future
+            if (key === TrialConfigMetadataKey.HYBRID_CONFIG){
                 this.internalTrainingService = component.get(TrialDispatcher);
                 const heterogenousConfig: HeterogenousConfig = <HeterogenousConfig>JSON.parse(value);
                 if (this.internalTrainingService === undefined) {