Skip to content

Commit

Permalink
Merge pull request #89 from Microsoft/master
Browse files Browse the repository at this point in the history
merge master
  • Loading branch information
SparkSnail authored Dec 7, 2018
2 parents e93a7da + c265903 commit b1ce7da
Show file tree
Hide file tree
Showing 7 changed files with 301 additions and 148 deletions.
4 changes: 4 additions & 0 deletions docs/ExperimentConfig.md
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ machineList:

__operator__ specifies the kubeflow operator to be used. NNI supports __tf-operator__ in the current version.

* __storage__

__storage__ specifies the storage type of kubeflow, one of {__nfs__, __azureStorage__}. This field is optional, and the default value is __nfs__. If the config uses azureStorage, this field must be set.

* __nfs__

__server__ is the host of nfs server
Expand Down
2 changes: 2 additions & 0 deletions docs/KubeflowMode.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ trial:
image: {your_docker_image_for_tensorflow_worker}
kubeflowConfig:
operator: tf-operator
storage: nfs
nfs:
server: {your_nfs_server}
path: {your_nfs_server_exported_path}
Expand All @@ -71,6 +72,7 @@ If you use Azure Kubernetes Service, you should set `kubeflowConfig` in your co
```
kubeflowConfig:
operator: tf-operator
storage: azureStorage
keyVault:
vaultName: {your_vault_name}
name: {your_secret_name}
Expand Down
10 changes: 10 additions & 0 deletions src/nni_manager/rest_server/restValidationSchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ export namespace ValidationSchemas {
memoryMB: joi.number().min(100),
gpuNum: joi.number().min(0).required(),
command: joi.string().min(1).required()
}),
master: joi.object({
replicas: joi.number().min(1).required(),
image: joi.string().min(1),
outputDir: joi.string(),
cpuNum: joi.number().min(1),
memoryMB: joi.number().min(100),
gpuNum: joi.number().min(0).required(),
command: joi.string().min(1).required()
})
}),
pai_config: joi.object({
Expand All @@ -68,6 +77,7 @@ export namespace ValidationSchemas {
}),
kubeflow_config: joi.object({
operator: joi.string().min(1).required(),
storage: joi.string().min(1),
nfs: joi.object({
server: joi.string().min(1).required(),
path: joi.string().min(1).required()
Expand Down
74 changes: 57 additions & 17 deletions src/nni_manager/training_service/kubeflow/kubeflowConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,41 +23,63 @@ import { TrialConfig } from "../common/trialConfig";


/** Operator types that kubeflow supports. */
export type KubeflowOperator = 'tf-operator' | 'pytorch-operator';
/** Plural resource name used in K8S for the jobs of each supported operator. */
export type KubeflowOperatorPlural = 'tfjobs' | 'pytorchjobs';
/** Job kind name used in K8S for each supported operator. */
export type KubeflowOperatorJobKind = 'TFJob' | 'PyTorchJob';
/** Storage types that a kubeflow cluster configuration can name. */
export type KubeflowStorageKind = 'nfs' | 'azureStorage';

/**
 * Map from Kubeflow operator name to its plural name in K8S.
 *
 * Holds exactly one entry per member of KubeflowOperator, so a lookup with a
 * valid operator name always finds a value.
 */
export const kubeflowOperatorMap: Map<KubeflowOperator, KubeflowOperatorPlural> = new Map<KubeflowOperator, KubeflowOperatorPlural>([
    ['tf-operator', 'tfjobs'],
    ['pytorch-operator', 'pytorchjobs']
]);

/**
 * Lookup table from a Kubeflow operator name to the job kind name
 * that operator uses in K8S.
 */
export const kubeflowOperatorJobKindMap: Map<KubeflowOperator, KubeflowOperatorJobKind> =
    new Map<KubeflowOperator, KubeflowOperatorJobKind>([
        ['tf-operator', 'TFJob'],
        ['pytorch-operator', 'PyTorchJob'],
    ]);

/**
 * Kubeflow cluster configuration fields shared by every storage backend.
 *
 * Storage-specific settings are declared by the subclasses that extend this
 * class, not here.
 */
export class KubeflowClusterConfigBase {
    /** Name of Kubeflow operator, like tf-operator */
    public readonly operator: KubeflowOperator;
    /** Storage type of the cluster; stored exactly as given, no default is applied here. */
    public readonly storage?: KubeflowStorageKind;

    /**
     * Constructor
     * @param operator Name of Kubeflow operator, like tf-operator
     * @param storage Storage type of the cluster; may be omitted
     */
    constructor(operator: KubeflowOperator, storage?: KubeflowStorageKind) {
        this.operator = operator;
        this.storage = storage;
    }
}

/**
 * Kubeflow cluster configuration that additionally carries an NFS config.
 */
export class KubeflowClusterConfigNFS extends KubeflowClusterConfigBase {
    /**
     * @param operator Name of Kubeflow operator, forwarded to the base class
     * @param nfs NFS settings, exposed on the instance as the readonly `nfs` property
     * @param storage Optional storage type, forwarded to the base class
     */
    constructor(operator: KubeflowOperator, public readonly nfs: NFSConfig, storage?: KubeflowStorageKind) {
        // The parameter property assigns `this.nfs` right after the base constructor returns.
        super(operator, storage);
    }
}

export class KubeflowClusterConfigAzure extends KubeflowClusterConfigBase{
public readonly keyVault: keyVaultConfig;
public readonly azureStorage: AzureStorage;

constructor(operator: KubeflowOperator, keyVault: keyVaultConfig, azureStorage: AzureStorage, storage?: KubeflowStorageKind) {
super(operator, storage)
this.keyVault = keyVault;
this.azureStorage = azureStorage;
}
Expand Down Expand Up @@ -142,15 +164,33 @@ export class KubeflowTrialConfigTemplate {
}
}

/**
 * Trial configuration fields shared by every Kubeflow operator flavour.
 */
export class KubeflowTrialConfigBase {
    /** Code directory of the trial, stored exactly as passed to the constructor. */
    public readonly codeDir: string;

    /**
     * @param codeDir Code directory of the trial
     */
    constructor(codeDir: string) {
        this.codeDir = codeDir;
    }
}

/**
 * Trial configuration with a required `worker` template and an optional
 * `ps` template.
 */
export class KubeflowTrialConfigTensorflow extends KubeflowTrialConfigBase {
    /** Template for the `ps` role; undefined when the caller supplies none. */
    public readonly ps?: KubeflowTrialConfigTemplate;
    /** Template for the `worker` role. */
    public readonly worker: KubeflowTrialConfigTemplate;

    /**
     * @param codeDir Code directory of the trial, forwarded to the base class
     * @param worker Template for the `worker` role
     * @param ps Optional template for the `ps` role
     */
    constructor(codeDir: string, worker: KubeflowTrialConfigTemplate, ps?: KubeflowTrialConfigTemplate) {
        // super() must run before any access to `this` in a derived class.
        super(codeDir);
        this.ps = ps;
        this.worker = worker;
    }
}

/**
 * Trial configuration with a required `worker` template and an optional
 * `master` template.
 */
export class KubeflowTrialConfigPytorch extends KubeflowTrialConfigBase {
    /** Template for the `master` role; undefined when the caller supplies none. */
    public readonly master?: KubeflowTrialConfigTemplate;
    /** Template for the `worker` role. */
    public readonly worker: KubeflowTrialConfigTemplate;

    /**
     * @param codeDir Code directory of the trial, forwarded to the base class
     * @param worker Template for the `worker` role
     * @param master Optional template for the `master` role
     */
    constructor(
        codeDir: string,
        worker: KubeflowTrialConfigTemplate,
        master?: KubeflowTrialConfigTemplate
    ) {
        super(codeDir);
        // Assignment order (master, then worker) fixes the own-property order of the instance.
        this.master = master;
        this.worker = worker;
    }
}

Loading

0 comments on commit b1ce7da

Please sign in to comment.