diff --git a/docs/en_US/TrainingService/KubeflowMode.md b/docs/en_US/TrainingService/KubeflowMode.md index 6889026a11..ddc98821ab 100644 --- a/docs/en_US/TrainingService/KubeflowMode.md +++ b/docs/en_US/TrainingService/KubeflowMode.md @@ -198,6 +198,8 @@ Trial configuration in kubeflow mode have the following configuration keys: * image * Required key. In kubeflow mode, your trial program will be scheduled by Kubernetes to run in [Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod/). This key is used to specify the Docker image used to create the pod where your trail program will run. * We already build a docker image [msranni/nni](https://hub.docker.com/r/msranni/nni/) on [Docker Hub](https://hub.docker.com/). It contains NNI python packages, Node modules and javascript artifact files required to start experiment, and all of NNI dependencies. The docker file used to build this image can be found at [here](https://github.com/Microsoft/nni/tree/master/deployment/docker/Dockerfile). You can either use this image directly in your config file, or build your own image based on it. + * privateRegistryAuthPath + * Optional key. Path to the Docker `config.json` file that holds the authorization token for a private Docker registry; it is used to pull the image from that private registry. See [Pull an Image from a Private Registry](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/). * apiVersion * Required key. The API version of your Kubeflow. * ps (optional). This config section is used to configure Tensorflow parameter server role. diff --git a/docs/en_US/TrainingService/PaiMode.md b/docs/en_US/TrainingService/PaiMode.md index ef9fc093cb..4a3543236d 100644 --- a/docs/en_US/TrainingService/PaiMode.md +++ b/docs/en_US/TrainingService/PaiMode.md @@ -53,6 +53,8 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMod * Optional key. Set the virtualCluster of OpenPAI. If omitted, the job will run on default virtual cluster. * shmMB * Optional key. 
Set the shmMB configuration of OpenPAI, it set the shared memory for one task in the task role. +* authFile + * Optional key. Set the path of the auth file used to pull the job image from a private Docker registry in PAI mode; the path must be an HDFS URI (`hdfs://xxx.xxx.xxx.xxx:xxx/<path>`). See the [OpenPAI FAQ](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job). Once complete to fill NNI experiment config file and save (for example, save as exp_pai.yml), then run the following command ``` diff --git a/src/nni_manager/rest_server/restValidationSchemas.ts b/src/nni_manager/rest_server/restValidationSchemas.ts index c4a465c6f8..896e4429a5 100644 --- a/src/nni_manager/rest_server/restValidationSchemas.ts +++ b/src/nni_manager/rest_server/restValidationSchemas.ts @@ -51,10 +51,12 @@ export namespace ValidationSchemas { command: joi.string().min(1), virtualCluster: joi.string(), shmMB: joi.number(), + authFile: joi.string(), nasMode: joi.string().valid('classic_mode', 'enas_mode', 'oneshot_mode'), worker: joi.object({ replicas: joi.number().min(1).required(), image: joi.string().min(1), + privateRegistryAuthPath: joi.string().min(1), outputDir: joi.string(), cpuNum: joi.number().min(1), memoryMB: joi.number().min(100), @@ -64,6 +66,7 @@ export namespace ValidationSchemas { ps: joi.object({ replicas: joi.number().min(1).required(), image: joi.string().min(1), + privateRegistryAuthPath: joi.string().min(1), outputDir: joi.string(), cpuNum: joi.number().min(1), memoryMB: joi.number().min(100), @@ -73,6 +76,7 @@ export namespace ValidationSchemas { master: joi.object({ replicas: joi.number().min(1).required(), image: joi.string().min(1), + privateRegistryAuthPath: joi.string().min(1), outputDir: joi.string(), cpuNum: joi.number().min(1), memoryMB: joi.number().min(100), @@ -83,6 +87,7 @@ export namespace ValidationSchemas { name: joi.string().min(1), taskNum: joi.number().min(1).required(), image: joi.string().min(1), + privateRegistryAuthPath: joi.string().min(1), 
outputDir: joi.string(), cpuNum: joi.number().min(1), memoryMB: joi.number().min(100), diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts index d3a82f76d3..b04b750182 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts @@ -43,8 +43,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi public readonly taskNum: number; constructor(taskNum: number, command : string, gpuNum : number, cpuNum: number, memoryMB: number, image: string, - frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy) { - super(command, gpuNum, cpuNum, memoryMB, image); + frameworkAttemptCompletionPolicy: FrameworkAttemptCompletionPolicy, privateRegistryFilePath?: string | undefined) { + super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryFilePath); this.frameworkAttemptCompletionPolicy = frameworkAttemptCompletionPolicy; this.name = name; this.taskNum = taskNum; diff --git a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts index d5b6e282d9..d38c656aaa 100644 --- a/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts @@ -305,7 +305,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple } // Generate frameworkcontroller job resource config object const frameworkcontrollerJobConfig: any = - this.generateFrameworkControllerJobConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName, podResources); + 
await this.generateFrameworkControllerJobConfig(trialJobId, trialWorkingFolder, frameworkcontrollerJobName, podResources); return Promise.resolve(frameworkcontrollerJobConfig); } @@ -329,8 +329,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple * @param frameworkcontrollerJobName job name * @param podResources pod template */ - private generateFrameworkControllerJobConfig(trialJobId: string, trialWorkingFolder: string, - frameworkcontrollerJobName : string, podResources : any) : any { + private async generateFrameworkControllerJobConfig(trialJobId: string, trialWorkingFolder: string, + frameworkcontrollerJobName : string, podResources : any) : Promise { if (this.fcClusterConfig === undefined) { throw new Error('frameworkcontroller Cluster config is not initialized'); } @@ -345,12 +345,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple if (containerPort === undefined) { throw new Error('Container port is not initialized'); } + const taskRole: any = this.generateTaskRoleConfig( trialWorkingFolder, this.fcTrialConfig.taskRoles[index].image, `run_${this.fcTrialConfig.taskRoles[index].name}.sh`, podResources[index], - containerPort + containerPort, + await this.createRegistrySecret(this.fcTrialConfig.taskRoles[index].privateRegistryAuthPath) ); taskRoles.push({ name: this.fcTrialConfig.taskRoles[index].name, @@ -363,7 +365,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple }); } - return { + return Promise.resolve({ apiVersion: `frameworkcontroller.microsoft.com/v1`, kind: 'Framework', metadata: { @@ -379,11 +381,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple executionType: 'Start', taskRoles: taskRoles } - }; + }); } - private generateTaskRoleConfig(trialWorkingFolder: string, replicaImage: string, runScriptFile: string, - podResources: any, containerPort: number): any { + private generateTaskRoleConfig(trialWorkingFolder: 
string, replicaImage: string, runScriptFile: string, + podResources: any, containerPort: number, privateRegistrySecretName: string | undefined): any { if (this.fcClusterConfig === undefined) { throw new Error('frameworkcontroller Cluster config is not initialized'); } @@ -451,13 +453,22 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple mountPath: '/mnt/frameworkbarrier' }] }]; - const spec: any = { - containers: containers, - initContainers: initContainers, - restartPolicy: 'OnFailure', - volumes: volumeSpecMap.get('nniVolumes'), - hostNetwork: false + + let spec: any = { + containers: containers, + initContainers: initContainers, + restartPolicy: 'OnFailure', + volumes: volumeSpecMap.get('nniVolumes'), + hostNetwork: false }; + if(privateRegistrySecretName) { + spec.imagePullSecrets = [ + { + name: privateRegistrySecretName + } + ] + } + if (this.fcClusterConfig.serviceAccountName !== undefined) { spec.serviceAccountName = this.fcClusterConfig.serviceAccountName; } diff --git a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts index f737b41fb4..41228087ba 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts @@ -135,8 +135,8 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig { export class KubeflowTrialConfigTemplate extends KubernetesTrialConfigTemplate { public readonly replicas: number; constructor(replicas: number, command : string, gpuNum : number, - cpuNum: number, memoryMB: number, image: string) { - super(command, gpuNum, cpuNum, memoryMB, image); + cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) { + super(command, gpuNum, cpuNum, memoryMB, image, privateRegistryAuthPath); this.replicas = replicas; } } diff --git 
a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts index c64ecdb011..b65a548fb6 100644 --- a/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts @@ -347,7 +347,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber } // Generate kubeflow job resource config object - const kubeflowJobConfig: any = this.generateKubeflowJobConfig(trialJobId, trialWorkingFolder, kubeflowJobName, workerPodResources, + const kubeflowJobConfig: any = await this.generateKubeflowJobConfig(trialJobId, trialWorkingFolder, kubeflowJobName, workerPodResources, nonWorkerResources); return Promise.resolve(kubeflowJobConfig); @@ -361,8 +361,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber * @param workerPodResources worker pod template * @param nonWorkerPodResources non-worker pod template, like ps or master */ - private generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any, - nonWorkerPodResources?: any) : any { + private async generateKubeflowJobConfig(trialJobId: string, trialWorkingFolder: string, kubeflowJobName : string, workerPodResources : any, + nonWorkerPodResources?: any) : Promise { if (this.kubeflowClusterConfig === undefined) { throw new Error('Kubeflow Cluster config is not initialized'); } @@ -377,29 +377,32 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber const replicaSpecsObj: any = {}; const replicaSpecsObjMap: Map = new Map(); - if (this.kubeflowTrialConfig.operatorType === 'tf-operator') { const tensorflowTrialConfig: KubeflowTrialConfigTensorflow = this.kubeflowTrialConfig; + let privateRegistrySecretName = await 
this.createRegistrySecret(tensorflowTrialConfig.worker.privateRegistryAuthPath); replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.worker.replicas, - tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources); + tensorflowTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName); if (tensorflowTrialConfig.ps !== undefined) { + let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(tensorflowTrialConfig.ps.privateRegistryAuthPath); replicaSpecsObj.Ps = this.generateReplicaConfig(trialWorkingFolder, tensorflowTrialConfig.ps.replicas, - tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources); + tensorflowTrialConfig.ps.image, 'run_ps.sh', nonWorkerPodResources, privateRegistrySecretName); } replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {tfReplicaSpecs: replicaSpecsObj}); } else if (this.kubeflowTrialConfig.operatorType === 'pytorch-operator') { const pytorchTrialConfig: KubeflowTrialConfigPytorch = this.kubeflowTrialConfig; if (pytorchTrialConfig.worker !== undefined) { + let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.worker.privateRegistryAuthPath); replicaSpecsObj.Worker = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.worker.replicas, - pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources); + pytorchTrialConfig.worker.image, 'run_worker.sh', workerPodResources, privateRegistrySecretName); } + let privateRegistrySecretName: string | undefined = await this.createRegistrySecret(pytorchTrialConfig.master.privateRegistryAuthPath); replicaSpecsObj.Master = this.generateReplicaConfig(trialWorkingFolder, pytorchTrialConfig.master.replicas, - pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources); + pytorchTrialConfig.master.image, 'run_master.sh', nonWorkerPodResources, privateRegistrySecretName); 
replicaSpecsObjMap.set(this.kubernetesCRDClient.jobKind, {pytorchReplicaSpecs: replicaSpecsObj}); } - return { + return Promise.resolve({ apiVersion: `kubeflow.org/${this.kubernetesCRDClient.apiVersion}`, kind: this.kubernetesCRDClient.jobKind, metadata: { @@ -412,7 +415,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber } }, spec: replicaSpecsObjMap.get(this.kubernetesCRDClient.jobKind) - }; + }); } /** @@ -424,7 +427,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber * @param podResources pod resource config section */ private generateReplicaConfig(trialWorkingFolder: string, replicaNumber: number, replicaImage: string, runScriptFile: string, - podResources: any): any { + podResources: any, privateRegistrySecretName: string | undefined): any { if (this.kubeflowClusterConfig === undefined) { throw new Error('Kubeflow Cluster config is not initialized'); } @@ -436,7 +439,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber if (this.kubernetesCRDClient === undefined) { throw new Error('Kubeflow operator client is not initialized'); } - + // The config spec for volume field const volumeSpecMap: Map = new Map(); if (this.kubeflowClusterConfig.storageType === 'azureStorage') { volumeSpecMap.set('nniVolumes', [ @@ -459,7 +462,34 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber } }]); } - + // The config spec for container field + const containersSpecMap: Map = new Map(); + containersSpecMap.set('containers', [ + { + // Kubeflow tensorflow operator requires that containers' name must be tensorflow + // TODO: change the name based on operator's type + name: this.kubernetesCRDClient.containerName, + image: replicaImage, + args: ['sh', `${path.join(trialWorkingFolder, runScriptFile)}`], + volumeMounts: [ + { + name: 'nni-vol', + mountPath: this.CONTAINER_MOUNT_PATH + }], + resources: podResources + } + ]); + let spec: any = { + 
containers: containersSpecMap.get('containers'), + restartPolicy: 'ExitCode', + volumes: volumeSpecMap.get('nniVolumes') + } + if (privateRegistrySecretName) { + spec.imagePullSecrets = [ + { + name: privateRegistrySecretName + }] + } return { replicas: replicaNumber, template: { @@ -467,26 +497,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber // tslint:disable-next-line:no-null-keyword creationTimestamp: null }, - spec: { - containers: [ - { - // Kubeflow tensorflow operator requires that containers' name must be tensorflow - // TODO: change the name based on operator's type - name: this.kubernetesCRDClient.containerName, - image: replicaImage, - args: ['sh', `${path.join(trialWorkingFolder, runScriptFile)}`], - volumeMounts: [ - { - name: 'nni-vol', - mountPath: this.CONTAINER_MOUNT_PATH - }], - resources: podResources - }], - restartPolicy: 'ExitCode', - volumes: volumeSpecMap.get('nniVolumes') - } + spec: spec } - }; + } } } // tslint:enable: no-unsafe-any no-any diff --git a/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts b/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts index 6c7ac2fce0..334eb122ed 100644 --- a/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesConfig.ts @@ -179,6 +179,9 @@ export class KubernetesTrialConfigTemplate { // Docker image public readonly image: string; + // Private registry config file path to download docker image + public readonly privateRegistryAuthPath?: string; + // Trail command public readonly command : string; @@ -186,12 +189,13 @@ export class KubernetesTrialConfigTemplate { public readonly gpuNum : number; constructor(command : string, gpuNum : number, - cpuNum: number, memoryMB: number, image: string) { + cpuNum: number, memoryMB: number, image: string, privateRegistryAuthPath?: string) { this.command = command; this.gpuNum = gpuNum; this.cpuNum = cpuNum; this.memoryMB = memoryMB; 
this.image = image; + this.privateRegistryAuthPath = privateRegistryAuthPath; } } diff --git a/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts b/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts index 4dc52cb120..0327cd553f 100644 --- a/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts +++ b/src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts @@ -38,6 +38,8 @@ import { KubernetesClusterConfig } from './kubernetesConfig'; import { kubernetesScriptFormat, KubernetesTrialJobDetail } from './kubernetesData'; import { KubernetesJobRestServer } from './kubernetesJobRestServer'; +var fs = require('fs'); /* NOTE(review): the neighbouring dependencies are brought in with ES import statements; consider the same form here for consistency. */ + /** * Training Service implementation for Kubernetes */ @@ -327,5 +329,34 @@ abstract class KubernetesTrainingService { return Promise.resolve(); } + + /** * Create a Kubernetes Secret of type kubernetes.io/dockerconfigjson from a private-registry auth file. * The file is read synchronously, base64-encoded and stored under the '.dockerconfigjson' data key; * the Secret is labelled with the NNI trial label and the current experiment id. * NOTE(review): the Secret namespace is hard-coded to 'default' - confirm this matches the namespace the trial pods run in. * NOTE(review): readFileSync blocks the event loop and throws if the file cannot be read - confirm that is acceptable here. * @param filePath path of the registry auth file; undefined or '' means no private registry is configured * @returns the generated Secret name, or undefined when no filePath was given */ protected async createRegistrySecret(filePath: string | undefined): Promise { + if(filePath === undefined || filePath === '') { + return undefined; + } + let body = fs.readFileSync(filePath).toString('base64'); + /* Secret name is 'nni-secret-' plus the lower-cased uniqueString(8) value (presumably a random 8-character id - verify against the helper). */ let registrySecretName = String.Format('nni-secret-{0}', uniqueString(8) + .toLowerCase()); + await this.genericK8sClient.createSecret( + { + apiVersion: 'v1', + kind: 'Secret', + metadata: { + name: registrySecretName, + namespace: 'default', + labels: { + app: this.NNI_KUBERNETES_TRIAL_LABEL, + expId: getExperimentId() + } + }, + type: 'kubernetes.io/dockerconfigjson', + data: { + '.dockerconfigjson': body + } + } + ); + return registrySecretName; + } + } export { KubernetesTrainingService }; diff --git a/src/nni_manager/training_service/pai/paiConfig.ts b/src/nni_manager/training_service/pai/paiConfig.ts index c1bb7fb664..1248368a89 100644 --- a/src/nni_manager/training_service/pai/paiConfig.ts +++ b/src/nni_manager/training_service/pai/paiConfig.ts @@ -71,6 +71,8 @@ export class PAIJobConfig { public readonly image: string; // Code directory on HDFS public readonly codeDir: string; + //authentication file used for 
private Docker registry + public readonly authFile?: string; // List of taskRole, one task role at least public taskRoles: PAITaskRole[]; @@ -87,12 +89,13 @@ export class PAIJobConfig { * @param taskRoles List of taskRole, one task role at least */ constructor(jobName: string, image : string, codeDir : string, - taskRoles : PAITaskRole[], virtualCluster: string) { + taskRoles : PAITaskRole[], virtualCluster: string, authFile?: string) { this.jobName = jobName; this.image = image; this.codeDir = codeDir; this.taskRoles = taskRoles; this.virtualCluster = virtualCluster; + this.authFile = authFile; } } @@ -129,14 +132,17 @@ export class NNIPAITrialConfig extends TrialConfig { public virtualCluster?: string; //Shared memory for one task in the task role public shmMB?: number; + //authentication file used for private Docker registry + public authFile?: string; constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number, - image: string, virtualCluster?: string, shmMB?: number) { + image: string, virtualCluster?: string, shmMB?: number, authFile?: string) { super(command, codeDir, gpuNum); this.cpuNum = cpuNum; this.memoryMB = memoryMB; this.image = image; this.virtualCluster = virtualCluster; this.shmMB = shmMB; + this.authFile = authFile; } } diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts index ce5ca61905..210c720f38 100644 --- a/src/nni_manager/training_service/pai/paiTrainingService.ts +++ b/src/nni_manager/training_service/pai/paiTrainingService.ts @@ -442,7 +442,7 @@ class PAITrainingService implements TrainingService { // Task command nniPaiTrialCommand, // Task shared memory - this.paiTrialConfig.shmMB + this.paiTrialConfig.shmMB, ) ]; @@ -456,7 +456,9 @@ class PAITrainingService implements TrainingService { // PAI Task roles paiTaskRoles, // Add Virutal Cluster - this.paiTrialConfig.virtualCluster === undefined ? 
'default' : this.paiTrialConfig.virtualCluster.toString() + this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(), + //Task auth File + this.paiTrialConfig.authFile ); // Step 2. Upload code files in codeDir onto HDFS diff --git a/tools/nni_cmd/config_schema.py b/tools/nni_cmd/config_schema.py index bddcce5fe6..fdc49610b5 100644 --- a/tools/nni_cmd/config_schema.py +++ b/tools/nni_cmd/config_schema.py @@ -233,6 +233,8 @@ def setPathCheck(key): 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), 'memoryMB': setType('memoryMB', int), 'image': setType('image', str), + Optional('authFile'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ + error='ERROR: authFile format error, authFile format is hdfs://xxx.xxx.xxx.xxx:xxx'), Optional('shmMB'): setType('shmMB', int), Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\ error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'), @@ -261,7 +263,8 @@ def setPathCheck(key): 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), 'memoryMB': setType('memoryMB', int), - 'image': setType('image', str) + 'image': setType('image', str), + Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath') }, Optional('master'): { 'replicas': setType('replicas', int), @@ -269,7 +272,8 @@ def setPathCheck(key): 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), 'memoryMB': setType('memoryMB', int), - 'image': setType('image', str) + 'image': setType('image', str), + Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath') }, Optional('worker'):{ 'replicas': setType('replicas', int), @@ -277,7 +281,8 @@ def setPathCheck(key): 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), 'cpuNum': 
setNumberRange('cpuNum', int, 0, 99999), 'memoryMB': setType('memoryMB', int), - 'image': setType('image', str) + 'image': setType('image', str), + Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath') } } } @@ -324,7 +329,8 @@ def setPathCheck(key): 'gpuNum': setNumberRange('gpuNum', int, 0, 99999), 'cpuNum': setNumberRange('cpuNum', int, 0, 99999), 'memoryMB': setType('memoryMB', int), - 'image': setType('image', str) + 'image': setType('image', str), + Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath') }] } }