diff --git a/docs/KubeflowMode.md b/docs/KubeflowMode.md index 449649df17..2c4721b971 100644 --- a/docs/KubeflowMode.md +++ b/docs/KubeflowMode.md @@ -5,7 +5,7 @@ Now NNI supports running experiment on [Kubeflow](https://github.com/kubeflow/ku ## Prerequisite for on-premises Kubernetes Service 1. A **Kubernetes** cluster using Kubernetes 1.8 or later. Follow this [guideline](https://kubernetes.io/docs/setup/) to set up Kubernetes 2. Download, set up, and deploy **Kubelow** to your Kubernetes cluster. Follow this [guideline](https://www.kubeflow.org/docs/started/getting-started/) to set up Kubeflow -3. Prepare a **kubeconfig** file, which will be used by NNI to interact with your kubernetes API server. By default, NNI manager will use $(HOME)/.kube/config as kubeconfig file's path. Refer this [guideline]( https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig) to learn more about kubeconfig. +3. Prepare a **kubeconfig** file, which will be used by NNI to interact with your Kubernetes API server. By default, NNI manager will use $(HOME)/.kube/config as kubeconfig file's path. You can also specify a different kubeconfig file by setting the **KUBECONFIG** environment variable. Refer to this [guideline]( https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig) to learn more about kubeconfig. 4. If your NNI trial job needs GPU resource, you should follow this [guideline](https://github.com/NVIDIA/k8s-device-plugin) to configure **Nvidia device plugin for Kubernetes**. 5. Prepare a **NFS server** and export a general purpose mount (we recommend to map your NFS server path in `root_squash option`, otherwise permission issue may raise when nni copy files to NFS. Refer this [page](https://linux.die.net/man/5/exports) to learn what root_squash option is), or **Azure File Storage**. 6. Install **NFS client** on the machine where you install NNI and run nnictl to create experiment. 
Run this command to install NFSv4 client: diff --git a/src/nni_manager/main.ts b/src/nni_manager/main.ts index 1944cc9956..5ca7336d25 100644 --- a/src/nni_manager/main.ts +++ b/src/nni_manager/main.ts @@ -109,6 +109,7 @@ mkDirP(getLogDir()).then(async () => { log.info(`Rest server listening on: ${restServer.endPoint}`); } catch (err) { log.error(`${err.stack}`); + throw err; } }).catch((err: Error) => { console.error(`Failed to create log dir: ${err.stack}`); diff --git a/src/nni_manager/training_service/kubeflow/kubernetesApiClient.ts b/src/nni_manager/training_service/kubeflow/kubernetesApiClient.ts index 43b308e2df..f6e2c00d6f 100644 --- a/src/nni_manager/training_service/kubeflow/kubernetesApiClient.ts +++ b/src/nni_manager/training_service/kubeflow/kubernetesApiClient.ts @@ -36,7 +36,7 @@ class GeneralK8sClient { protected readonly log: Logger = getLogger(); constructor() { - this.client = new K8SClient({ config: K8SConfig.fromKubeconfig(path.join(os.homedir(), '.kube', 'config')), version: '1.9'}); + this.client = new K8SClient({ config: K8SConfig.fromKubeconfig(), version: '1.9'}); this.client.loadSpec(); } @@ -58,7 +58,7 @@ abstract class KubeflowOperatorClient { protected crdSchema: any; constructor() { - this.client = new K8SClient({ config: K8SConfig.fromKubeconfig(path.join(os.homedir(), '.kube', 'config'))}); + this.client = new K8SClient({ config: K8SConfig.fromKubeconfig() }); this.client.loadSpec(); }