From 42b72c5ec0292dd46682780201ba5988f5fed2e4 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 10 Feb 2017 14:50:38 -0800 Subject: [PATCH] Bumping up kubernetes-client version to fix GKE and local proxy (#105) * Bumping up kubernetes-client version to add fixes * Modify wording * Addressed comments --- docs/running-on-kubernetes.md | 30 ++++++++++++++++++- resource-managers/kubernetes/core/pom.xml | 2 +- .../KubernetesClusterSchedulerBackend.scala | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5a48bb254a6df..19f406039e261 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -51,7 +51,7 @@ connect without SSL on a different port, the master would be set to `k8s://http: Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - + ### Adding Other JARs Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local @@ -150,6 +150,34 @@ or `container:`. A scheme of `file:` corresponds to the keyStore being located o the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `container:`, the file is assumed to already be on the container's disk at the appropriate path. +### Kubernetes Clusters and the authenticated proxy endpoint + +Spark-submit also supports submission through the +[local kubectl proxy](https://kubernetes.io/docs/user-guide/connecting-to-applications-proxy/). One can use the +authenticating proxy to communicate with the api server directly without passing credentials to spark-submit. 
+
+The local proxy can be started by running:
+
+    kubectl proxy
+
+If the local proxy is listening on port 8001, the submission command would look like the following:
+
+    bin/spark-submit \
+      --deploy-mode cluster \
+      --class org.apache.spark.examples.SparkPi \
+      --master k8s://http://127.0.0.1:8001 \
+      --kubernetes-namespace default \
+      --conf spark.executor.instances=5 \
+      --conf spark.app.name=spark-pi \
+      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \
+      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
+      examples/jars/spark_examples_2.11-2.2.0.jar
+
+Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library.
+The above mechanism using `kubectl proxy` can be used when we have authentication providers that the fabric8
+kubernetes-client library does not support. Authentication using X509 client certificates and OAuth tokens
+is currently supported.
+
 ### Spark Properties
 
 Below are some other common properties that are specific to Kubernetes.
Most of the other configurations are the same diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 86d7dec2c076f..a7eba625cd56c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,7 +29,7 @@ Spark Project Kubernetes kubernetes - 1.4.34 + 2.0.3 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 550ddd113fa42..83225098bc651 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -44,7 +44,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = Client.resolveK8sMaster(sc.master) + private val kubernetesMaster = "https://kubernetes" private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT)