Merge pull request #177 from apache-spark-on-k8s/prep-for-alpha-release

Prep for alpha release
apache-spark-on-k8s · Mar 16, 2017 · f9f5af4 · f9f5af4
2 parents cd0a083 + 35724a3
commit f9f5af4
Show file tree

Hide file tree

Showing 94 changed files with 5,542 additions and 64 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -25,11 +25,22 @@
 sudo: required
 dist: trusty
 
-# 2. Choose language and target JDKs for parallel builds.
+# 2. Choose language, target JDK and environment variables for parallel builds.
 language: java
 jdk:
-  - oraclejdk7
   - oraclejdk8
+env:  # Used by the install section below.
+  # Configure the unit test build for spark core and kubernetes modules,
+  # while excluding some flaky unit tests using a regex pattern.
+  - PHASE=test  \
+    PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes"  \
+    MODULES="-pl core,resource-managers/kubernetes/core -am"  \
+    ARGS="-Dtest=none -Dsuffixes='^org\.apache\.spark\.(?!ExternalShuffleServiceSuite|SortShuffleSuite$|rdd\.LocalCheckpointSuite$|deploy\.SparkSubmitSuite$|deploy\.StandaloneDynamicAllocationSuite$).*'"
+  # Configure the full build.
+  - PHASE=install  \
+    PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver"  \
+    MODULES=""  \
+    ARGS="-T 4 -q -DskipTests"
 
 # 3. Setup cache directory for SBT and Maven.
 cache:
@@ -41,11 +52,12 @@ cache:
 notifications:
   email: false
 
-# 5. Run maven install before running lint-java.
+# 5. Run maven build before running lints.
 install:
   - export MAVEN_SKIP_RC=1
-  - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
+  - build/mvn ${PHASE} ${PROFILES} ${MODULES} ${ARGS}
 
-# 6. Run lint-java.
+# 6. Run lints.
 script:
   - dev/lint-java
+  - dev/lint-scala
diff --git a/README.md b/README.md
@@ -1,3 +1,41 @@
+# Apache Spark On Kubernetes
+
+This repository, located at https://github.com/apache-spark-on-k8s/spark, contains a fork of Apache Spark that enables running Spark jobs natively on a Kubernetes cluster.
+
+## What is this?
+
+This is a collaboratively maintained project working on [SPARK-18278](https://issues.apache.org/jira/browse/SPARK-18278). The goal is to bring native support for Spark to use Kubernetes as a cluster manager, in a fully supported way on par with the Spark Standalone, Mesos, and Apache YARN cluster managers.
+
+## Getting Started
+
+- [Usage guide](docs/running-on-kubernetes.md) shows how to run the code
+- [Development docs](resource-managers/kubernetes/README.md) show how to get set up for development
+- Code is primarily located in the [resource-managers/kubernetes](resource-managers/kubernetes) folder
+
+## Why does this fork exist?
+
+Adding native integration for a new cluster manager is a large undertaking.  If poorly executed, it could introduce bugs into Spark when run on other cluster managers, cause release blockers slowing down the overall Spark project, or require hotfixes which divert attention away from development towards managing additional releases.  Any work this deep inside Spark needs to be done carefully to minimize the risk of those negative externalities.
+
+At the same time, an increasing number of people from various companies and organizations desire to work together to natively run Spark on Kubernetes.  The group needs a code repository, communication forum, issue tracking, and continuous integration, all in order to work together effectively on an open source product.
+
+We've been asked by an Apache Spark Committer to work outside of the Apache infrastructure for a short period of time to allow this feature to be hardened and improved without creating risk for Apache Spark.  The aim is to rapidly bring it to the point where it can be brought into the mainline Apache Spark repository for continued development within the Apache umbrella.  If all goes well, this should be a short-lived fork rather than a long-lived one.
+
+## Who are we?
+
+This is a collaborative effort by several folks from different companies who are interested in seeing this feature be successful.  Companies active in this project include (alphabetically):
+
+- Google
+- Haiwen
+- Hyperpilot
+- Intel
+- Palantir
+- Pepperdata
+- Red Hat
+
+--------------------
+
+(original README below)
+
 # Apache Spark
 
 Spark is a fast and general cluster computing system for Big Data. It provides

diff --git a/assembly/pom.xml b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -148,6 +148,16 @@
         </dependency>
       </dependencies>
     </profile>
+    <profile>
+      <id>kubernetes</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-kubernetes_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
     <profile>
       <id>hive</id>
       <dependencies>

diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/tags/pom.xml b/common/tags/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
 

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -252,7 +252,7 @@ public static long parseSecondNano(String secondNano) throws IllegalArgumentExce
   public final int months;
   public final long microseconds;
 
-  public final long milliseconds() {
+  public long milliseconds() {
     return this.microseconds / MICROS_PER_MILLI;
   }
 

diff --git a/core/pom.xml b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0</version>
+    <version>2.1.0-k8s-0.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js
@@ -54,7 +54,28 @@ $(document).ajaxStart(function () {
     $.blockUI({message: '<h3>Loading Executors Page...</h3>'});
 });
 
+function findKubernetesServiceBaseURI() {  // Returns a base URI when document.baseURI contains the k8s proxy pattern, else null.
+    var k8sProxyPattern = '/api/v1/proxy/namespaces/';
+    var k8sProxyPatternPos = document.baseURI.indexOf(k8sProxyPattern);
+    if (k8sProxyPatternPos > 0) {  // indexOf yields -1 when the pattern is absent
+        // Spark is running in a kubernetes cluster, and the web ui is served
+        // through the kubectl proxy.
+        var remaining = document.baseURI.substr(k8sProxyPatternPos + k8sProxyPattern.length);  // text following the pattern
+        var urlSlashesCount = remaining.split('/').length - 3;  // NOTE(review): presumably 3 = namespace, "services" and service-name segments -- confirm
+        var words = document.baseURI.split('/');
+        var baseURI = words.slice(0, words.length - urlSlashesCount).join('/');  // drop the last urlSlashesCount '/'-separated segments
+        return baseURI;
+    }
+
+    return null;
+}
+
 function createTemplateURI(appId) {
+    var kubernetesBaseURI = findKubernetesServiceBaseURI();
+    if (kubernetesBaseURI) {
+        return kubernetesBaseURI + '/static/executorspage-template.html';
+    }
+
     var words = document.baseURI.split('/');
     var ind = words.indexOf("proxy");
     if (ind > 0) {
@@ -70,6 +91,14 @@ function createTemplateURI(appId) {
 }
 
 function getStandAloneppId(cb) {
+    var kubernetesBaseURI = findKubernetesServiceBaseURI();
+    if (kubernetesBaseURI) {
+        var appIdAndPort = kubernetesBaseURI.split('/').slice(-1)[0];
+        var appId = appIdAndPort.split(':')[0];
+        cb(appId);
+        return;
+    }
+
     var words = document.baseURI.split('/');
     var ind = words.indexOf("proxy");
     if (ind > 0) {
@@ -95,6 +124,11 @@ function getStandAloneppId(cb) {
 }
 
 function createRESTEndPoint(appId) {
+    var kubernetesBaseURI = findKubernetesServiceBaseURI();
+    if (kubernetesBaseURI) {
+        return kubernetesBaseURI + "/api/v1/applications/" + appId + "/allexecutors";
+    }
+
     var words = document.baseURI.split('/');
     var ind = words.indexOf("proxy");
     if (ind > 0) {

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -70,7 +70,8 @@ object SparkSubmit {
   private val STANDALONE = 2
   private val MESOS = 4
   private val LOCAL = 8
-  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL
+  private val KUBERNETES = 16
+  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | KUBERNETES | LOCAL
 
   // Deploy modes
   private val CLIENT = 1
@@ -239,9 +240,10 @@ object SparkSubmit {
         YARN
       case m if m.startsWith("spark") => STANDALONE
       case m if m.startsWith("mesos") => MESOS
+      case m if m.startsWith("k8s") => KUBERNETES
       case m if m.startsWith("local") => LOCAL
       case _ =>
-        printErrorAndExit("Master must either be yarn or start with spark, mesos, local")
+        printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local")
         -1
     }
 
@@ -284,6 +286,7 @@ object SparkSubmit {
     }
     val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
     val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
+    val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER
 
     // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
     // too for packages that include Python code
@@ -330,6 +333,10 @@ object SparkSubmit {
 
     // The following modes are not supported or applicable
     (clusterManager, deployMode) match {
+      case (KUBERNETES, CLIENT) =>
+        printErrorAndExit("Client mode is currently not supported for Kubernetes.")
+      case (KUBERNETES, CLUSTER) if args.isPython || args.isR =>
+        printErrorAndExit("Kubernetes does not currently support python or R applications.")
       case (STANDALONE, CLUSTER) if args.isPython =>
         printErrorAndExit("Cluster deploy mode is currently not supported for python " +
           "applications on standalone clusters.")
@@ -463,17 +470,21 @@ object SparkSubmit {
       OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"),
       OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"),
 
-      // Other options
+      OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES,
+        sysProp = "spark.kubernetes.namespace"),
+
+      // Other options
       OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES,
         sysProp = "spark.executor.cores"),
       OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
         sysProp = "spark.executor.memory"),
       OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
         sysProp = "spark.cores.max"),
-      OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
+      OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
         sysProp = "spark.files"),
       OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"),
-      OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"),
+      OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
+        sysProp = "spark.jars"),
       OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER,
         sysProp = "spark.driver.memory"),
       OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER,
@@ -506,8 +517,9 @@ object SparkSubmit {
 
     // Add the application jar automatically so the user doesn't have to call sc.addJar
     // For YARN cluster mode, the jar is already distributed on each node as "app.jar"
+    // In Kubernetes cluster mode, the jar will be uploaded by the client separately.
     // For python and R files, the primary resource is already distributed as a regular file
-    if (!isYarnCluster && !args.isPython && !args.isR) {
+    if (!isYarnCluster && !isKubernetesCluster && !args.isPython && !args.isR) {
       var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty)
       if (isUserJar(args.primaryResource)) {
         jars = jars ++ Seq(args.primaryResource)
@@ -606,6 +618,13 @@ object SparkSubmit {
       }
     }
 
+    if (isKubernetesCluster) {
+      childMainClass = "org.apache.spark.deploy.kubernetes.Client"
+      childArgs += args.primaryResource
+      childArgs += args.mainClass
+      childArgs ++= args.childArgs
+    }
+
     // Load any properties specified through --conf and the default properties file
     for ((k, v) <- args.sparkProperties) {
       sysProps.getOrElseUpdate(k, v)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -71,6 +71,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   var principal: String = null
   var keytab: String = null
 
+  // Kubernetes only
+  var kubernetesNamespace: String = null
+
   // Standalone cluster mode only
   var supervise: Boolean = false
   var driverCores: String = null
@@ -186,6 +189,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       .getOrElse(sparkProperties.get("spark.executor.instances").orNull)
     keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
     principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
+    kubernetesNamespace = Option(kubernetesNamespace)
+      .orElse(sparkProperties.get("spark.kubernetes.namespace"))
+      .orNull
 
     // Try to set main class from JAR if no --class argument is given
     if (mainClass == null && !isPython && !isR && primaryResource != null) {
@@ -426,6 +432,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       case KEYTAB =>
         keytab = value
 
+      case KUBERNETES_NAMESPACE =>
+        kubernetesNamespace = value
+
       case HELP =>
         printUsageAndExit(0)