Commit

Merge branch 'master' of github.com:apache/spark into v1WriteFallback
brkyvz committed Aug 20, 2019
2 parents 83dbd78 + 39c1127 commit 9bfb76e
Showing 353 changed files with 10,886 additions and 1,185 deletions.
44 changes: 38 additions & 6 deletions .github/PULL_REQUEST_TEMPLATE
@@ -1,10 +1,42 @@
## What changes were proposed in this pull request?
<!--
Thanks for sending a pull request! Here are some tips for you:
1. If this is your first time, please read our contributor guidelines: https://spark.apache.org/contributing.html
2. Ensure you have added or run the appropriate tests for your PR: https://spark.apache.org/developer-tools.html
3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][SPARK-XXXX] Your PR title ...'.
4. Be sure to keep the PR description updated to reflect all changes.
5. Please write your PR title to summarize what this PR proposes.
6. If possible, provide a concise example to reproduce the issue for a faster review.
-->

(Please fill in changes proposed in this fix)
### What changes were proposed in this pull request?
<!--
Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
If possible, please consider writing useful notes for better and faster reviews in your PR. See the examples below.
1. If you refactor some codes with changing classes, showing the class hierarchy will help reviewers.
2. If you fix some SQL features, you can provide some references of other DBMSes.
3. If there is design documentation, please add the link.
4. If there is a discussion in the mailing list, please add the link.
-->

## How was this patch tested?

(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)
### Why are the changes needed?
<!--
Please clarify why the changes are needed. For instance,
1. If you propose a new API, clarify the use case for a new API.
2. If you fix a bug, you can clarify why it is a bug.
-->

Please review https://spark.apache.org/contributing.html before opening a pull request.

### Does this PR introduce any user-facing change?
<!--
If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible.
If no, write 'No'.
-->


### How was this patch tested?
<!--
If tests were added, say they were added here. Please make sure to add some test cases that check the changes thoroughly including negative and positive cases if possible.
If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
If tests were not added, please describe why they were not added and/or why it was difficult to add.
-->
5 changes: 3 additions & 2 deletions .github/workflows/master.yml
@@ -22,5 +22,6 @@ jobs:
version: ${{ matrix.java }}
- name: Build with Maven
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
./build/mvn -DskipTests package
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
./build/mvn $MAVEN_CLI_OPTS -DskipTests package
8 changes: 6 additions & 2 deletions LICENSE-binary
@@ -464,10 +464,8 @@ javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173
Common Development and Distribution License (CDDL) 1.1
------------------------------------------------------

javax.annotation:javax.annotation-api https://jcp.org/en/jsr/detail?id=250
javax.servlet:javax.servlet-api https://javaee.github.io/servlet-spec/
javax.transaction:jta http://www.oracle.com/technetwork/java/index.html
javax.ws.rs:javax.ws.rs-api https://github.com/jax-rs
javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2
org.glassfish.hk2:hk2-api https://github.com/javaee/glassfish
org.glassfish.hk2:hk2-locator (same)
@@ -492,6 +490,12 @@ jakarta.xml.bind:jakarta.xml.bind-api
com.sun.istack:istack-commons-runtime


Eclipse Public License (EPL) 2.0
--------------------------------

jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca
jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api

Mozilla Public License (MPL) 1.1
--------------------------------

2 changes: 1 addition & 1 deletion R/pkg/DESCRIPTION
@@ -13,7 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
License: Apache License (== 2.0)
URL: https://www.apache.org/ https://spark.apache.org/
BugReports: https://spark.apache.org/contributing.html
SystemRequirements: Java (== 8)
SystemRequirements: Java (>= 8, < 12)
Depends:
R (>= 3.1),
methods
23 changes: 16 additions & 7 deletions R/pkg/R/client.R
@@ -64,7 +64,9 @@ checkJavaVersion <- function() {
javaBin <- "java"
javaHome <- Sys.getenv("JAVA_HOME")
javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
sparkJavaVersions <- strsplit(javaReqs, "[(,)]")[[1]]
minJavaVersion <- as.numeric(strsplit(sparkJavaVersions[[2]], ">= ")[[1]][[2]])
maxJavaVersion <- as.numeric(strsplit(sparkJavaVersions[[3]], "< ")[[1]][[2]])
if (javaHome != "") {
javaBin <- file.path(javaHome, "bin", javaBin)
}
@@ -91,12 +93,19 @@ checkJavaVersion <- function() {
}, javaVersionOut)

javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2]
# javaVersionStr is of the form 1.8.0_92.
# Extract 8 from it to compare to sparkJavaVersion
javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
if (javaVersionNum != sparkJavaVersion) {
stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
javaVersionStr))
# javaVersionStr is of the form 1.8.0_92/9.0.x/11.0.x.
# We are using 8, 9, 10, 11 for sparkJavaVersion.
versions <- strsplit(javaVersionStr, "[.]")[[1L]]
if ("1" == versions[1]) {
javaVersionNum <- as.integer(versions[2])
} else {
javaVersionNum <- as.integer(versions[1])
}
if (javaVersionNum < minJavaVersion || javaVersionNum >= maxJavaVersion) {
stop(paste0("Java version, greater than or equal to ", minJavaVersion,
" and less than ", maxJavaVersion,
", is required for this package; found version: ",
javaVersionStr))
}
return(javaVersionNum)
}
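The updated checkJavaVersion handles both the legacy "1.8.0_92" style and the post-Java-9 "11.0.x" style, then checks the major version against the ">= 8, < 12" range declared in DESCRIPTION. A minimal Scala sketch of the same major-version extraction (illustrative only; the committed logic is the R code above):

// Sketch (not from this commit): extract the Java major version from a version
// string such as "1.8.0_92" (legacy) or "11.0.4" (Java 9+), mirroring the R logic.
def javaMajorVersion(versionStr: String): Int = {
  val parts = versionStr.split("\\.")
  if (parts(0) == "1") parts(1).toInt else parts(0).toInt
}

// javaMajorVersion("1.8.0_92") == 8; javaMajorVersion("11.0.4") == 11
// The caller would then require: major >= minJavaVersion && major < maxJavaVersion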
3 changes: 2 additions & 1 deletion R/pkg/R/context.R
@@ -325,7 +325,8 @@ setCheckpointDirSC <- function(sc, dirName) {
#'
#' A directory can be given if the recursive option is set to true.
#' Currently directories are only supported for Hadoop-supported filesystems.
#' Refer Hadoop-supported filesystems at \url{https://wiki.apache.org/hadoop/HCFS}.
#' Refer Hadoop-supported filesystems at
#' \url{https://cwiki.apache.org/confluence/display/HADOOP2/HCFS}.
#'
#' Note: A path can be added only once. Subsequent additions of the same path are ignored.
#'
2 changes: 1 addition & 1 deletion R/pkg/tests/fulltests/test_mllib_classification.R
@@ -308,7 +308,7 @@ test_that("spark.mlp", {
expect_equal(summary$layers, c(4, 5, 4, 3))
expect_equal(length(summary$weights), 64)
expect_equal(head(summary$weights, 5), list(-24.28415, 107.8701, 16.86376, 1.103736, 9.244488),
tolerance = 1e-6)
tolerance = 1e-1)

# Test predict method
mlpTestDF <- df
9 changes: 9 additions & 0 deletions core/pom.xml
@@ -260,6 +260,15 @@
<groupId>org.glassfish.jersey.containers</groupId>
<artifactId>jersey-container-servlet-core</artifactId>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.inject</groupId>
<artifactId>jersey-hk2</artifactId>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.test-framework.providers</groupId>
<artifactId>jersey-test-framework-provider-simple</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
21 changes: 20 additions & 1 deletion core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
@@ -19,6 +19,7 @@ package org.apache.spark

import java.util.{Properties, Timer, TimerTask}

import scala.concurrent.TimeoutException
import scala.concurrent.duration._

import org.apache.spark.annotation.{Experimental, Since}
@@ -117,12 +118,30 @@ class BarrierTaskContext private[spark] (
timer.schedule(timerTask, 60000, 60000)

try {
barrierCoordinator.askSync[Unit](
val abortableRpcFuture = barrierCoordinator.askAbortable[Unit](
message = RequestToSync(numTasks, stageId, stageAttemptNumber, taskAttemptId,
barrierEpoch),
// Set a fixed timeout for RPC here, so users shall get a SparkException thrown by
// BarrierCoordinator on timeout, instead of RPCTimeoutException from the RPC framework.
timeout = new RpcTimeout(365.days, "barrierTimeout"))

// Wait for the RPC future to complete, waking up every second to check whether the
// current Spark task has been killed. If it has, abort the RPC and throw a
// `TaskKilledException`; otherwise keep waiting until the RPC completes.
while (!abortableRpcFuture.toFuture.isCompleted) {
if (taskContext.isInterrupted()) {
val reason = taskContext.getKillReason().get
abortableRpcFuture.abort(reason)
throw new TaskKilledException(reason)
}
// Wait on the RPC future for at most 1 second.
try {
ThreadUtils.awaitResult(abortableRpcFuture.toFuture, 1.second)
} catch {
case _: TimeoutException => // the 1-second wait elapsed; loop and check again
}
}

barrierEpoch += 1
logInfo(s"Task $taskAttemptId from Stage $stageId(Attempt $stageAttemptNumber) finished " +
"global sync successfully, waited for " +
@@ -156,7 +156,7 @@ private[python] object PythonHadoopUtil {
* Convert a [[java.util.Map]] of properties to a [[org.apache.hadoop.conf.Configuration]]
*/
def mapToConf(map: java.util.Map[String, String]): Configuration = {
val conf = new Configuration()
val conf = new Configuration(false)
map.asScala.foreach { case (k, v) => conf.set(k, v) }
conf
}
@@ -335,7 +335,7 @@ private[spark] object PythonRDD extends Logging {
valueConverterClass: String,
confAsMap: java.util.HashMap[String, String],
batchSize: Int): JavaRDD[Array[Byte]] = {
val conf = PythonHadoopUtil.mapToConf(confAsMap)
val conf = getMergedConf(confAsMap, sc.hadoopConfiguration())
val rdd =
newAPIHadoopRDDFromClassNames[K, V, F](sc,
None, inputFormatClass, keyClass, valueClass, conf)
@@ -404,7 +404,7 @@ private[spark] object PythonRDD extends Logging {
valueConverterClass: String,
confAsMap: java.util.HashMap[String, String],
batchSize: Int): JavaRDD[Array[Byte]] = {
val conf = PythonHadoopUtil.mapToConf(confAsMap)
val conf = getMergedConf(confAsMap, sc.hadoopConfiguration())
val rdd =
hadoopRDDFromClassNames[K, V, F](sc,
None, inputFormatClass, keyClass, valueClass, conf)
@@ -620,7 +620,7 @@ private[spark] object PythonRDD extends Logging {
keyConverterClass: String,
valueConverterClass: String,
useNewAPI: Boolean): Unit = {
val conf = PythonHadoopUtil.mapToConf(confAsMap)
val conf = getMergedConf(confAsMap, pyRDD.context.hadoopConfiguration)
val converted = convertRDD(SerDeUtil.pythonToPairRDD(pyRDD, batchSerialized),
keyConverterClass, valueConverterClass, new JavaToWritableConverter)
if (useNewAPI) {
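These hunks stop building the Hadoop configuration from scratch and instead merge the Python-supplied map over the SparkContext's Hadoop configuration (which is also why mapToConf earlier in this diff now uses `new Configuration(false)`, i.e. no default resources). The `getMergedConf` helper itself is not shown in this excerpt; a minimal sketch of what such a merge could look like, as an assumption rather than the committed implementation:

import scala.collection.JavaConverters._
import org.apache.hadoop.conf.Configuration

// Assumed sketch: overlay the Python-side entries on a copy of the base Hadoop
// configuration, so keys supplied from Python win over SparkContext defaults.
def mergedConf(confAsMap: java.util.Map[String, String], base: Configuration): Configuration = {
  val merged = new Configuration(base)  // copy constructor keeps the base settings
  confAsMap.asScala.foreach { case (k, v) => merged.set(k, v) }
  merged
}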
@@ -42,7 +42,7 @@ import org.apache.spark.util.Utils
* 3. When all necessary tasks completed successfully, the driver calls commitJob. If the job
* failed to execute (e.g. too many failed tasks), the job should call abortJob.
*/
abstract class FileCommitProtocol {
abstract class FileCommitProtocol extends Logging {
import FileCommitProtocol._

/**
@@ -129,7 +129,9 @@ abstract class FileCommitProtocol {
* before the job has finished. These same task commit messages will be passed to commitJob()
* if the entire job succeeds.
*/
def onTaskCommit(taskCommit: TaskCommitMessage): Unit = {}
def onTaskCommit(taskCommit: TaskCommitMessage): Unit = {
logDebug(s"onTaskCommit($taskCommit)")
}
}


@@ -17,6 +17,7 @@

package org.apache.spark.internal.io

import java.io.IOException
import java.util.{Date, UUID}

import scala.collection.mutable
@@ -136,7 +137,7 @@ class HadoopMapReduceCommitProtocol(
tmpOutputPath
}

private def getFilename(taskContext: TaskAttemptContext, ext: String): String = {
protected def getFilename(taskContext: TaskAttemptContext, ext: String): String = {
// The file name looks like part-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003-c000.parquet
// Note that %05d does not truncate the split number, so if we have more than 100000 tasks,
// the file name is fine and won't overflow.
@@ -205,11 +206,28 @@ class HadoopMapReduceCommitProtocol(
}
}

/**
* Abort the job; log and ignore any IO exception thrown.
* This is invariably invoked in an exception handler; raising
* an exception here will lose the root cause of the failure.
*
* @param jobContext job context
*/
override def abortJob(jobContext: JobContext): Unit = {
committer.abortJob(jobContext, JobStatus.State.FAILED)
if (hasValidPath) {
val fs = stagingDir.getFileSystem(jobContext.getConfiguration)
fs.delete(stagingDir, true)
try {
committer.abortJob(jobContext, JobStatus.State.FAILED)
} catch {
case e: IOException =>
logWarning(s"Exception while aborting ${jobContext.getJobID}", e)
}
try {
if (hasValidPath) {
val fs = stagingDir.getFileSystem(jobContext.getConfiguration)
fs.delete(stagingDir, true)
}
} catch {
case e: IOException =>
logWarning(s"Exception while aborting ${jobContext.getJobID}", e)
}
}

@@ -222,17 +240,35 @@ class HadoopMapReduceCommitProtocol(

override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = {
val attemptId = taskContext.getTaskAttemptID
logTrace(s"Commit task ${attemptId}")
SparkHadoopMapRedUtil.commitTask(
committer, taskContext, attemptId.getJobID.getId, attemptId.getTaskID.getId)
new TaskCommitMessage(addedAbsPathFiles.toMap -> partitionPaths.toSet)
}

/**
* Abort the task; log and ignore any failure thrown.
* This is invariably invoked in an exception handler; raising
* an exception here will lose the root cause of the failure.
*
* @param taskContext context
*/
override def abortTask(taskContext: TaskAttemptContext): Unit = {
committer.abortTask(taskContext)
try {
committer.abortTask(taskContext)
} catch {
case e: IOException =>
logWarning(s"Exception while aborting ${taskContext.getTaskAttemptID}", e)
}
// best effort cleanup of other staged files
for ((src, _) <- addedAbsPathFiles) {
val tmp = new Path(src)
tmp.getFileSystem(taskContext.getConfiguration).delete(tmp, false)
try {
for ((src, _) <- addedAbsPathFiles) {
val tmp = new Path(src)
tmp.getFileSystem(taskContext.getConfiguration).delete(tmp, false)
}
} catch {
case e: IOException =>
logWarning(s"Exception while aborting ${taskContext.getTaskAttemptID}", e)
}
}
}
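Both abort paths now follow the same pattern: attempt the cleanup, log any IOException, and never rethrow, so the failure that triggered the abort is not masked. A small helper expressing that pattern (illustrative only; the commit inlines the try/catch blocks rather than introducing such a helper):

import java.io.IOException

// Illustrative sketch: run a best-effort cleanup step, logging and swallowing
// IOExceptions so the original job/task failure remains the visible error.
def bestEffort(description: String)(body: => Unit): Unit = {
  try {
    body
  } catch {
    case e: IOException =>
      // In Spark this would be logWarning(s"Exception while $description", e)
      System.err.println(s"Exception while $description: $e")
  }
}

// Usage, e.g. inside abortTask:
//   bestEffort(s"aborting ${taskContext.getTaskAttemptID}") { committer.abortTask(taskContext) }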
@@ -117,8 +117,8 @@ private[spark] class NettyBlockTransferService(
try {
val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter {
override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) {
val client = clientFactory.createClient(host, port)
try {
val client = clientFactory.createClient(host, port)
new OneForOneBlockFetcher(client, appId, execId, blockIds, listener,
transportConf, tempFileManager).start()
} catch {
29 changes: 29 additions & 0 deletions core/src/main/scala/org/apache/spark/rpc/RpcEndpointRef.scala
@@ -46,6 +46,17 @@ private[spark] abstract class RpcEndpointRef(conf: SparkConf)
*/
def send(message: Any): Unit

/**
* Send a message to the corresponding [[RpcEndpoint.receiveAndReply]] and return an
* [[AbortableRpcFuture]] to receive the reply within the specified timeout.
* The [[AbortableRpcFuture]] instance wraps [[Future]] with an additional `abort` method.
*
* This method only sends the message once and never retries.
*/
def askAbortable[T: ClassTag](message: Any, timeout: RpcTimeout): AbortableRpcFuture[T] = {
throw new UnsupportedOperationException()
}

/**
* Send a message to the corresponding [[RpcEndpoint.receiveAndReply]] and return a [[Future]] to
* receive the reply within the specified timeout.
@@ -93,3 +104,21 @@ private[spark] abstract class RpcEndpointRef(conf: SparkConf)
}

}

/**
* An exception thrown if the RPC is aborted.
*/
class RpcAbortException(message: String) extends Exception(message)

/**
* A wrapper for [[Future]] that adds an `abort` method.
* This is used for long-running RPCs and provides a way to abort the RPC.
*/
private[spark] class AbortableRpcFuture[T: ClassTag](
future: Future[T],
onAbort: String => Unit) {

def abort(reason: String): Unit = onAbort(reason)

def toFuture: Future[T] = future
}
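The wrapper only exposes `abort` and the underlying `Future`; how `onAbort` is wired to the RPC layer is outside this excerpt. A hypothetical sketch (names and wiring are assumptions, not taken from this commit) of backing an AbortableRpcFuture with a Promise that is failed with RpcAbortException when aborted:

import scala.concurrent.Promise
import scala.reflect.ClassTag

// Hypothetical sketch: the RPC layer completes `promise` with the reply; calling
// abort(reason) fails the future with RpcAbortException so waiters observe the abort.
def makeAbortable[T: ClassTag](send: Promise[T] => Unit): AbortableRpcFuture[T] = {
  val promise = Promise[T]()
  send(promise)
  new AbortableRpcFuture[T](
    promise.future,
    onAbort = reason => promise.tryFailure(new RpcAbortException(reason)))
}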