job killing in Spark #935

Closed · wants to merge 12 commits
@@ -28,7 +28,11 @@ import org.apache.spark.util.CompletionIterator

private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging {

- override def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer)
+ override def fetch[T](
+     shuffleId: Int,
+     reduceId: Int,
+     context: TaskContext,
+     serializer: Serializer)
: Iterator[T] =
{

@@ -74,7 +78,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging {
val blockFetcherItr = blockManager.getMultiple(blocksByAddress, serializer)
val itr = blockFetcherItr.flatMap(unpackBlock)

- CompletionIterator[T, Iterator[T]](itr, {
+ val completionIter = CompletionIterator[T, Iterator[T]](itr, {
val shuffleMetrics = new ShuffleReadMetrics
shuffleMetrics.shuffleFinishTime = System.currentTimeMillis
shuffleMetrics.remoteFetchTime = blockFetcherItr.remoteFetchTime
@@ -83,7 +87,9 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging {
shuffleMetrics.totalBlocksFetched = blockFetcherItr.totalBlocks
shuffleMetrics.localBlocksFetched = blockFetcherItr.numLocalBlocks
shuffleMetrics.remoteBlocksFetched = blockFetcherItr.numRemoteBlocks
- metrics.shuffleReadMetrics = Some(shuffleMetrics)
+ context.taskMetrics.shuffleReadMetrics = Some(shuffleMetrics)
})

new InterruptibleIterator[T](context, completionIter)
}
}
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/CacheManager.scala
@@ -37,7 +37,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
case Some(cachedValues) =>
// Partition is in cache, so just return its values
logInfo("Found partition in cache!")
- return cachedValues.asInstanceOf[Iterator[T]]
+ return new InterruptibleIterator(context, cachedValues.asInstanceOf[Iterator[T]])

case None =>
// Mark the split as loading (unless someone else marks it first)
@@ -55,7 +55,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
// downside of the current code is that threads wait serially if this does happen.
blockManager.get(key) match {
case Some(values) =>
- return values.asInstanceOf[Iterator[T]]
+ return new InterruptibleIterator(context, values.asInstanceOf[Iterator[T]])
case None =>
logInfo("Whoever was loading " + key + " failed; we'll try it ourselves")
loading.add(key)
50 changes: 50 additions & 0 deletions core/src/main/scala/org/apache/spark/FutureJob.scala
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark

import java.util.concurrent.{ExecutionException, TimeUnit, Future}

import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}

class FutureJob[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: () => T)
  extends Future[T] {

  override def isDone: Boolean = jobWaiter.jobFinished

  override def cancel(mayInterruptIfRunning: Boolean): Boolean = {
    jobWaiter.kill()
    true
  }

  override def isCancelled: Boolean = {
    throw new UnsupportedOperationException
  }

  override def get(): T = {
    jobWaiter.awaitResult() match {
      case JobSucceeded =>
        resultFunc()
      case JobFailed(e: Exception, _) =>
        throw new ExecutionException(e)
    }
  }

  override def get(timeout: Long, unit: TimeUnit): T = {
    throw new UnsupportedOperationException
  }
}
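
A brief usage sketch (not part of this patch; the names below are hypothetical) of how the Future contract above maps onto the JobWaiter: isDone mirrors jobWaiter.jobFinished, get() blocks in awaitResult() and then either runs resultFunc or rethrows the failure wrapped in an ExecutionException, and cancel() simply delegates to jobWaiter.kill().

    // Hypothetical sketch: `future` is assumed to be a FutureJob returned by
    // SparkContext.submitJob (added later in this patch).
    while (!future.isDone) {    // isDone mirrors jobWaiter.jobFinished
      Thread.sleep(100)         // do other work while the job runs
    }
    val result = future.get()   // JobSucceeded => resultFunc()
                                // JobFailed    => throws ExecutionException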
30 changes: 30 additions & 0 deletions core/src/main/scala/org/apache/spark/InterruptibleIterator.scala
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark

/**
* An iterator that wraps around an existing iterator to provide task killing functionality.
* It works by checking the interrupted flag in TaskContext.
*/
class InterruptibleIterator[+T](val context: TaskContext, val delegate: Iterator[T])
  extends Iterator[T] {

  def hasNext: Boolean = !context.interrupted && delegate.hasNext

  def next(): T = delegate.next()
}
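
To make the contract concrete, here is a small sketch (not part of the patch; `context` is assumed to be the TaskContext of the running task): once something sets context.interrupted to true, the next hasNext check returns false and the loop below stops early instead of draining the underlying iterator.

    // Hypothetical sketch: consume records through the wrapper so the task
    // can be stopped between elements.
    def consume[T](context: TaskContext, records: Iterator[T]): Long = {
      val iter = new InterruptibleIterator(context, records)
      var count = 0L
      while (iter.hasNext) {  // false as soon as context.interrupted is set
        iter.next()
        count += 1
      }
      count
    }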
5 changes: 4 additions & 1 deletion core/src/main/scala/org/apache/spark/ShuffleFetcher.scala
@@ -27,7 +27,10 @@ private[spark] abstract class ShuffleFetcher {
* Fetch the shuffle outputs for a given ShuffleDependency.
* @return An iterator over the elements of the fetched shuffle outputs.
*/
- def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics,
+ def fetch[T](
+     shuffleId: Int,
+     reduceId: Int,
+     context: TaskContext,
serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[T]

/** Stop the fetcher */
33 changes: 33 additions & 0 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -20,6 +20,7 @@ package org.apache.spark
import java.io._
import java.net.URI
import java.util.Properties
import java.util.concurrent.Future
import java.util.concurrent.atomic.AtomicInteger

import scala.collection.Map
@@ -812,6 +813,38 @@ class SparkContext(
result
}

def submitJob[T, U, R](
Mark Hamstra (Contributor) commented:
Certainly not unique to this method, but this is another example of something that is publicly accessible in a SparkContext that probably shouldn't be. Is there any plan to provide a more restricted SparkContext API (for use, e.g., in the shell), or are we going to continue to assume that if we don't tell people about something, then they won't misuse it?

The PR author (Member) replied:

Actually, why wouldn't we want this to be accessible? It is at the same level as runJob.

Mark Hamstra (Contributor) replied:

That's true, and we certainly want custom RDD developers and the like to be able to access runJob etc. My question, though, is whether we really want all users of, e.g., spark-shell to be able to call runJob, submitJob, etc. directly. It comes down to whether spark-shell is targeted at developers, who want to be able to do anything in the REPL that they could in standalone code, or at end users who just need the kinds of higher-level RDD operations we typically show in examples. Maybe the issue isn't that SparkContext itself needs another access level for its methods, but that we could use another kind of interactive tool/UI (something like IPython Notebooks, perhaps...) that doesn't expose all the SparkContext methods that many users shouldn't really be messing with directly.

    rdd: RDD[T],
    processPartition: Iterator[T] => U,
    partitions: Seq[Int],
    partitionResultHandler: (Int, U) => Unit,
    resultFunc: () => R): Future[R] =
{
  val callSite = Utils.formatSparkCallSite
  val waiter = dagScheduler.submitJob(
    rdd,
    (context: TaskContext, iter: Iterator[T]) => processPartition(iter),
    partitions,
    callSite,
    allowLocal = false,
    partitionResultHandler,
    null)
  new FutureJob(waiter, resultFunc)
}

/**
 * Kill a running job.
 */
def killJob(jobId: Int) {
  dagScheduler.killJob(jobId)
}

def killAllJobs() {
  dagScheduler.activeJobs.foreach { job =>
    killJob(job.jobId)
  }
}
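
As a rough usage sketch (not taken from this patch; `sc` and `rdd` are assumed to already exist, with rdd: RDD[Int]), the five submitJob arguments line up as follows, and the returned Future can be used to block on the result or to kill the job:

    // Hypothetical example: count elements per partition asynchronously.
    val counts = new Array[Long](rdd.partitions.size)
    val future = sc.submitJob(
      rdd,
      (iter: Iterator[Int]) => iter.size.toLong,           // processPartition
      0 until rdd.partitions.size,                         // partitions
      (index: Int, count: Long) => counts(index) = count,  // partitionResultHandler
      () => counts.sum)                                    // resultFunc
    // future.get() blocks and returns counts.sum once the job succeeds;
    // future.cancel(true) goes through JobWaiter.kill() to stop the job.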

/**
* Clean a closure to make it ready to serialized and send to tasks
* (removes unreferenced variables in $outer's, updates REPL variables)
1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -25,6 +25,7 @@ class TaskContext(
val splitId: Int,
val attemptId: Long,
val runningLocally: Boolean = false,
@volatile var interrupted: Boolean = false,
val taskMetrics: TaskMetrics = TaskMetrics.empty()
) extends Serializable {

4 changes: 3 additions & 1 deletion core/src/main/scala/org/apache/spark/TaskEndReason.scala
@@ -48,4 +48,6 @@ private[spark] case class ExceptionFailure(

private[spark] case class OtherFailure(message: String) extends TaskEndReason

- private[spark] case class TaskResultTooBigFailure() extends TaskEndReason
+ private[spark] case object TaskResultTooBigFailure extends TaskEndReason

private[spark] case object TaskKilled extends TaskEndReason