
Commit

Merge branch 'master' of https://github.com/apache/spark into SPARK-1609
witgo committed Apr 25, 2014
2 parents 1185605 + 80429f3 commit bcf36cb
Showing 64 changed files with 651 additions and 192 deletions.
7 changes: 5 additions & 2 deletions core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -104,8 +104,11 @@ class Accumulable[R, T] (
    * Set the accumulator's value; only allowed on master.
    */
   def value_= (newValue: R) {
-    if (!deserialized) value_ = newValue
-    else throw new UnsupportedOperationException("Can't assign accumulator value in task")
+    if (!deserialized) {
+      value_ = newValue
+    } else {
+      throw new UnsupportedOperationException("Can't assign accumulator value in task")
+    }
   }
 
   /**
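
The refactor above is purely stylistic, but the setter it touches encodes a real rule: an accumulator's value may only be assigned on the driver ("master"), never inside a task. A minimal sketch of that contract, assuming an existing SparkContext named sc (the variable names are illustrative, not part of the commit):

    // Sketch only: `sc` is an assumed, already-running SparkContext.
    val acc = sc.accumulator(0)     // created on the driver, so assignment below is permitted
    acc.value = 10                  // fine: we are on the driver side

    sc.parallelize(1 to 100).foreach { i =>
      acc += i                      // tasks may only add to an accumulator
      // acc.value = 0              // would throw UnsupportedOperationException ("... in task")
    }
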
26 changes: 20 additions & 6 deletions core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -19,8 +19,6 @@ package org.apache.spark
 
 import java.net.{Authenticator, PasswordAuthentication}
 
-import scala.collection.mutable.ArrayBuffer
-
 import org.apache.hadoop.io.Text
 
 import org.apache.spark.deploy.SparkHadoopUtil
@@ -139,13 +137,13 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {
   private val sparkSecretLookupKey = "sparkCookie"
 
   private val authOn = sparkConf.getBoolean("spark.authenticate", false)
-  private val uiAclsOn = sparkConf.getBoolean("spark.ui.acls.enable", false)
+  private var uiAclsOn = sparkConf.getBoolean("spark.ui.acls.enable", false)
 
+  private var viewAcls: Set[String] = _
   // always add the current user and SPARK_USER to the viewAcls
-  private val aclUsers = ArrayBuffer[String](System.getProperty("user.name", ""),
+  private val defaultAclUsers = Seq[String](System.getProperty("user.name", ""),
     Option(System.getenv("SPARK_USER")).getOrElse(""))
-  aclUsers ++= sparkConf.get("spark.ui.view.acls", "").split(',')
-  private val viewAcls = aclUsers.map(_.trim()).filter(!_.isEmpty).toSet
+  setViewAcls(defaultAclUsers, sparkConf.get("spark.ui.view.acls", ""))
 
   private val secretKey = generateSecretKey()
   logInfo("SecurityManager, is authentication enabled: " + authOn +
@@ -170,6 +168,20 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {
     )
   }
 
+  private[spark] def setViewAcls(defaultUsers: Seq[String], allowedUsers: String) {
+    viewAcls = (defaultUsers ++ allowedUsers.split(',')).map(_.trim()).filter(!_.isEmpty).toSet
+    logInfo("Changing view acls to: " + viewAcls.mkString(","))
+  }
+
+  private[spark] def setViewAcls(defaultUser: String, allowedUsers: String) {
+    setViewAcls(Seq[String](defaultUser), allowedUsers)
+  }
+
+  private[spark] def setUIAcls(aclSetting: Boolean) {
+    uiAclsOn = aclSetting
+    logInfo("Changing acls enabled to: " + uiAclsOn)
+  }
+
   /**
    * Generates or looks up the secret key.
    *
@@ -222,6 +234,8 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {
    * @return true is the user has permission, otherwise false
    */
   def checkUIViewPermissions(user: String): Boolean = {
+    logDebug("user=" + user + " uiAclsEnabled=" + uiAclsEnabled() + " viewAcls=" +
+      viewAcls.mkString(","))
     if (uiAclsEnabled() && (user != null) && (!viewAcls.contains(user))) false else true
   }
 
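
The new mutators let a long-lived process (the History Server below) re-point the ACLs after construction. A rough sketch of how the pieces fit together (illustrative only: SecurityManager and these setters are private[spark], so this is written as if from Spark-internal code, and the user names are placeholders):

    import org.apache.spark.{SecurityManager, SparkConf}

    val conf = new SparkConf()
      .set("spark.ui.acls.enable", "true")
      .set("spark.ui.view.acls", "alice,bob")

    val securityManager = new SecurityManager(conf)
    securityManager.checkUIViewPermissions("alice")    // true: listed in spark.ui.view.acls
    securityManager.checkUIViewPermissions("mallory")  // false: ACLs are on and the user is not listed

    // Later, e.g. per replayed application in the History Server, the defaults can be overridden:
    securityManager.setUIAcls(true)
    securityManager.setViewAcls("appUser", "alice,bob")
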
core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
@@ -30,6 +30,7 @@ import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.StatCounter
+import org.apache.spark.util.Utils
 
 class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, JavaDoubleRDD] {
 
@@ -133,7 +134,13 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, Ja
   /**
    * Return a sampled subset of this RDD.
    */
-  def sample(withReplacement: Boolean, fraction: JDouble, seed: Int): JavaDoubleRDD =
+  def sample(withReplacement: Boolean, fraction: JDouble): JavaDoubleRDD =
+    sample(withReplacement, fraction, Utils.random.nextLong)
+
+  /**
+   * Return a sampled subset of this RDD.
+   */
+  def sample(withReplacement: Boolean, fraction: JDouble, seed: Long): JavaDoubleRDD =
     fromRDD(srdd.sample(withReplacement, fraction, seed))
 
   /**
core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -39,6 +39,7 @@ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => J
 import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.rdd.{OrderedRDDFunctions, RDD}
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
 
 class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   (implicit val kClassTag: ClassTag[K], implicit val vClassTag: ClassTag[V])
@@ -119,7 +120,13 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
   /**
    * Return a sampled subset of this RDD.
    */
-  def sample(withReplacement: Boolean, fraction: Double, seed: Int): JavaPairRDD[K, V] =
+  def sample(withReplacement: Boolean, fraction: Double): JavaPairRDD[K, V] =
+    sample(withReplacement, fraction, Utils.random.nextLong)
+
+  /**
+   * Return a sampled subset of this RDD.
+   */
+  def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaPairRDD[K, V] =
     new JavaPairRDD[K, V](rdd.sample(withReplacement, fraction, seed))
 
   /**
9 changes: 8 additions & 1 deletion core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
@@ -24,6 +24,7 @@ import org.apache.spark._
 import org.apache.spark.api.java.function.{Function => JFunction}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
 
 class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
   extends JavaRDDLike[T, JavaRDD[T]] {
@@ -98,7 +99,13 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
   /**
    * Return a sampled subset of this RDD.
    */
-  def sample(withReplacement: Boolean, fraction: Double, seed: Int): JavaRDD[T] =
+  def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
+    sample(withReplacement, fraction, Utils.random.nextLong)
+
+  /**
+   * Return a sampled subset of this RDD.
+   */
+  def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
     wrapRDD(rdd.sample(withReplacement, fraction, seed))
 
   /**
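
The visible API change in JavaDoubleRDD, JavaPairRDD and JavaRDD is the same: the sample seed widens from Int to Long, and a new overload omits it and draws a seed from Utils.random. A rough usage sketch in Scala against the Java wrapper (the SparkContext sc and the sample data are assumptions, not part of this commit):

    import java.util.Arrays
    import org.apache.spark.api.java.JavaSparkContext

    val jsc = new JavaSparkContext(sc)   // `sc` is an assumed existing SparkContext
    val rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8))

    val s1 = rdd.sample(false, 0.5)           // new overload: seed drawn via Utils.random.nextLong
    val s2 = rdd.sample(false, 0.5, 12345L)   // explicit seed, now a Long (previously an Int)
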
core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala
@@ -34,6 +34,7 @@ import org.apache.spark.api.java.function.{Function => JFunction, Function2 => J
 import org.apache.spark.partial.{BoundedDouble, PartialResult}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
 
 trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
   def wrapRDD(rdd: RDD[T]): This
@@ -394,7 +395,10 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
     new java.util.ArrayList(arr)
   }
 
-  def takeSample(withReplacement: Boolean, num: Int, seed: Int): JList[T] = {
+  def takeSample(withReplacement: Boolean, num: Int): JList[T] =
+    takeSample(withReplacement, num, Utils.random.nextLong)
+
+  def takeSample(withReplacement: Boolean, num: Int, seed: Long): JList[T] = {
     import scala.collection.JavaConversions._
     val arr: java.util.Collection[T] = rdd.takeSample(withReplacement, num, seed).toSeq
     new java.util.ArrayList(arr)
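
takeSample gets the same treatment. Continuing the sketch above (reusing the assumed rdd: JavaRDD[Int]):

    val ten = rdd.takeSample(true, 10)                 // java.util.List[Int], seed chosen internally
    val tenRepeatable = rdd.takeSample(true, 10, 7L)   // explicit Long seed for reproducible results
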
core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -75,6 +75,10 @@ class SparkHadoopUtil
 
   def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null }
 
+  def loginUserFromKeytab(principalName: String, keytabFilename: String) {
+    UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
+  }
+
 }
 
 object SparkHadoopUtil {
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -66,8 +66,7 @@ private[spark] class SparkSubmitArguments(args: Array[String]) {
       if (k.startsWith("spark")) {
         defaultProperties(k) = v
         if (verbose) SparkSubmit.printStream.println(s"Adding default property: $k=$v")
-      }
-      else {
+      } else {
         SparkSubmit.printWarning(s"Ignoring non-spark config property: $k=$v")
       }
     }
core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
 import org.apache.hadoop.fs.{FileStatus, Path}
 
 import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.{WebUI, SparkUI}
 import org.apache.spark.ui.JettyUtils._
@@ -167,17 +168,21 @@ class HistoryServer(
    * directory. If this file exists, the associated application is regarded to be completed, in
    * which case the server proceeds to render the SparkUI. Otherwise, the server does nothing.
    */
-  private def renderSparkUI(logDir: FileStatus, logInfo: EventLoggingInfo) {
+  private def renderSparkUI(logDir: FileStatus, elogInfo: EventLoggingInfo) {
     val path = logDir.getPath
     val appId = path.getName
-    val replayBus = new ReplayListenerBus(logInfo.logPaths, fileSystem, logInfo.compressionCodec)
+    val replayBus = new ReplayListenerBus(elogInfo.logPaths, fileSystem, elogInfo.compressionCodec)
     val appListener = new ApplicationEventListener
     replayBus.addListener(appListener)
-    val ui = new SparkUI(conf, replayBus, appId, "/history/" + appId)
+    val appConf = conf.clone()
+    val appSecManager = new SecurityManager(appConf)
+    val ui = new SparkUI(conf, appSecManager, replayBus, appId, "/history/" + appId)
 
     // Do not call ui.bind() to avoid creating a new server for each application
     replayBus.replay()
     if (appListener.applicationStarted) {
+      appSecManager.setUIAcls(HISTORY_UI_ACLS_ENABLED)
+      appSecManager.setViewAcls(appListener.sparkUser, appListener.viewAcls)
       attachSparkUI(ui)
       val appName = appListener.appName
       val sparkUser = appListener.sparkUser
@@ -201,6 +206,7 @@ class HistoryServer(
   private def attachSparkUI(ui: SparkUI) {
     assert(serverInfo.isDefined, "HistoryServer must be bound before attaching SparkUIs")
     ui.getHandlers.foreach(attachHandler)
+    addFilters(ui.getHandlers, conf)
   }
 
   /** Detach a reconstructed UI from this server. Only valid after bind(). */
@@ -254,9 +260,13 @@ object HistoryServer {
   // The port to which the web UI is bound
   val WEB_UI_PORT = conf.getInt("spark.history.ui.port", 18080)
 
+  // set whether to enable or disable view acls for all applications
+  val HISTORY_UI_ACLS_ENABLED = conf.getBoolean("spark.history.ui.acls.enable", false)
+
   val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
 
   def main(argStrings: Array[String]) {
+    initSecurity()
     val args = new HistoryServerArguments(argStrings)
     val securityManager = new SecurityManager(conf)
     val server = new HistoryServer(args.logDir, securityManager, conf)
@@ -266,6 +276,20 @@ object HistoryServer {
     while(true) { Thread.sleep(Int.MaxValue) }
     server.stop()
   }
+
+  def initSecurity() {
+    // If we are accessing HDFS and it has security enabled (Kerberos), we have to login
+    // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration.
+    // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically
+    // occur from the keytab.
+    if (conf.getBoolean("spark.history.kerberos.enabled", false)) {
+      // if you have enabled kerberos the following 2 params must be set
+      val principalName = conf.get("spark.history.kerberos.principal")
+      val keytabFilename = conf.get("spark.history.kerberos.keytab")
+      SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename)
+    }
+  }
+
 }
 
 
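
The History Server's new security behavior is driven entirely by configuration. A hedged sketch of the keys read by initSecurity() and renderSparkUI() above; the principal, keytab path and values are placeholders, not taken from the commit:

    import org.apache.spark.SparkConf

    // Sketch only: in practice these would be set in the History Server's configuration.
    val historyConf = new SparkConf()
      .set("spark.history.kerberos.enabled", "true")                        // log in from a keytab at startup
      .set("spark.history.kerberos.principal", "spark/history@EXAMPLE.COM") // placeholder principal
      .set("spark.history.kerberos.keytab", "/etc/spark/spark.keytab")      // placeholder keytab path
      .set("spark.history.ui.acls.enable", "true")                          // enforce view ACLs per replayed app

With the last key enabled, each replayed application gets its own SecurityManager whose view ACLs are the application's own spark.ui.view.acls plus the user who ran it, as set in renderSparkUI above.
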
core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -237,8 +237,7 @@ private[spark] class Master(
         if (waitingDrivers.contains(d)) {
           waitingDrivers -= d
           self ! DriverStateChanged(driverId, DriverState.KILLED, None)
-        }
-        else {
+        } else {
           // We just notify the worker to kill the driver here. The final bookkeeping occurs
           // on the return path when the worker submits a state change back to the master
           // to notify it that the driver was successfully killed.
core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -91,9 +91,11 @@ private[spark] class DriverRunner(
       }
 
       val state =
-        if (killed) { DriverState.KILLED }
-        else if (finalException.isDefined) { DriverState.ERROR }
-        else {
+        if (killed) {
+          DriverState.KILLED
+        } else if (finalException.isDefined) {
+          DriverState.ERROR
+        } else {
           finalExitCode match {
             case Some(0) => DriverState.FINISHED
             case _ => DriverState.FAILED
core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -58,30 +58,29 @@ private[spark] class ExecutorRunner(
       override def run() { fetchAndRunExecutor() }
     }
     workerThread.start()
-
     // Shutdown hook that kills actors on shutdown.
     shutdownHook = new Thread() {
       override def run() {
-        if (process != null) {
-          logInfo("Shutdown hook killing child process.")
-          process.destroy()
-          process.waitFor()
-        }
+        killProcess()
       }
     }
     Runtime.getRuntime.addShutdownHook(shutdownHook)
   }
 
+  private def killProcess() {
+    if (process != null) {
+      logInfo("Killing process!")
+      process.destroy()
+      process.waitFor()
+    }
+  }
+
   /** Stop this executor runner, including killing the process it launched */
   def kill() {
     if (workerThread != null) {
+      // the workerThread will kill the child process when interrupted
       workerThread.interrupt()
       workerThread = null
-      if (process != null) {
-        logInfo("Killing process!")
-        process.destroy()
-        process.waitFor()
-      }
       state = ExecutorState.KILLED
       worker ! ExecutorStateChanged(appId, execId, state, None, None)
       Runtime.getRuntime.removeShutdownHook(shutdownHook)
@@ -128,7 +127,6 @@ private[spark] class ExecutorRunner(
       // parent process for the executor command
       env.put("SPARK_LAUNCH_WITH_SCALA", "0")
       process = builder.start()
-
       val header = "Spark Executor Command: %s\n%s\n\n".format(
         command.mkString("\"", "\" \"", "\""), "=" * 40)
 
@@ -148,14 +146,13 @@ private[spark] class ExecutorRunner(
       val message = "Command exited with code " + exitCode
       worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
     } catch {
-      case interrupted: InterruptedException =>
+      case interrupted: InterruptedException => {
         logInfo("Runner thread for executor " + fullId + " interrupted")
-
+        killProcess()
+      }
       case e: Exception => {
         logError("Error running executor", e)
-        if (process != null) {
-          process.destroy()
-        }
+        killProcess()
         state = ExecutorState.FAILED
         val message = e.getClass + ": " + e.getMessage
         worker ! ExecutorStateChanged(appId, execId, state, Some(message), None)
core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala
@@ -89,8 +89,7 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") {
           Previous {Utils.bytesToString(math.min(byteLength, startByte))}
         </button>
       </a>
-    }
-    else {
+    } else {
       <button type="button" class="btn btn-default" disabled="disabled">
         Previous 0 B
       </button>
@@ -104,8 +103,7 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") {
           Next {Utils.bytesToString(math.min(byteLength, logLength - endByte))}
         </button>
       </a>
-    }
-    else {
+    } else {
       <button type="button" class="btn btn-default" disabled="disabled">
         Next 0 B
       </button>
@@ -137,9 +135,13 @@ private[spark] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") {
     val logLength = file.length()
     val getOffset = offset.getOrElse(logLength - defaultBytes)
     val startByte =
-      if (getOffset < 0) 0L
-      else if (getOffset > logLength) logLength
-      else getOffset
+      if (getOffset < 0) {
+        0L
+      } else if (getOffset > logLength) {
+        logLength
+      } else {
+        getOffset
+      }
     val logPageLength = math.min(byteLength, maxBytes)
     val endByte = math.min(startByte + logPageLength, logLength)
     (startByte, endByte)