SPARK-4705: Doing cherry-pick of fix into master
twinkle-g authored and Marcelo Vanzin committed Apr 7, 2015
1 parent c83e039 commit 0eb7722
Showing 7 changed files with 50 additions and 9 deletions.
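Taken together, the change threads an application attempt ID from the scheduler backend, through TaskScheduler and SparkContext, into EventLoggingListener, so that on YARN each attempt of an application writes its event log under its own subdirectory. A rough sketch of the resulting layout, with a made-up base directory and IDs (not taken from the commit):

    file:/var/log/spark/1/application_1428000000000_0007   (first AM attempt)
    file:/var/log/spark/2/application_1428000000000_0007   (second AM attempt)

Backends that do not know about attempts report an empty attempt ID and keep the flat, pre-change layout.
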
4 changes: 3 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -386,6 +386,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
   taskScheduler.start()
 
   val applicationId: String = taskScheduler.applicationId()
+  val applicationAttemptId: String = taskScheduler.applicationAttemptId()
   conf.set("spark.app.id", applicationId)
 
   env.blockManager.initialize(applicationId)
@@ -402,7 +403,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
   private[spark] val eventLogger: Option[EventLoggingListener] = {
     if (isEventLogEnabled) {
       val logger =
-        new EventLoggingListener(applicationId, eventLogDir.get, conf, hadoopConfiguration)
+        new EventLoggingListener(applicationId, applicationAttemptId,
+          eventLogDir.get, conf, hadoopConfiguration)
       logger.start()
       listenerBus.addListener(logger)
       Some(logger)

core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
@@ -47,6 +47,7 @@ import org.apache.spark.util.{JsonProtocol, Utils}
  */
 private[spark] class EventLoggingListener(
     appId: String,
+    appAttemptId: String,
     logBaseDir: URI,
     sparkConf: SparkConf,
     hadoopConf: Configuration)
@@ -55,7 +56,7 @@ private[spark] class EventLoggingListener(
   import EventLoggingListener._
 
   def this(appId: String, logBaseDir: URI, sparkConf: SparkConf) =
-    this(appId, logBaseDir, sparkConf, SparkHadoopUtil.get.newConfiguration(sparkConf))
+    this(appId, "", logBaseDir, sparkConf, SparkHadoopUtil.get.newConfiguration(sparkConf))
 
   private val shouldCompress = sparkConf.getBoolean("spark.eventLog.compress", false)
   private val shouldOverwrite = sparkConf.getBoolean("spark.eventLog.overwrite", false)
@@ -89,7 +90,7 @@ private[spark] class EventLoggingListener(
   private[scheduler] val loggedEvents = new ArrayBuffer[JValue]
 
   // Visible for tests only.
-  private[scheduler] val logPath = getLogPath(logBaseDir, appId, compressionCodecName)
+  private[scheduler] val logPath = getLogPath(logBaseDir.toString, appId, appAttemptId, compressionCodecName)
 
   /**
    * Creates the log file in the configured log directory.
@@ -254,18 +255,30 @@ private[spark] object EventLoggingListener extends Logging {
    *
    * @param logBaseDir Directory where the log file will be written.
    * @param appId A unique app ID.
+   * @param appAttemptId A unique attempt ID of the given appId.
    * @param compressionCodecName Name to identify the codec used to compress the contents
    *                             of the log, or None if compression is not enabled.
    * @return A path which consists of file-system-safe characters.
    */
   def getLogPath(
-      logBaseDir: URI,
+      logBaseDir: String,
       appId: String,
+      appAttemptId: String,
       compressionCodecName: Option[String] = None): String = {
     val sanitizedAppId = appId.replaceAll("[ :/]", "-").replaceAll("[.${}'\"]", "_").toLowerCase
     // e.g. app_123, app_123.lzf
     val logName = sanitizedAppId + compressionCodecName.map { "." + _ }.getOrElse("")
-    logBaseDir.toString.stripSuffix("/") + "/" + logName
+
+    if (appAttemptId.equals("")) {
+      Utils.resolveURI(logBaseDir).toString.stripSuffix("/") + "/" + logName
+    } else {
+      Utils.resolveURI(logBaseDir).toString.stripSuffix("/") + "/" + appAttemptId + "/" + logName
+    }
   }
 
+  def getLogPath(
+      logBaseDir: String,
+      appId: String,
+      compressionCodecName: Option[String] = None): String = {
+    getLogPath(logBaseDir, appId, "", compressionCodecName)
+  }
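
To make the path construction concrete, here is a small usage sketch; the base directory, app ID, attempt, and codec values below are invented for illustration, and the expected results assume the overloads above:

    // Illustrative only: arguments are made up; results follow getLogPath above.
    import org.apache.spark.scheduler.EventLoggingListener

    // With an attempt ID, the attempt becomes a subdirectory under the base dir:
    EventLoggingListener.getLogPath("/var/log/spark", "app 123", "2", Some("lzf"))
    // => "file:/var/log/spark/2/app-123.lzf"

    // With the attempt-less overload, the layout stays flat:
    EventLoggingListener.getLogPath("/var/log/spark", "app 123", Some("lzf"))
    // => "file:/var/log/spark/app-123.lzf"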

core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala
@@ -41,4 +41,11 @@ private[spark] trait SchedulerBackend {
    */
   def applicationId(): String = appId
 
+  /**
+   * Get the application attempt ID associated with the job.
+   *
+   * @return An application attempt ID
+   */
+  def applicationAttemptId(): String = ""
+
 }
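
Any cluster-manager backend that knows about retries can override the new hook. A minimal sketch, assuming the trait's abstract members at this point are start, stop, reviveOffers, and defaultParallelism (the class name and attempt value are hypothetical):

    // Hypothetical backend that always reports the first attempt;
    // the lifecycle members are stubbed out for brevity.
    private[spark] class SingleAttemptBackend extends SchedulerBackend {
      override def start(): Unit = {}
      override def stop(): Unit = {}
      override def reviveOffers(): Unit = {}
      override def defaultParallelism(): Int = 1
      override def applicationAttemptId(): String = "1"
    }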

core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
@@ -78,4 +78,12 @@ private[spark] trait TaskScheduler {
    * Process a lost executor
    */
   def executorLost(executorId: String, reason: ExecutorLossReason): Unit
 
+  /**
+   * Get the application's attempt ID associated with the job.
+   *
+   * @return The application's attempt ID
+   */
+  def applicationAttemptId(): String = ""
+
 }

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -514,6 +514,8 @@ private[spark] class TaskSchedulerImpl(
   }
 
   override def applicationId(): String = backend.applicationId()
 
+  override def applicationAttemptId(): String = backend.applicationAttemptId()
+
 }

yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -90,6 +90,10 @@ private[spark] class ApplicationMaster(
 
       // Propagate the application ID so that YarnClusterSchedulerBackend can pick it up.
       System.setProperty("spark.yarn.app.id", appAttemptId.getApplicationId().toString())
+
+      // Propagate the attempt ID so that, when event logging is enabled, each attempt's
+      // logs are written to a different directory.
+      System.setProperty("spark.yarn.app.attemptid", appAttemptId.getAttemptId().toString())
     }
 
     logInfo("ApplicationAttemptId: " + appAttemptId)
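
On YARN, getAttemptId() returns a small integer that increments each time the ApplicationMaster is retried, so the two published properties end up looking like this (values invented for illustration):

    // spark.yarn.app.id        = application_1428000000000_0007
    // spark.yarn.app.attemptid = 2   (second attempt of the same application)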

yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala
@@ -46,5 +46,10 @@ private[spark] class YarnClusterSchedulerBackend(
       logError("Application ID is not set.")
       super.applicationId
     }
 
+  override def applicationAttemptId(): String =
+    sc.getConf.getOption("spark.yarn.app.attemptid").getOrElse {
+      logError("Application attempt ID is not set.")
+      super.applicationAttemptId
+    }
 }