
[SPARK-20355] Add per-application Spark version on the history server header page #17658

Closed
wants to merge 14 commits
@@ -30,7 +30,8 @@ private[spark] case class ApplicationAttemptInfo(
endTime: Long,
lastUpdated: Long,
sparkUser: String,
completed: Boolean = false)
completed: Boolean = false,
appSparkVersion: String)

private[spark] case class ApplicationHistoryInfo(
id: String,
@@ -248,7 +248,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
val conf = this.conf.clone()
val appSecManager = new SecurityManager(conf)
SparkUI.createHistoryUI(conf, replayBus, appSecManager, appInfo.name,
HistoryServer.getAttemptURI(appId, attempt.attemptId), attempt.startTime)
HistoryServer.getAttemptURI(appId, attempt.attemptId),
attempt.startTime)
Member:

I don't think adding this line wrap is necessary

// Do not call ui.bind() to avoid creating a new server for each application
}

@@ -257,6 +258,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
val appListener = replay(fileStatus, isApplicationCompleted(fileStatus), replayBus)
@jerryshao (Contributor), Apr 19, 2017:

Is it correct to remove these two lines? As far as I know they are necessary, because in the previous replay call we only parse the SparkListenerApplicationStart and SparkListenerApplicationEnd events.

Sorry about that, I just saw you moved these two lines above.


if (appListener.appId.isDefined) {
ui.appSparkVersion = appListener.appSparkVersion.getOrElse("")
ui.getSecurityManager.setAcls(HISTORY_UI_ACLS_ENABLE)
// make sure to set admin acls before view acls so they are properly picked up
val adminAcls = HISTORY_UI_ADMIN_ACLS + "," + appListener.adminAcls.getOrElse("")
@@ -469,7 +471,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
lastUpdated,
appListener.sparkUser.getOrElse(NOT_STARTED),
appCompleted,
fileStatus.getLen()
fileStatus.getLen(),
appListener.appSparkVersion.getOrElse("")
)
fileToAppInfo(logPath) = attemptInfo
logDebug(s"Application log ${attemptInfo.logPath} loaded successfully: $attemptInfo")
@@ -762,9 +765,10 @@ private class FsApplicationAttemptInfo(
lastUpdated: Long,
sparkUser: String,
completed: Boolean,
val fileSize: Long)
val fileSize: Long,
appSparkVersion: String)
extends ApplicationAttemptInfo(
attemptId, startTime, endTime, lastUpdated, sparkUser, completed) {
attemptId, startTime, endTime, lastUpdated, sparkUser, completed, appSparkVersion) {

/** extend the superclass string value with the extra attributes of this class */
override def toString: String = {
@@ -34,6 +34,7 @@ private[spark] class ApplicationEventListener extends SparkListener {
var adminAcls: Option[String] = None
var viewAclsGroups: Option[String] = None
var adminAclsGroups: Option[String] = None
var appSparkVersion: Option[String] = None

override def onApplicationStart(applicationStart: SparkListenerApplicationStart) {
appName = Some(applicationStart.appName)
@@ -57,4 +58,10 @@
adminAclsGroups = allProperties.get("spark.admin.acls.groups")
}
}

override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
case SparkListenerLogStart(sparkVersion) =>
appSparkVersion = Some(sparkVersion)
case _ =>
}
Contributor:

You need a "catch all" here:

scala> class MyListener extends org.apache.spark.scheduler.SparkListener {
     |   override def onOtherEvent(event: org.apache.spark.scheduler.SparkListenerEvent): Unit = event match {
     |     case _: org.apache.spark.scheduler.SparkListenerTaskStart => ()
     |   }
     | }
defined class MyListener
scala> sc.addSparkListener(new MyListener())
scala> spark.range(1000).write.saveAsTable("test")
17/04/19 10:39:01 ERROR LiveListenerBus: Listener MyListener threw an exception
scala.MatchError: SparkListenerSQLExecutionStart(0,saveAsTable at <console>:27,org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:354)
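
A minimal sketch (mine, not from the PR) of the same listener with the catch-all that the diff above adds; unrelated events such as SparkListenerSQLExecutionStart are then ignored instead of raising a scala.MatchError:

class MyListener extends org.apache.spark.scheduler.SparkListener {
  // Handle only the event we care about...
  override def onOtherEvent(event: org.apache.spark.scheduler.SparkListenerEvent): Unit = event match {
    case _: org.apache.spark.scheduler.SparkListenerTaskStart => ()
    case _ => () // ...and silently drop everything else
  }
}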

}
@@ -119,7 +119,7 @@ private[spark] class EventLoggingListener(
val cstream = compressionCodec.map(_.compressedOutputStream(dstream)).getOrElse(dstream)
val bstream = new BufferedOutputStream(cstream, outputBufferSize)

EventLoggingListener.initEventLog(bstream)
EventLoggingListener.initEventLog(bstream, testing, loggedEvents)
fileSystem.setPermission(path, LOG_FILE_PERMISSIONS)
writer = Some(new PrintWriter(bstream))
logInfo("Logging events to %s".format(logPath))
@@ -283,10 +283,17 @@ private[spark] object EventLoggingListener extends Logging {
*
* @param logStream Raw output stream to the event log file.
*/
def initEventLog(logStream: OutputStream): Unit = {
def initEventLog(
logStream: OutputStream,
testing: Boolean,
loggedEvents: ArrayBuffer[JValue]): Unit = {
val metadata = SparkListenerLogStart(SPARK_VERSION)
val metadataJson = compact(JsonProtocol.logStartToJson(metadata)) + "\n"
val eventJson = JsonProtocol.logStartToJson(metadata)
val metadataJson = compact(eventJson) + "\n"
logStream.write(metadataJson.getBytes(StandardCharsets.UTF_8))
if (testing && loggedEvents != null) {
loggedEvents += eventJson
Contributor:

Instead of this, how about returning the SparkListenerLogStart event?

@redsanket (Author), May 5, 2017:

I thought loggedEvents only takes JSON values. Also, the loggedEvents are generated here as part of the SparkContext, and probably through other sources. The ReplayListenerSuite, however, compares the original events with the replayed events; the replayed events are written to the event log, but loggedEvents will not contain the SparkListenerLogStart event, as it is not produced by the SparkContext, if I understand correctly.

Contributor:

The downside to returning the event is that this function can't do more in the future, for instance if it wants to add two events. I guess it's private, so it doesn't matter too much, and it's just for testing.

Contributor:

My main concern is that it makes calling this method from other places awkward. (i.e. the semantics of the new arguments are not clear.)

In any case, there's a single other call to this method, in a test suite, so probably ok.
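
For illustration, a sketch of the alternative discussed above (my reading of the suggestion, not the merged code): keep the original single-argument signature and return the event, leaving the test-only bookkeeping to the caller:

def initEventLog(logStream: OutputStream): SparkListenerLogStart = {
  val metadata = SparkListenerLogStart(SPARK_VERSION)
  // Write the metadata header exactly as before.
  val metadataJson = compact(JsonProtocol.logStartToJson(metadata)) + "\n"
  logStream.write(metadataJson.getBytes(StandardCharsets.UTF_8))
  metadata // the caller (e.g. a test) decides whether to record it in loggedEvents
}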

}
}

/**
@@ -160,9 +160,9 @@ case class SparkListenerApplicationEnd(time: Long) extends SparkListenerEvent

/**
* An internal class that describes the metadata of an event log.
* This event is not meant to be posted to listeners downstream.
Member:

Is there a reason this note was here? What was the reasoning behind it?

Author:

This was only for metadata info; when it was written, it was simply not meant to be consumed. But now we can reuse it for this case.

*/
private[spark] case class SparkListenerLogStart(sparkVersion: String) extends SparkListenerEvent
@DeveloperApi
case class SparkListenerLogStart(sparkVersion: String) extends SparkListenerEvent

/**
* Interface for creating history listeners defined in other modules like SQL, which are used to
@@ -71,7 +71,6 @@ private[spark] trait SparkListenerBus
listener.onNodeUnblacklisted(nodeUnblacklisted)
case blockUpdated: SparkListenerBlockUpdated =>
listener.onBlockUpdated(blockUpdated)
case logStart: SparkListenerLogStart => // ignore event log metadata
Member:

I haven't looked at this closely, but will allowing this through cause any issues? Why was it ignored before?

Author:

I do not see it getting consumed apart from registering some metadata, so I guess it should be fine, as this event already logs the version.

Contributor:

Will this change the behavior of SparkListenerBus? Previously we ignored this event, but with this change we will post it through listener.onOtherEvent, and users will get it. Yet from the code, this event is not accessible outside of Spark:

private[spark] case class SparkListenerLogStart(sparkVersion: String) extends SparkListenerEvent

Contributor:

I think that if this is gonna be posted on the bus it might be better to make it public and @DeveloperApi like other events.

Adding new events is not an issue, but posting an event that people can't handle is a little odd.

Author:

I am not too sure whether it will change any behavior, and that is precisely why we post it through onOtherEvent: in case someone wants to listen for the event and make use of it, as in this scenario.
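
To make the @DeveloperApi point concrete, here is a hypothetical user-side listener (a sketch, not code from the PR) that could handle the event once it is public:

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerLogStart}

class VersionListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case SparkListenerLogStart(version) =>
      println(s"event log written by Spark $version") // e.g. observed during replay
    case _ => // ignore every other event
  }
}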

case _ => listener.onOtherEvent(event)
}
}
@@ -90,7 +90,8 @@ private[spark] object ApplicationsListResource {
},
lastUpdated = new Date(internalAttemptInfo.lastUpdated),
sparkUser = internalAttemptInfo.sparkUser,
completed = internalAttemptInfo.completed
completed = internalAttemptInfo.completed,
appSparkVersion = internalAttemptInfo.appSparkVersion
)
}
)
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -38,7 +38,8 @@ class ApplicationAttemptInfo private[spark](
val lastUpdated: Date,
val duration: Long,
val sparkUser: String,
val completed: Boolean = false) {
val completed: Boolean = false,
val appSparkVersion: String) {
def getStartTimeEpoch: Long = startTime.getTime
def getEndTimeEpoch: Long = endTime.getTime
def getLastUpdatedEpoch: Long = lastUpdated.getTime
6 changes: 5 additions & 1 deletion core/src/main/scala/org/apache/spark/ui/SparkUI.scala
@@ -60,6 +60,8 @@ private[spark] class SparkUI private (

var appId: String = _

var appSparkVersion = org.apache.spark.SPARK_VERSION

private var streamingJobProgressListener: Option[SparkListener] = None

/** Initialize all components of the server. */
@@ -118,7 +120,8 @@ private[spark] class SparkUI private (
duration = 0,
lastUpdated = new Date(startTime),
sparkUser = getSparkUser,
completed = false
completed = false,
appSparkVersion = ""
))
))
}
@@ -139,6 +142,7 @@ private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String)

def appName: String = parent.appName

def appSparkVersion: String = parent.appSparkVersion
}

private[spark] object SparkUI {
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -228,7 +228,7 @@ private[spark] object UIUtils extends Logging {
<div class="brand">
<a href={prependBaseUri("/")} class="brand">
<img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")} />
<span class="version">{org.apache.spark.SPARK_VERSION}</span>
<span class="version">{activeTab.appSparkVersion}</span>
</a>
</div>
<p class="navbar-text pull-right">
@@ -8,6 +8,7 @@
"duration" : 10671,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1479335620587,
"startTimeEpoch" : 1479335609916,
"lastUpdatedEpoch" : 0
@@ -22,6 +23,7 @@
"duration" : 101795,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1479252138874,
"startTimeEpoch" : 1479252037079,
"lastUpdatedEpoch" : 0
@@ -36,6 +38,7 @@
"duration" : 10505,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917391398,
"startTimeEpoch" : 1430917380893,
"lastUpdatedEpoch" : 0
@@ -51,6 +54,7 @@
"duration" : 57,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917380950,
"startTimeEpoch" : 1430917380893,
"lastUpdatedEpoch" : 0
@@ -62,6 +66,7 @@
"duration" : 10,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917380890,
"startTimeEpoch" : 1430917380880,
"lastUpdatedEpoch" : 0
@@ -77,6 +82,7 @@
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1426633945177,
"startTimeEpoch" : 1426633910242,
"lastUpdatedEpoch" : 0
@@ -88,6 +94,7 @@
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1426533945177,
"startTimeEpoch" : 1426533910242,
"lastUpdatedEpoch" : 0
@@ -102,6 +109,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1425081766912,
"startTimeEpoch" : 1425081758277,
"lastUpdatedEpoch" : 0
@@ -116,6 +124,7 @@
"duration" : 9011,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981788731,
"startTimeEpoch" : 1422981779720,
"lastUpdatedEpoch" : 0
@@ -130,6 +139,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981766912,
"startTimeEpoch" : 1422981758277,
"lastUpdatedEpoch" : 0
@@ -8,6 +8,7 @@
"duration" : 10671,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1479335620587,
"startTimeEpoch" : 1479335609916,
"lastUpdatedEpoch" : 0
@@ -22,6 +23,7 @@
"duration" : 101795,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
Contributor:

This doesn't look correct. If I look at the log file for this app (core/src/test/resources/spark-events/app-20161115172038-0000), there's a log start event:

{"Event":"SparkListenerLogStart","Spark Version":"2.1.0-SNAPSHOT"}

So this value should be "2.1.0-SNAPSHOT", no?

Author:

I am not sure if the tests hit this code path https://github.com/apache/spark/pull/17658/files#diff-a7befb99e7bd7e3ab5c46c2568aa5b3eR474, so they take the default value.

@redsanket (Author), May 5, 2017:

Probably I could change the default value; OK, will do it.

Contributor:

It's not really about the default value; these tests replay the log files, which contain the Spark version, so I would expect the data retrieved through the API to contain the version that was recorded in the event log.

Another way of saying that: there is probably a bug somewhere in your code that is preventing the data from the event log from being exposed correctly through the REST API.

Author:

I see events being passed to ApplicationEventListener in the debug logs, but interestingly doPostEvent seems not to be posting events to the listener, which is a bit odd; not sure whether I need to change the contract for the tests to pick this up, though:

17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.jobs.JobProgressListener@53e58df ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.env.EnvironmentListener@53371e87 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.storage.StorageStatusListener@394665e7 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.exec.ExecutorsListener@2d92f38e ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.storage.StorageListener@5a9dbe43 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.235 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.scope.RDDOperationGraphListener@31b550b1 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:42.236 qtp1618269752-1127 INFO ReplayListenerBus: listener --- event org.apache.spark.scheduler.ApplicationEventListener@3b56723d ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.jobs.JobProgressListener@3647a865 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.env.EnvironmentListener@4357e8cc ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.storage.StorageStatusListener@5062f4c0 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.exec.ExecutorsListener@273c097f ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.storage.StorageListener@2c33997d ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.scope.RDDOperationGraphListener@505e15bf ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:43.300 qtp1618269752-1126 INFO ReplayListenerBus: listener --- event org.apache.spark.scheduler.ApplicationEventListener@175d3f80 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.jobs.JobProgressListener@71dacceb ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.env.EnvironmentListener@6f41235c ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.storage.StorageStatusListener@3cdcb6d0 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.exec.ExecutorsListener@4abcd5c6 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.storage.StorageListener@6d26f5ee ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.scope.RDDOperationGraphListener@b1b60d5 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.305 qtp1618269752-1131 INFO ReplayListenerBus: listener --- event org.apache.spark.scheduler.ApplicationEventListener@52283d50 ---SparkListenerLogStart(2.3.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.jobs.JobProgressListener@f1563e7 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.env.EnvironmentListener@9122315 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.storage.StorageStatusListener@eb98c21 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.exec.ExecutorsListener@1da79f75 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.storage.StorageListener@3989cd7d ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.scope.RDDOperationGraphListener@219b4bd2 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.676 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.scheduler.ApplicationEventListener@63811344 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.722 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.jobs.JobProgressListener@52177d75 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.722 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.env.EnvironmentListener@72409b15 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.722 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.storage.StorageStatusListener@5ee11505 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.722 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.exec.ExecutorsListener@24bad915 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.722 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.storage.StorageListener@4d2fe422 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.723 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.ui.scope.RDDOperationGraphListener@3bc156a1 ---SparkListenerLogStart(1.4.0-SNAPSHOT)
17/05/05 16:39:44.723 qtp1478318899-1195 INFO ReplayListenerBus: listener --- event org.apache.spark.scheduler.ApplicationEventListener@6d5e35e1 ---SparkListenerLogStart(1.4.0-SNAPSHOT)

Contributor:

I think I know what this is. FsHistoryProvider has this code to speed up the log parsing for listings:

  protected def mergeApplicationListing(fileStatus: FileStatus): Unit = {
    val newAttempts = try {
      val eventsFilter: ReplayEventsFilter = { eventString =>
        eventString.startsWith(APPL_START_EVENT_PREFIX) ||
          eventString.startsWith(APPL_END_EVENT_PREFIX)
      }

It only allows those two events through currently; you probably need to add the log start event to that list too.
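
A minimal sketch of that fix (the LOG_START_EVENT_PREFIX constant is an assumption, named by analogy with the two existing prefixes):

private val LOG_START_EVENT_PREFIX = "{\"Event\":\"SparkListenerLogStart\""

val eventsFilter: ReplayEventsFilter = { eventString =>
  eventString.startsWith(APPL_START_EVENT_PREFIX) ||
    eventString.startsWith(APPL_END_EVENT_PREFIX) ||
    eventString.startsWith(LOG_START_EVENT_PREFIX) // also let the version event through
}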

Author:

OK, doPostEvent is posting the events:

17/05/05 17:17:17.265 qtp927704210-1183 INFO ReplayListenerBus: eventInDoPost SparkListenerLogStart(1.4.0-SNAPSHOT)

So it should be good; it looks like ApplicationEventListener is not able to read the events. It used to before, so something has changed; will dig deeper.

Author:

Aah, OK: the UI will work because it is replaying, but the REST endpoint would break, as the event is not being allowed to pass through. Thanks @vanzin.

"endTimeEpoch" : 1479252138874,
"startTimeEpoch" : 1479252037079,
"lastUpdatedEpoch" : 0
@@ -36,6 +38,7 @@
"duration" : 10505,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917391398,
"startTimeEpoch" : 1430917380893,
"lastUpdatedEpoch" : 0
@@ -51,6 +54,7 @@
"duration" : 57,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917380950,
"startTimeEpoch" : 1430917380893,
"lastUpdatedEpoch" : 0
@@ -62,6 +66,7 @@
"duration" : 10,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917380890,
"startTimeEpoch" : 1430917380880,
"lastUpdatedEpoch" : 0
@@ -77,6 +82,7 @@
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1426633945177,
"startTimeEpoch" : 1426633910242,
"lastUpdatedEpoch" : 0
@@ -88,6 +94,7 @@
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1426533945177,
"startTimeEpoch" : 1426533910242,
"lastUpdatedEpoch" : 0
@@ -102,6 +109,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"appSparkVersion" : "",
"endTimeEpoch" : 1425081766912,
"startTimeEpoch" : 1425081758277,
"lastUpdatedEpoch" : 0
@@ -116,6 +124,7 @@
"duration" : 9011,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981788731,
"startTimeEpoch" : 1422981779720,
"lastUpdatedEpoch" : 0
@@ -130,6 +139,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981766912,
"startTimeEpoch" : 1422981758277,
"lastUpdatedEpoch" : 0
@@ -8,6 +8,7 @@
"duration" : 10671,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1479335620587,
"startTimeEpoch" : 1479335609916,
"lastUpdatedEpoch" : 0
@@ -22,6 +23,7 @@
"duration" : 101795,
"sparkUser" : "jose",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1479252138874,
"startTimeEpoch" : 1479252037079,
"lastUpdatedEpoch" : 0
@@ -36,6 +38,7 @@
"duration" : 10505,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1430917391398,
"startTimeEpoch" : 1430917380893,
"lastUpdatedEpoch" : 0
@@ -8,6 +8,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981766912,
"startTimeEpoch" : 1422981758277,
"lastUpdatedEpoch" : 0
@@ -8,6 +8,7 @@
"duration" : 9011,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981788731,
"startTimeEpoch" : 1422981779720,
"lastUpdatedEpoch" : 0
@@ -22,6 +23,7 @@
"duration" : 8635,
"sparkUser" : "irashid",
"completed" : true,
"appSparkVersion" : "",
"endTimeEpoch" : 1422981766912,
"startTimeEpoch" : 1422981758277,
"lastUpdatedEpoch" : 0