[SPARK-4857] [CORE] Adds Executor membership events to SparkListener
Adds onExecutorAdded and onExecutorRemoved events to SparkListener. These let a client be notified when an executor is added or removed, and carry additional information such as how many vcores the executor is consuming.

In addition, this commit adds a JavaSparkListener adapter class to the Java API that provides no-op default implementations of the SparkListener methods. This works around the fact that Scala trait method implementations are not inherited by Java classes. Having Java clients extend JavaSparkListener going forward will prevent breakage in Java when new events are added to SparkListener.
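
As a sketch of the intended usage (the listener class and its counting logic below are illustrative, not part of this patch), a Java client could extend the new adapter and override only the callbacks it cares about:

import org.apache.spark.JavaSparkListener;
import org.apache.spark.scheduler.SparkListenerExecutorAdded;
import org.apache.spark.scheduler.SparkListenerExecutorRemoved;

// Hypothetical listener that tracks how many executors are currently registered.
public class ExecutorCountListener extends JavaSparkListener {
  private int liveExecutors = 0;

  @Override
  public void onExecutorAdded(SparkListenerExecutorAdded executorAdded) {
    liveExecutors++;
    // The event carries the executor id plus an ExecutorInfo with host and core count.
    System.out.println("Executor " + executorAdded.executorId()
        + " added on " + executorAdded.executorInfo().executorHost()
        + " with " + executorAdded.executorInfo().totalCores()
        + " cores; " + liveExecutors + " live");
  }

  @Override
  public void onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) {
    liveExecutors--;
    System.out.println("Executor " + executorRemoved.executorId()
        + " removed; " + liveExecutors + " live");
  }
}

Such a listener would then be registered through SparkContext#addSparkListener; all other SparkListener callbacks fall through to the adapter's empty implementations.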

Author: Kostas Sakellis <kostas@cloudera.com>

Closes apache#3711 from ksakellis/kostas-spark-4857 and squashes the following commits:

946d2c5 [Kostas Sakellis] Added executorAdded/Removed events to MesosSchedulerBackend
b1d054a [Kostas Sakellis] Remove executorInfo from ExecutorRemoved event
1727b38 [Kostas Sakellis] Renamed ExecutorDetails back to ExecutorInfo and other CR feedback
14fe78d [Kostas Sakellis] Added executor added/removed events to json protocol
93d087b [Kostas Sakellis] [SPARK-4857] [CORE] Adds Executor membership events to SparkListener
Kostas Sakellis authored and pwendell committed Jan 16, 2015
1 parent 65858ba commit 96c2c71
Showing 18 changed files with 375 additions and 33 deletions.
97 changes: 97 additions & 0 deletions core/src/main/java/org/apache/spark/JavaSparkListener.java
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark;

import org.apache.spark.scheduler.SparkListener;
import org.apache.spark.scheduler.SparkListenerApplicationEnd;
import org.apache.spark.scheduler.SparkListenerApplicationStart;
import org.apache.spark.scheduler.SparkListenerBlockManagerAdded;
import org.apache.spark.scheduler.SparkListenerBlockManagerRemoved;
import org.apache.spark.scheduler.SparkListenerEnvironmentUpdate;
import org.apache.spark.scheduler.SparkListenerExecutorAdded;
import org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate;
import org.apache.spark.scheduler.SparkListenerExecutorRemoved;
import org.apache.spark.scheduler.SparkListenerJobEnd;
import org.apache.spark.scheduler.SparkListenerJobStart;
import org.apache.spark.scheduler.SparkListenerStageCompleted;
import org.apache.spark.scheduler.SparkListenerStageSubmitted;
import org.apache.spark.scheduler.SparkListenerTaskEnd;
import org.apache.spark.scheduler.SparkListenerTaskGettingResult;
import org.apache.spark.scheduler.SparkListenerTaskStart;
import org.apache.spark.scheduler.SparkListenerUnpersistRDD;

/**
* Java clients should extend this class instead of implementing
* SparkListener directly. This is to prevent Java clients
* from breaking when new events are added to the SparkListener
* trait.
*
* This is a concrete class instead of abstract to enforce that
* new events get added to both the SparkListener and this adapter
* in lockstep.
*/
public class JavaSparkListener implements SparkListener {

@Override
public void onStageCompleted(SparkListenerStageCompleted stageCompleted) { }

@Override
public void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) { }

@Override
public void onTaskStart(SparkListenerTaskStart taskStart) { }

@Override
public void onTaskGettingResult(SparkListenerTaskGettingResult taskGettingResult) { }

@Override
public void onTaskEnd(SparkListenerTaskEnd taskEnd) { }

@Override
public void onJobStart(SparkListenerJobStart jobStart) { }

@Override
public void onJobEnd(SparkListenerJobEnd jobEnd) { }

@Override
public void onEnvironmentUpdate(SparkListenerEnvironmentUpdate environmentUpdate) { }

@Override
public void onBlockManagerAdded(SparkListenerBlockManagerAdded blockManagerAdded) { }

@Override
public void onBlockManagerRemoved(SparkListenerBlockManagerRemoved blockManagerRemoved) { }

@Override
public void onUnpersistRDD(SparkListenerUnpersistRDD unpersistRDD) { }

@Override
public void onApplicationStart(SparkListenerApplicationStart applicationStart) { }

@Override
public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { }

@Override
public void onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate executorMetricsUpdate) { }

@Override
public void onExecutorAdded(SparkListenerExecutorAdded executorAdded) { }

@Override
public void onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) { }
}
core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
@@ -38,8 +38,8 @@ private[spark] class ApplicationInfo(
extends Serializable {

@transient var state: ApplicationState.Value = _
-@transient var executors: mutable.HashMap[Int, ExecutorInfo] = _
-@transient var removedExecutors: ArrayBuffer[ExecutorInfo] = _
+@transient var executors: mutable.HashMap[Int, ExecutorDesc] = _
+@transient var removedExecutors: ArrayBuffer[ExecutorDesc] = _
@transient var coresGranted: Int = _
@transient var endTime: Long = _
@transient var appSource: ApplicationSource = _
@@ -55,12 +55,12 @@ private[spark] class ApplicationInfo(

private def init() {
state = ApplicationState.WAITING
-executors = new mutable.HashMap[Int, ExecutorInfo]
+executors = new mutable.HashMap[Int, ExecutorDesc]
coresGranted = 0
endTime = -1L
appSource = new ApplicationSource(this)
nextExecutorId = 0
-removedExecutors = new ArrayBuffer[ExecutorInfo]
+removedExecutors = new ArrayBuffer[ExecutorDesc]
}

private def newExecutorId(useID: Option[Int] = None): Int = {
@@ -75,14 +75,14 @@
}
}

-def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): ExecutorInfo = {
-val exec = new ExecutorInfo(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave)
+def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None): ExecutorDesc = {
+val exec = new ExecutorDesc(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave)
executors(exec.id) = exec
coresGranted += cores
exec
}

-def removeExecutor(exec: ExecutorInfo) {
+def removeExecutor(exec: ExecutorDesc) {
if (executors.contains(exec.id)) {
removedExecutors += executors(exec.id)
executors -= exec.id
core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala (renamed from ExecutorInfo.scala)
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.master

import org.apache.spark.deploy.{ExecutorDescription, ExecutorState}

-private[spark] class ExecutorInfo(
+private[spark] class ExecutorDesc(
val id: Int,
val application: ApplicationInfo,
val worker: WorkerInfo,
@@ -37,7 +37,7 @@ private[spark] class ExecutorInfo(

override def equals(other: Any): Boolean = {
other match {
-case info: ExecutorInfo =>
+case info: ExecutorDesc =>
fullId == info.fullId &&
worker.id == info.worker.id &&
cores == info.cores &&
core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -581,7 +581,7 @@ private[spark] class Master(
}
}

-def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo) {
+def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc) {
logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
worker.addExecutor(exec)
worker.actor ! LaunchExecutor(masterUrl,
core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala
@@ -38,7 +38,7 @@ private[spark] class WorkerInfo(
Utils.checkHost(host, "Expected hostname")
assert (port > 0)

-@transient var executors: mutable.HashMap[String, ExecutorInfo] = _ // executorId => info
+@transient var executors: mutable.HashMap[String, ExecutorDesc] = _ // executorId => info
@transient var drivers: mutable.HashMap[String, DriverInfo] = _ // driverId => info
@transient var state: WorkerState.Value = _
@transient var coresUsed: Int = _
@@ -70,13 +70,13 @@ private[spark] class WorkerInfo(
host + ":" + port
}

-def addExecutor(exec: ExecutorInfo) {
+def addExecutor(exec: ExecutorDesc) {
executors(exec.fullId) = exec
coresUsed += exec.cores
memoryUsed += exec.memory
}

-def removeExecutor(exec: ExecutorInfo) {
+def removeExecutor(exec: ExecutorDesc) {
if (executors.contains(exec.fullId)) {
executors -= exec.fullId
coresUsed -= exec.cores
core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
@@ -27,7 +27,7 @@ import org.json4s.JValue

import org.apache.spark.deploy.{ExecutorState, JsonProtocol}
import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
-import org.apache.spark.deploy.master.ExecutorInfo
+import org.apache.spark.deploy.master.ExecutorDesc
import org.apache.spark.ui.{UIUtils, WebUIPage}
import org.apache.spark.util.Utils

@@ -109,7 +109,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app
UIUtils.basicSparkPage(content, "Application: " + app.desc.name)
}

-private def executorRow(executor: ExecutorInfo): Seq[Node] = {
+private def executorRow(executor: ExecutorDesc): Seq[Node] = {
<tr>
<td>{executor.id}</td>
<td>
core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
@@ -168,6 +168,10 @@ private[spark] class EventLoggingListener(
logEvent(event, flushLogger = true)
override def onApplicationEnd(event: SparkListenerApplicationEnd) =
logEvent(event, flushLogger = true)
+override def onExecutorAdded(event: SparkListenerExecutorAdded) =
+logEvent(event, flushLogger = true)
+override def onExecutorRemoved(event: SparkListenerExecutorRemoved) =
+logEvent(event, flushLogger = true)

// No-op because logging every update would be overkill
override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate) { }
core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
@@ -25,6 +25,7 @@ import scala.collection.mutable
import org.apache.spark.{Logging, TaskEndReason}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.storage.BlockManagerId
import org.apache.spark.util.{Distribution, Utils}

@@ -84,6 +85,14 @@ case class SparkListenerBlockManagerRemoved(time: Long, blockManagerId: BlockMan
@DeveloperApi
case class SparkListenerUnpersistRDD(rddId: Int) extends SparkListenerEvent

+@DeveloperApi
+case class SparkListenerExecutorAdded(executorId: String, executorInfo: ExecutorInfo)
+extends SparkListenerEvent
+
+@DeveloperApi
+case class SparkListenerExecutorRemoved(executorId: String)
+extends SparkListenerEvent

/**
* Periodic updates from executors.
* @param execId executor id
@@ -109,7 +118,8 @@ private[spark] case object SparkListenerShutdown extends SparkListenerEvent
/**
* :: DeveloperApi ::
* Interface for listening to events from the Spark scheduler. Note that this is an internal
-* interface which might change in different Spark releases.
+* interface which might change in different Spark releases. Java clients should extend
+* {@link JavaSparkListener}
*/
@DeveloperApi
trait SparkListener {
@@ -183,6 +193,16 @@ trait SparkListener {
* Called when the driver receives task metrics from an executor in a heartbeat.
*/
def onExecutorMetricsUpdate(executorMetricsUpdate: SparkListenerExecutorMetricsUpdate) { }

+/**
+ * Called when the driver registers a new executor.
+ */
+def onExecutorAdded(executorAdded: SparkListenerExecutorAdded) { }
+
+/**
+ * Called when the driver removes an executor.
+ */
+def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved) { }
}

/**
core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala
@@ -70,6 +70,10 @@ private[spark] trait SparkListenerBus extends Logging {
foreachListener(_.onApplicationEnd(applicationEnd))
case metricsUpdate: SparkListenerExecutorMetricsUpdate =>
foreachListener(_.onExecutorMetricsUpdate(metricsUpdate))
+case executorAdded: SparkListenerExecutorAdded =>
+foreachListener(_.onExecutorAdded(executorAdded))
+case executorRemoved: SparkListenerExecutorRemoved =>
+foreachListener(_.onExecutorRemoved(executorRemoved))
case SparkListenerShutdown =>
}
}
core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -28,7 +28,7 @@ import akka.pattern.ask
import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}

import org.apache.spark.{ExecutorAllocationClient, Logging, SparkEnv, SparkException, TaskState}
-import org.apache.spark.scheduler.{SchedulerBackend, SlaveLost, TaskDescription, TaskSchedulerImpl, WorkerOffer}
+import org.apache.spark.scheduler._
import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
import org.apache.spark.util.{ActorLogReceive, SerializableBuffer, AkkaUtils, Utils}

@@ -66,6 +66,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste
// Number of executors requested from the cluster manager that have not registered yet
private var numPendingExecutors = 0

+private val listenerBus = scheduler.sc.listenerBus

// Executors we have requested the cluster manager to kill that have not died yet
private val executorsPendingToRemove = new HashSet[String]

@@ -106,6 +108,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste
logDebug(s"Decremented number of pending executors ($numPendingExecutors left)")
}
}
+listenerBus.post(SparkListenerExecutorAdded(executorId, data))
makeOffers()
}

@@ -213,6 +216,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste
totalCoreCount.addAndGet(-executorInfo.totalCores)
totalRegisteredExecutors.addAndGet(-1)
scheduler.executorLost(executorId, SlaveLost(reason))
+listenerBus.post(SparkListenerExecutorRemoved(executorId))
case None => logError(s"Asked to remove non-existent executor $executorId")
}
}
core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala
@@ -31,7 +31,7 @@ import akka.actor.{Address, ActorRef}
private[cluster] class ExecutorData(
val executorActor: ActorRef,
val executorAddress: Address,
-val executorHost: String ,
+override val executorHost: String,
var freeCores: Int,
-val totalCores: Int
-)
+override val totalCores: Int
+) extends ExecutorInfo(executorHost, totalCores)
core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.scheduler.cluster

import org.apache.spark.annotation.DeveloperApi

/**
* :: DeveloperApi ::
* Stores information about an executor to pass from the scheduler to SparkListeners.
*/
@DeveloperApi
class ExecutorInfo(
val executorHost: String,
val totalCores: Int
) {

def canEqual(other: Any): Boolean = other.isInstanceOf[ExecutorInfo]

override def equals(other: Any): Boolean = other match {
case that: ExecutorInfo =>
(that canEqual this) &&
executorHost == that.executorHost &&
totalCores == that.totalCores
case _ => false
}

override def hashCode(): Int = {
val state = Seq(executorHost, totalCores)
state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
}
}
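
Since equals and hashCode above are value-based, two ExecutorInfo instances describing the same host and core count compare equal. A minimal sketch (the host name and core count below are made up for illustration):

import org.apache.spark.scheduler.cluster.ExecutorInfo;

public class ExecutorInfoEqualityCheck {
  public static void main(String[] args) {
    ExecutorInfo a = new ExecutorInfo("host1.example.com", 4);
    ExecutorInfo b = new ExecutorInfo("host1.example.com", 4);
    System.out.println(a.equals(b));                  // true: same host and cores
    System.out.println(a.hashCode() == b.hashCode()); // true: hashCode agrees with equals
  }
}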