Use special prefix for stage clusters to avoid collisions

Right now stage clusters use the bare stage ID as their cluster ID, which implicitly assumes that stage clusters own the entire integer ID space. This assumption will no longer hold after we merge with dag-viz-streaming, where a cluster ID may just be the operation ID and can therefore collide with a stage cluster ID. The result of such a collision is that certain stage clusters disappear from the UI. This patch by itself doesn't fix anything noticeable; however, it does guard against potential collisions in future changes.
apache · May 15, 2015 · 762b541 · 762b541
1 parent 51c95b9
commit 762b541
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 8 deletions.
diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
@@ -190,9 +190,10 @@ function renderDagVizForJob(svgContainer) {
         .attr("skipped", "true");
     } else {
       // Link each graph to the corresponding stage page (TODO: handle stage attempts)
+      // Use the link from the stage table so it also works for the history server
       var attemptId = 0
-      var stageLink = $("#stage-" + stageId + "-" + attemptId)
-        .find("a")
+      var stageLink = d3.select("#stage-" + stageId + "-" + attemptId)
+        .select("a")
         .attr("href") + "&expandDagViz=true";
       container = svgContainer
         .append("a")

diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -355,8 +355,9 @@ private[spark] object UIUtils extends Logging {
       <div id="dag-viz-metadata" style="display:none">
         {
           graphs.map { g =>
+            val stageId = g.rootCluster.id.replaceAll(RDDOperationGraph.STAGE_CLUSTER_PREFIX, "")
             val skipped = g.rootCluster.name.contains("skipped").toString
-            <div class="stage-metadata" stage-id={g.rootCluster.id} skipped={skipped}>
+            <div class="stage-metadata" stage-id={stageId} skipped={skipped}>
               <div class="dot-file">{RDDOperationGraph.makeDotFile(g)}</div>
               { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } }
               { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } }

diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
@@ -74,6 +74,8 @@ private[ui] class RDDOperationCluster(val id: String, private var _name: String)
 
 private[ui] object RDDOperationGraph extends Logging {
 
+  val STAGE_CLUSTER_PREFIX = "stage_"
+
   /**
    * Construct a RDDOperationGraph for a given stage.
    *
@@ -91,7 +93,8 @@ private[ui] object RDDOperationGraph extends Logging {
     val clusters = new mutable.HashMap[String, RDDOperationCluster] // indexed by cluster ID
 
     // Root cluster is the stage cluster
-    val stageClusterId = stage.stageId.toString
+    // Use a special prefix here to differentiate this cluster from other operation clusters
+    val stageClusterId = STAGE_CLUSTER_PREFIX + stage.stageId
     val stageClusterName = s"Stage ${stage.stageId}" +
       { if (stage.attemptId == 0) "" else s" (attempt ${stage.attemptId})" }
     val rootCluster = new RDDOperationCluster(stageClusterId, stageClusterName)

diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
@@ -57,10 +57,12 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen
       .getOrElse(Seq.empty)
       .flatMap { sid => stageIdToGraph.get(sid) }
     // Mark any skipped stages as such
-    graphs
-      .filter { g => skippedStageIds.contains(g.rootCluster.id.toInt) }
-      .filter { g => !g.rootCluster.name.contains("skipped") }
-      .foreach { g => g.rootCluster.setName(g.rootCluster.name + " (skipped)") }
+    graphs.foreach { g =>
+      val stageId = g.rootCluster.id.replaceAll(RDDOperationGraph.STAGE_CLUSTER_PREFIX, "").toInt
+      if (skippedStageIds.contains(stageId) && !g.rootCluster.name.contains("skipped")) {
+        g.rootCluster.setName(g.rootCluster.name + " (skipped)")
+      }
+    }
     graphs
   }