Skip to content

Commit

Permalink
[SPARK-2086] Improve output of toDebugString to make shuffle boundari…
Browse files Browse the repository at this point in the history
…es more clear

Changes RDD.toDebugString() to show hierarchy and shuffle transformations more clearly

New output:

```
(3) FlatMappedValuesRDD[325] at apply at Transformer.scala:22
 |  MappedValuesRDD[324] at apply at Transformer.scala:22
 |  CoGroupedRDD[323] at apply at Transformer.scala:22
 +-(5) MappedRDD[320] at apply at Transformer.scala:22
 |  |  MappedRDD[319] at apply at Transformer.scala:22
 |  |  MappedValuesRDD[318] at apply at Transformer.scala:22
 |  |  MapPartitionsRDD[317] at apply at Transformer.scala:22
 |  |  ShuffledRDD[316] at apply at Transformer.scala:22
 |  +-(10) MappedRDD[315] at apply at Transformer.scala:22
 |     |   ParallelCollectionRDD[314] at apply at Transformer.scala:22
 +-(100) MappedRDD[322] at apply at Transformer.scala:22
     |   ParallelCollectionRDD[321] at apply at Transformer.scala:22
```

Author: Gregory Owen <greowen@gmail.com>

Closes apache#1364 from GregOwen/to-debug-string and squashes the following commits:

08f5c78 [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly
1603f7b [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly
  • Loading branch information
GregOwen authored and conviva-zz committed Sep 4, 2014
1 parent 629296e commit 7798b08
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 4 deletions.
52 changes: 48 additions & 4 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1269,11 +1269,55 @@ abstract class RDD[T: ClassTag](

/** A description of this RDD and its recursive dependencies for debugging. */
def toDebugString: String = {
def debugString(rdd: RDD[_], prefix: String = ""): Seq[String] = {
Seq(prefix + rdd + " (" + rdd.partitions.size + " partitions)") ++
rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " "))
// Apply a different rule to the last child
def debugChildren(rdd: RDD[_], prefix: String): Seq[String] = {
val len = rdd.dependencies.length
len match {
case 0 => Seq.empty
case 1 =>
val d = rdd.dependencies.head
debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]], true)
case _ =>
val frontDeps = rdd.dependencies.take(len - 1)
val frontDepStrings = frontDeps.flatMap(
d => debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]]))

val lastDep = rdd.dependencies.last
val lastDepStrings =
debugString(lastDep.rdd, prefix, lastDep.isInstanceOf[ShuffleDependency[_,_,_]], true)

(frontDepStrings ++ lastDepStrings)
}
}
// The first RDD in the dependency stack has no parents, so no need for a +-
def firstDebugString(rdd: RDD[_]): Seq[String] = {
val partitionStr = "(" + rdd.partitions.size + ")"
val leftOffset = (partitionStr.length - 1) / 2
val nextPrefix = (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset))
Seq(partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
}
def shuffleDebugString(rdd: RDD[_], prefix: String = "", isLastChild: Boolean): Seq[String] = {
val partitionStr = "(" + rdd.partitions.size + ")"
val leftOffset = (partitionStr.length - 1) / 2
val thisPrefix = prefix.replaceAll("\\|\\s+$", "")
val nextPrefix = (
thisPrefix
+ (if (isLastChild) " " else "| ")
+ (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset)))
Seq(thisPrefix + "+-" + partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
}
def debugString(rdd: RDD[_],
prefix: String = "",
isShuffle: Boolean = true,
isLastChild: Boolean = false): Seq[String] = {
if (isShuffle) {
shuffleDebugString(rdd, prefix, isLastChild)
}
else {
Seq(prefix + rdd) ++ debugChildren(rdd, prefix)
}
}
debugString(this).mkString("\n")
firstDebugString(this).mkString("\n")
}

override def toString: String = "%s%s[%d] at %s".format(
Expand Down
8 changes: 8 additions & 0 deletions project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ object MimaExcludes {
"org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.storage.MemoryStore.Entry"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugChildren$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$firstDebugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$shuffleDebugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugString$1"),
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$"
+ "createZero$1")
Expand Down

0 comments on commit 7798b08

Please sign in to comment.