Skip to content

Commit

Permalink
Further address the comments
Browse files — browse the repository at this point in the history
Change-Id: I5eba16903914932392e05ba56c27808c36b033b3
  • Loading branch information
jerryshao committed May 31, 2017
1 parent 8b16017 commit 1e3fb8a
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import java.util.{Arrays, Comparator, Date, Locale}
import java.util.concurrent.ConcurrentHashMap

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.control.NonFatal

import com.google.common.primitives.Longs
Expand Down Expand Up @@ -148,13 +149,25 @@ class SparkHadoopUtil extends Logging {
/**
 * Returns a function that can be called to calculate Hadoop FileSystem bytes read.
 * This function may be called in both spawned child threads and parent task thread (in
 * PythonRDD), and Hadoop FileSystem uses thread local variables to track the statistics.
 * So we need a map to track the bytes read from the child threads and parent thread,
 * summing them together to get the bytes read of this task.
 */
private[spark] def getFSBytesReadOnThreadCallback(): () => Long = {
  // Sum of bytes read across all FileSystem schemes for the *current* thread only,
  // since Hadoop's Statistics are thread-local.
  val f = () => FileSystem.getAllStatistics.asScala.map(_.getThreadStatistics.getBytesRead).sum
  // Snapshot taken on the thread that creates the callback; its pre-existing bytes read
  // must be subtracted so only bytes read after this point are attributed to the task.
  val baseline = (Thread.currentThread().getId, f())

  new Function0[Long] {
    // Latest per-thread byte counts, keyed by thread id. Guarded by its own monitor
    // because the callback may be invoked concurrently from child threads and the
    // parent task thread.
    private val bytesReadMap = new mutable.HashMap[Long, Long]()

    override def apply(): Long = {
      bytesReadMap.synchronized {
        // Record (or refresh) this thread's current count, then total across all
        // threads seen so far, discounting the creator thread's baseline.
        bytesReadMap.put(Thread.currentThread().getId, f())
        bytesReadMap.map { case (k, v) =>
          v - (if (k == baseline._1) baseline._2 else 0)
        }.sum
      }
    }
  }
}

Expand Down

0 comments on commit 1e3fb8a

Please sign in to comment.