From 754085f2c35f385ad743202818a0eeeefc1bc714 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@apache.org>
Date: Wed, 16 Jul 2014 16:56:55 -0700
Subject: [PATCH] Explain why we broadcast a serialized copy of the task.

---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 0fda13b3a6823..ff355546989d2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1207,8 +1207,10 @@ abstract class RDD[T: ClassTag](
   // =======================================================================
 
   /**
-   * Broadcasted copy of this RDD, used to dispatch tasks to executors. Note that this is
-   * a lazy val so the broadcast is created only when tasks are scheduled on this RDD.
+   * Broadcasted copy of this RDD, used to dispatch tasks to executors. Note that we broadcast
+   * the serialized copy of the RDD, and each task deserializes it, which means each task gets
+   * its own copy of the RDD. This provides stronger isolation between tasks that might modify
+   * the state of objects referenced in their closures.
    */
   @transient private[spark] lazy val broadcasted = {
     val ser = SparkEnv.get.closureSerializer.newInstance()