From 729a8e25a01ead3815ab36c11b81382f4413247e Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Mon, 14 Jul 2014 11:49:29 -0700 Subject: [PATCH] Update docs to be more explicit. --- .../apache/spark/sql/execution/SparkStrategies.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 0b78432ab2017..64d246346dd86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -52,8 +52,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { * the two join sides. When planning a [[execution.BroadcastHashJoin]], if one side has an * estimated physical size smaller than the user-settable threshold * `spark.sql.auto.convert.join.size`, the planner would mark it as the ''build'' relation and - * mark the other relation as the ''stream'' side. If both estimates exceed the threshold, - * they will instead be used to decide the build side in a [[execution.ShuffledHashJoin]]. + * mark the other relation as the ''stream'' side. The build table will be ''broadcasted'' to + * all of the executors involved in the join, as a [[org.apache.spark.broadcast.Broadcast]] + * object. If both estimates exceed the threshold, they will instead be used to decide the build + * side in a [[execution.ShuffledHashJoin]]. */ object HashJoin extends Strategy with PredicateHelper { private[this] def broadcastHashJoin( @@ -144,11 +146,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - /** - * This strategy applies a simple optimization based on the estimates of the physical sizes of - * the two join sides: the planner would mark the relation with the smaller estimated physical - * size as the ''build'' (broadcast) relation and mark the other as the ''stream'' relation. - */ object BroadcastNestedLoopJoin extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case logical.Join(left, right, joinType, condition) =>