diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index b815e1f2cd2fd..4b767e35ea32a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -63,12 +63,14 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends } /** - * Take the first limit elements. + * Take the first limit elements. Note that the implementation is different depending on whether + * this is a terminal operator or not. If it is terminal and is invoked using executeCollect, + * this operator uses Spark's take method on the Spark driver. If it is not terminal or is + * invoked using execute, we first take the limit on each partition, and then repartition all the + * data to a single partition to compute the global limit. */ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode { override def otherCopyArgs = sc :: Nil - // Note that the implementation is different depending on - // whether this is a terminal operator or not. override def output = child.output