Skip to content

Commit

Permalink
[SPARK-17863][SQL] should not add column into Distinct
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

When resolving the attributes referenced by a sort, we pull missing columns up from the grandchild into the child. That is wrong when the child is a Distinct, because the added column changes the behavior of the Distinct, so we should not do it.

## How was this patch tested?

Added regression test.

Author: Davies Liu <davies@databricks.com>

Closes #15489 from davies/order_distinct.
  • Loading branch information
Davies Liu authored and yhuai committed Oct 14, 2016
1 parent 522dd0d commit da9aeb0
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,8 @@ class Analyzer(
// attributes that its child might have or could have.
val missing = missingAttrs -- g.child.outputSet
g.copy(join = true, child = addMissingAttr(g.child, missing))
case d: Distinct =>
throw new AnalysisException(s"Can't add $missingAttrs to $d")
case u: UnaryNode =>
u.withNewChildren(addMissingAttr(u.child, missingAttrs) :: Nil)
case other =>
Expand Down
24 changes: 24 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,30 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
}

test("SPARK-17863: SELECT distinct does not work correctly if order by missing attribute") {
  // Both queries select the same two struct fields from two rows that differ only in
  // field `c`, so DISTINCT must collapse them into a single (1, 2) row.

  // Ordering by `a, b` refers to columns that are already part of the DISTINCT output.
  val orderedByOutputColumns =
    sql("""select distinct struct.a, struct.b
          |from (
          |  select named_struct('a', 1, 'b', 2, 'c', 3) as struct
          |  union all
          |  select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp
          |order by a, b
          |""".stripMargin)
  checkAnswer(orderedByOutputColumns, Seq(Row(1, 2)))

  // Ordering by `struct.a, struct.b` refers to a column that is not in the DISTINCT
  // output; pulling it in would change what DISTINCT de-duplicates on, so analysis
  // must fail instead of silently adding the column.
  val analysisFailure = intercept[AnalysisException] {
    sql("""select distinct struct.a, struct.b
          |from (
          |  select named_struct('a', 1, 'b', 2, 'c', 3) as struct
          |  union all
          |  select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp
          |order by struct.a, struct.b
          |""".stripMargin)
  }
  val expectedFragment = "cannot resolve '`struct.a`' given input columns: [a, b]"
  assert(analysisFailure.message.contains(expectedFragment))
}

test("cast boolean to string") {
// TODO Ensure true/false string letter casing is consistent with Hive in all cases.
checkAnswer(
Expand Down

0 comments on commit da9aeb0

Please sign in to comment.