Skip to content

Commit

Permalink
[SPARK-23316][SQL] AnalysisException after max iteration reached for …
Browse files Browse the repository at this point in the history
…IN query

## What changes were proposed in this pull request?
Added flag ignoreNullability to DataType.equalsStructurally.
The previous semantics correspond to ignoreNullability=false, which remains the default.
When ignoreNullability=true, equalsStructurally ignores the nullability of contained types (map key types, map value types, array element types, and struct field types).
In.checkInputDataTypes calls equalsStructurally to check whether the children's types match. They should match regardless of nullability (which is just a hint), so equalsStructurally is now called with ignoreNullability=true.

## How was this patch tested?
New test in SubquerySuite

Author: Bogdan Raducanu <bogdan@databricks.com>

Closes #20548 from bogdanrdc/SPARK-23316.

(cherry picked from commit 05d0512)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
  • Loading branch information
bogdanrdc authored and gatorsmile committed Feb 13, 2018
1 parent dbb1b39 commit ab01ba7
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate {
require(list != null, "list should not be null")

override def checkInputDataTypes(): TypeCheckResult = {
val mismatchOpt = list.find(l => !DataType.equalsStructurally(l.dataType, value.dataType))
val mismatchOpt = list.find(l => !DataType.equalsStructurally(l.dataType, value.dataType,
ignoreNullability = true))
if (mismatchOpt.isDefined) {
list match {
case ListQuery(_, _, _, childOutputs) :: Nil =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,25 +295,31 @@ object DataType {
}

/**
* Returns true if the two data types share the same "shape", i.e. the types (including
* nullability) are the same, but the field names don't need to be the same.
* Returns true if the two data types share the same "shape", i.e. the types
* are the same, but the field names don't need to be the same.
*
* @param ignoreNullability whether to ignore nullability when comparing the types
*/
def equalsStructurally(from: DataType, to: DataType): Boolean = {
def equalsStructurally(
from: DataType,
to: DataType,
ignoreNullability: Boolean = false): Boolean = {
(from, to) match {
case (left: ArrayType, right: ArrayType) =>
equalsStructurally(left.elementType, right.elementType) &&
left.containsNull == right.containsNull
(ignoreNullability || left.containsNull == right.containsNull)

case (left: MapType, right: MapType) =>
equalsStructurally(left.keyType, right.keyType) &&
equalsStructurally(left.valueType, right.valueType) &&
left.valueContainsNull == right.valueContainsNull
(ignoreNullability || left.valueContainsNull == right.valueContainsNull)

case (StructType(fromFields), StructType(toFields)) =>
fromFields.length == toFields.length &&
fromFields.zip(toFields)
.forall { case (l, r) =>
equalsStructurally(l.dataType, r.dataType) && l.nullable == r.nullable
equalsStructurally(l.dataType, r.dataType) &&
(ignoreNullability || l.nullable == r.nullable)
}

case (fromDataType, toDataType) => fromDataType == toDataType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -950,4 +950,9 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
assert(join.duplicateResolved)
assert(optimizedPlan.resolved)
}

test("SPARK-23316: AnalysisException after max iteration reached for IN query") {
  // Regression check for SPARK-23316: before the fix, running this query threw an
  // AnalysisException. The test only verifies that the query completes without
  // throwing; the resulting row count is not asserted.
  val filtered = spark.range(10).where("(id,id) in (select id, null from range(3))")
  filtered.count()
}
}

0 comments on commit ab01ba7

Please sign in to comment.