Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-15232][SQL] Add subquery SQL building tests to LogicalPlanToSQLSuite #14383

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT /*+ MAPJOIN(srcpart) */ subq.key1, z.value
FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
ORDER BY subq.key1, z.value
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = "2008-04-08")) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_3
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subq2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT a.k, a.c
FROM (SELECT b.key as k, count(1) as c
FROM src b
GROUP BY b.key) a
WHERE a.k >= 90
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `k`, `gen_attr_1` AS `c` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_2` AS `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`) AS a WHERE (`gen_attr_0` >= 90)) AS a
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS b
9 changes: 9 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS a) AS a
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_1` AS `key`, `gen_attr_2` AS `count(1)` FROM (SELECT `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3)) AS b
10 changes: 10 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_having_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_2` = `gen_attr_0`)) AS gen_subquery_3)) AS a) AS a
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having exists (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_4`) AS `gen_attr_1`, min(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_5` AS `1` FROM (SELECT 1 AS `gen_attr_5` FROM (SELECT `gen_attr_6`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_2 WHERE (`gen_attr_2` = `gen_attr_3`)) AS gen_subquery_4)) AS gen_subquery_1) AS b
6 changes: 6 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_in.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT key
FROM src
WHERE key in (SELECT max(key) FROM src)
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_0 WHERE (`gen_attr_0` IN (SELECT `gen_attr_3` AS `_c0` FROM (SELECT `gen_attr_1` AS `gen_attr_3` FROM (SELECT max(`gen_attr_4`) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2) AS gen_subquery_1) AS gen_subquery_3))) AS src
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select key, count(*)
from src
group by key
having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
order by key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST("90" AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS src
10 changes: 10 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having b.key in (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
order by b.key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > "val_9")) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS b
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_not_exists_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3))) AS b
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_not_exists_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT `gen_attr_4`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE (`gen_attr_1` = `gen_attr_2`)) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_3` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_3`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_3`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE ((`gen_attr_0` = `gen_attr_1`) AND (`gen_attr_2` = `gen_attr_3`))) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select distinct a.key
from src a
where b.value = a.value and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_2` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_2`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT DISTINCT `gen_attr_4`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,169 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
}
}

test("broadcast join") {
  // Query under test: a MAPJOIN hint naming `srcpart`, with `srcpart` joined to an
  // aliased join subquery (`subq`) and the result ordered by both projected columns.
  val hintedJoinQuery =
    """
      |SELECT /*+ MAPJOIN(srcpart) */ subq.key1, z.value
      |FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
      | FROM src1 x JOIN src y ON (x.key = y.key)) subq
      |JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
      |ORDER BY subq.key1, z.value
      |""".stripMargin

  // Second argument is the answer-file name passed through to checkSQL.
  checkSQL(hintedJoinQuery, "broadcast_join_subquery")
}

test("subquery using single table") {
  // Query under test: an aggregating derived table (`a`) over `src`, filtered by the
  // outer query on the grouped key.
  val aggregatedSubquery =
    """
      |SELECT a.k, a.c
      |FROM (SELECT b.key as k, count(1) as c
      | FROM src b
      | GROUP BY b.key) a
      |WHERE a.k >= 90
      |""".stripMargin

  checkSQL(aggregatedSubquery, "subq2")
}

test("correlated subqueries using EXISTS on where clause") {
  // Case 1: EXISTS predicate directly in the WHERE clause of the outer query.
  val existsInWhere =
    """
      |select *
      |from src b
      |where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')
      |""".stripMargin

  // Case 2: the same filtered query, wrapped as a derived table aliased `a`.
  val existsInsideDerivedTable =
    """
      |select *
      |from (select *
      | from src b
      | where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin

  // Checks run in the same order as before: case 1, then case 2.
  checkSQL(existsInWhere, "subquery_exists_1")
  checkSQL(existsInsideDerivedTable, "subquery_exists_2")
}

test("correlated subqueries using EXISTS on having clause") {
  // Case 1: EXISTS in HAVING, correlated on the grouping key.
  val existsInHaving =
    """
      |select b.key, count(*)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')
      |""".stripMargin

  // Case 2: the same aggregate query, wrapped as a derived table aliased `a`.
  val existsInHavingInsideDerivedTable =
    """
      |select *
      |from (select b.key, count(*)
      | from src b
      | group by b.key
      | having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin

  // Case 3: the correlation references an outer aggregate, min(b.value).
  val existsCorrelatedOnAggregate =
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |""".stripMargin

  // Checks run in the same order as before: case 1, case 2, case 3.
  checkSQL(existsInHaving, "subquery_exists_having_1")
  checkSQL(existsInHavingInsideDerivedTable, "subquery_exists_having_2")
  checkSQL(existsCorrelatedOnAggregate, "subquery_exists_having_3")
}

test("correlated subqueries using NOT EXISTS on where clause") {
  // Case 1: NOT EXISTS correlated on both `value` and `key`.
  val notExistsOnValueAndKey =
    """
      |select *
      |from src b
      |where not exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_2')
      |""".stripMargin

  // Case 2: NOT EXISTS correlated on `value` only.
  val notExistsOnValueOnly =
    """
      |select *
      |from src b
      |where not exists (select a.key
      | from src a
      | where b.value = a.value and a.value > 'val_2')
      |""".stripMargin

  // Checks run in the same order as before: case 1, then case 2.
  checkSQL(notExistsOnValueAndKey, "subquery_not_exists_1")
  checkSQL(notExistsOnValueOnly, "subquery_not_exists_2")
}

test("correlated subqueries using NOT EXISTS on having clause") {
  // Case 1: NOT EXISTS in HAVING, correlated on both grouping columns.
  val notExistsInHaving =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_12')
      |""".stripMargin

  // Case 2: the inner query uses SELECT DISTINCT and correlates on `value` only.
  val notExistsInHavingWithDistinct =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select distinct a.key
      | from src a
      | where b.value = a.value and a.value > 'val_12')
      |""".stripMargin

  // Checks run in the same order as before: case 1, then case 2.
  checkSQL(notExistsInHaving, "subquery_not_exists_having_1")
  checkSQL(notExistsInHavingWithDistinct, "subquery_not_exists_having_2")
}

test("subquery using IN on where clause") {
  // Query under test: an uncorrelated IN predicate whose subquery is a single
  // aggregate over the same table.
  val inSubqueryInWhere =
    """
      |SELECT key
      |FROM src
      |WHERE key in (SELECT max(key) FROM src)
      |""".stripMargin

  checkSQL(inSubqueryInWhere, "subquery_in")
}

test("subquery using IN on having clause") {
  // Case 1: an aggregate on the left of IN, with an uncorrelated grouped subquery.
  val aggregateInSubquery =
    """
      |select key, count(*)
      |from src
      |group by key
      |having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
      |order by key
      |""".stripMargin

  // Case 2: the IN subquery is correlated with the outer aggregate min(b.value).
  val inSubqueryCorrelatedOnAggregate =
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having b.key in (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |order by b.key
      |""".stripMargin

  // Checks run in the same order as before: case 1, then case 2.
  checkSQL(aggregateInSubquery, "subquery_in_having_1")
  checkSQL(inSubqueryCorrelatedOnAggregate, "subquery_in_having_2")
}

test("SPARK-14933 - select orc table") {
withTable("orc_t") {
sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2")
Expand Down