Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-15232][SQL] Add subquery SQL building tests to LogicalPlanToSQLSuite #14383

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/mapjoin_subquery.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT subq.key1, z.value
FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
FROM src1 x JOIN src y ON (x.key = y.key)) subq
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
ORDER BY subq.key1, z.value
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = "2008-04-08")) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_3
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subq2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
SELECT a.k, a.c
FROM (SELECT b.key as k, count(1) as c
FROM src b
GROUP BY b.key) a
WHERE a.k >= 90
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `k`, `gen_attr_1` AS `c` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_2` AS `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`) AS a WHERE (`gen_attr_0` >= 90)) AS a
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS b
9 changes: 9 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select *
from src b
where exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS a) AS a
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')
--------------------------------------------------------------------------------
SELECT `gen_attr_1` AS `key`, `gen_attr_2` AS `count(1)` FROM (SELECT `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3)) AS b
10 changes: 10 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_exists_having_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from (select b.key, count(*)
from src b
group by b.key
having exists (select a.key
from src a
where a.key = b.key and a.value > 'val_9')) a
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_2` = `gen_attr_0`)) AS gen_subquery_3)) AS a) AS a
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having exists (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_4`) AS `gen_attr_1`, min(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_5` AS `1` FROM (SELECT 1 AS `gen_attr_5` FROM (SELECT `gen_attr_6`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_2 WHERE (`gen_attr_2` = `gen_attr_3`)) AS gen_subquery_4)) AS gen_subquery_1) AS b
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_in_having_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select key, count(*)
from src
group by key
having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
order by key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST("90" AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS src
10 changes: 10 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select b.key, min(b.value)
from src b
group by b.key
having b.key in (select a.key
from src a
where a.value > 'val_9' and a.value = min(b.value))
order by b.key
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > "val_9")) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS b
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_notexists_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3))) AS b
8 changes: 8 additions & 0 deletions sql/hive/src/test/resources/sqlgen/subquery_notexists_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
where not exists (select a.key
from src a
where b.value = a.value and a.value > 'val_2')
--------------------------------------------------------------------------------
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT `gen_attr_4`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE (`gen_attr_1` = `gen_attr_2`)) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select a.key
from src a
where b.value = a.value and a.key = b.key and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_3` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_3`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_3`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE ((`gen_attr_0` = `gen_attr_1`) AND (`gen_attr_2` = `gen_attr_3`))) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated by LogicalPlanToSQLSuite.
select *
from src b
group by key, value
having not exists (select distinct a.key
from src a
where b.value = a.value and a.value > 'val_12')
--------------------------------------------------------------------------------
SELECT `gen_attr_2` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_2`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT DISTINCT `gen_attr_4`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3))) AS b
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,159 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
}
}

test("mapjoin_subquery") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

broadcast join

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure!

checkSQL(
"""
|SELECT subq.key1, z.value
|FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2
| FROM src1 x JOIN src y ON (x.key = y.key)) subq
|JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11)
|ORDER BY subq.key1, z.value
""".stripMargin,
"mapjoin_subquery")
}

test("subq2") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a more descriptive name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I actually just used the name from the Hive suite for the review.
I'll revise this.

checkSQL(
"""
|SELECT a.k, a.c
|FROM (SELECT b.key as k, count(1) as c
| FROM src b
| GROUP BY b.key) a
|WHERE a.k >= 90
""".stripMargin,
"subq2")
}

// Correlated EXISTS predicates in a WHERE clause: once on the outermost query and once
// inside a derived table. Each query is handed to checkSQL together with the name of its
// answer file (subquery_exists_1 / subquery_exists_2).
test("subquery_exists") {
  // EXISTS filters the outermost query; the subquery refers back to b.value and b.key.
  val existsAtTopLevel =
    """
      |select *
      |from src b
      |where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')
      |""".stripMargin

  // The same filtered query, wrapped in a derived table aliased `a`.
  val existsInDerivedTable =
    """
      |select *
      |from (select *
      | from src b
      | where exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin

  checkSQL(existsAtTopLevel, "subquery_exists_1")
  checkSQL(existsInDerivedTable, "subquery_exists_2")
}

// EXISTS predicates in a HAVING clause. The cases are listed as (query -> answer file)
// pairs and passed to checkSQL one after another, in the order written.
test("subquery_exists_having") {
  val cases = Seq(
    // HAVING EXISTS correlated on the grouping key b.key.
    """
      |select b.key, count(*)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')
      |""".stripMargin -> "subquery_exists_having_1",

    // The same aggregate query, wrapped in a derived table aliased `a`.
    """
      |select *
      |from (select b.key, count(*)
      | from src b
      | group by b.key
      | having exists (select a.key
      | from src a
      | where a.key = b.key and a.value > 'val_9')) a
      |""".stripMargin -> "subquery_exists_having_2",

    // The subquery compares a.value against the outer aggregate min(b.value).
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having exists (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |""".stripMargin -> "subquery_exists_having_3"
  )

  cases.foreach { case (query, answerFile) =>
    checkSQL(query, answerFile)
  }
}

test("subquery_notexists") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

subquery_not_exists

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same thing for the following few tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. For the naming, I will revise all according to the test context.

checkSQL(
"""
|select *
|from src b
|where not exists (select a.key
| from src a
| where b.value = a.value and a.key = b.key and a.value > 'val_2')
""".stripMargin,
"subquery_notexists_1")

checkSQL(
"""
|select *
|from src b
|where not exists (select a.key
| from src a
| where b.value = a.value and a.value > 'val_2')
""".stripMargin,
"subquery_notexists_2")
}

// NOT EXISTS predicates in a HAVING clause over a query grouped by (key, value).
// Each query is handed to checkSQL together with the name of its answer file.
test("subquery_notexists_having") {
  // The subquery is correlated on both b.value and b.key.
  val correlatedOnKeyAndValue =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select a.key
      | from src a
      | where b.value = a.value and a.key = b.key and a.value > 'val_12')
      |""".stripMargin

  // The subquery is correlated on b.value only and selects DISTINCT a.key.
  val distinctCorrelatedOnValue =
    """
      |select *
      |from src b
      |group by key, value
      |having not exists (select distinct a.key
      | from src a
      | where b.value = a.value and a.value > 'val_12')
      |""".stripMargin

  checkSQL(correlatedOnKeyAndValue, "subquery_notexists_having_1")
  checkSQL(distinctCorrelatedOnValue, "subquery_notexists_having_2")
}

// IN (subquery) predicates in a HAVING clause. The cases are listed as
// (query -> answer file) pairs and passed to checkSQL in the order written.
test("subquery_in_having") {
  val cases = Seq(
    // The subquery has no reference to the outer query; the outer count(*) is tested
    // against the per-key counts it returns.
    """
      |select key, count(*)
      |from src
      |group by key
      |having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key)
      |order by key
      |""".stripMargin -> "subquery_in_having_1",

    // The subquery refers to the outer aggregate min(b.value).
    """
      |select b.key, min(b.value)
      |from src b
      |group by b.key
      |having b.key in (select a.key
      | from src a
      | where a.value > 'val_9' and a.value = min(b.value))
      |order by b.key
      |""".stripMargin -> "subquery_in_having_2"
  )

  cases.foreach { case (query, answerFile) =>
    checkSQL(query, answerFile)
  }
}

test("SPARK-14933 - select orc table") {
withTable("orc_t") {
sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2")
Expand Down