-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-15232][SQL] Add subquery SQL building tests to LogicalPlanToSQLSuite #14383
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
SELECT subq.key1, z.value | ||
FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 | ||
FROM src1 x JOIN src y ON (x.key = y.key)) subq | ||
JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) | ||
ORDER BY subq.key1, z.value | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key1`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_5` AS `gen_attr_0`, `gen_attr_7` AS `gen_attr_6`, `gen_attr_9` AS `gen_attr_8`, `gen_attr_11` AS `gen_attr_10` FROM (SELECT `key` AS `gen_attr_5`, `value` AS `gen_attr_7` FROM `default`.`src1`) AS gen_subquery_0 INNER JOIN (SELECT `key` AS `gen_attr_9`, `value` AS `gen_attr_11` FROM `default`.`src`) AS gen_subquery_1 ON (`gen_attr_5` = `gen_attr_9`)) AS subq INNER JOIN (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1`, `ds` AS `gen_attr_3`, `hr` AS `gen_attr_4` FROM `default`.`srcpart`) AS gen_subquery_2 ON (((`gen_attr_0` = `gen_attr_2`) AND (`gen_attr_3` = "2008-04-08")) AND (CAST(`gen_attr_4` AS DOUBLE) = CAST(11 AS DOUBLE))) ORDER BY `gen_attr_0` ASC, `gen_attr_1` ASC) AS gen_subquery_3 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
SELECT a.k, a.c | ||
FROM (SELECT b.key as k, count(1) as c | ||
FROM src b | ||
GROUP BY b.key) a | ||
WHERE a.k >= 90 | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `k`, `gen_attr_1` AS `c` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_2` AS `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`) AS a WHERE (`gen_attr_0` >= 90)) AS a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from src b | ||
where exists (select a.key | ||
from src a | ||
where b.value = a.value and a.key = b.key and a.value > 'val_9') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from (select * | ||
from src b | ||
where exists (select a.key | ||
from src a | ||
where b.value = a.value and a.key = b.key and a.value > 'val_9')) a | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3)) AS a) AS a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select b.key, count(*) | ||
from src b | ||
group by b.key | ||
having exists (select a.key | ||
from src a | ||
where a.key = b.key and a.value > 'val_9') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_1` AS `key`, `gen_attr_2` AS `count(1)` FROM (SELECT `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_1`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_1` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_0` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3)) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from (select b.key, count(*) | ||
from src b | ||
group by b.key | ||
having exists (select a.key | ||
from src a | ||
where a.key = b.key and a.value > 'val_9')) a | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_5` > "val_9")) AS gen_subquery_1 WHERE (`gen_attr_2` = `gen_attr_0`)) AS gen_subquery_3)) AS a) AS a |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select b.key, min(b.value) | ||
from src b | ||
group by b.key | ||
having exists (select a.key | ||
from src a | ||
where a.value > 'val_9' and a.value = min(b.value)) | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_4`) AS `gen_attr_1`, min(`gen_attr_4`) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING EXISTS(SELECT `gen_attr_5` AS `1` FROM (SELECT 1 AS `gen_attr_5` FROM (SELECT `gen_attr_6`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_2` > "val_9")) AS gen_subquery_2 WHERE (`gen_attr_2` = `gen_attr_3`)) AS gen_subquery_4)) AS gen_subquery_1) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select key, count(*) | ||
from src | ||
group by key | ||
having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key) | ||
order by key | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `count(1)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, count(1) AS `gen_attr_1`, count(1) AS `gen_attr_2` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_4` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (`gen_attr_2` IN (SELECT `gen_attr_5` AS `_c0` FROM (SELECT `gen_attr_3` AS `gen_attr_5` FROM (SELECT count(1) AS `gen_attr_3` FROM (SELECT `key` AS `gen_attr_6`, `value` AS `gen_attr_7` FROM `default`.`src`) AS gen_subquery_3 WHERE (CAST(`gen_attr_6` AS DOUBLE) = CAST("90" AS DOUBLE)) GROUP BY `gen_attr_6`) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS src |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select b.key, min(b.value) | ||
from src b | ||
group by b.key | ||
having b.key in (select a.key | ||
from src a | ||
where a.value > 'val_9' and a.value = min(b.value)) | ||
order by b.key | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > "val_9")) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from src b | ||
where not exists (select a.key | ||
from src a | ||
where b.value = a.value and a.key = b.key and a.value > 'val_2') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_3`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE ((`gen_attr_1` = `gen_attr_2`) AND (`gen_attr_3` = `gen_attr_0`))) AS gen_subquery_3))) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from src b | ||
where not exists (select a.key | ||
from src a | ||
where b.value = a.value and a.value > 'val_2') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `value` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_0 WHERE (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT `gen_attr_4`, `gen_attr_2` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_2` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_2` > "val_2")) AS gen_subquery_1 WHERE (`gen_attr_1` = `gen_attr_2`)) AS gen_subquery_3))) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from src b | ||
group by key, value | ||
having not exists (select a.key | ||
from src a | ||
where b.value = a.value and a.key = b.key and a.value > 'val_12') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_3` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_3`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_3`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_3`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_4` AS `1` FROM (SELECT 1 AS `gen_attr_4` FROM (SELECT `gen_attr_2`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE ((`gen_attr_0` = `gen_attr_1`) AND (`gen_attr_2` = `gen_attr_3`))) AS gen_subquery_3))) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- This file is automatically generated by LogicalPlanToSQLSuite. | ||
select * | ||
from src b | ||
group by key, value | ||
having not exists (select distinct a.key | ||
from src a | ||
where b.value = a.value and a.value > 'val_12') | ||
-------------------------------------------------------------------------------- | ||
SELECT `gen_attr_2` AS `key`, `gen_attr_0` AS `value` FROM (SELECT `gen_attr_2`, `gen_attr_0` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_0` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_2`, `gen_attr_0` HAVING (NOT EXISTS(SELECT `gen_attr_3` AS `1` FROM (SELECT 1 AS `gen_attr_3` FROM (SELECT DISTINCT `gen_attr_4`, `gen_attr_1` FROM (SELECT `key` AS `gen_attr_4`, `value` AS `gen_attr_1` FROM `default`.`src`) AS gen_subquery_2 WHERE (`gen_attr_1` > "val_12")) AS gen_subquery_1 WHERE (`gen_attr_0` = `gen_attr_1`)) AS gen_subquery_3))) AS b |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -934,6 +934,159 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { | |
} | ||
} | ||
|
||
test("mapjoin_subquery") { | ||
checkSQL( | ||
""" | ||
|SELECT subq.key1, z.value | ||
|FROM (SELECT x.key as key1, x.value as value1, y.key as key2, y.value as value2 | ||
| FROM src1 x JOIN src y ON (x.key = y.key)) subq | ||
|JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) | ||
|ORDER BY subq.key1, z.value | ||
""".stripMargin, | ||
"mapjoin_subquery") | ||
} | ||
|
||
test("subq2") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a more descriptive name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure. I just used the names from the Hive suite for this review. |
||
checkSQL( | ||
""" | ||
|SELECT a.k, a.c | ||
|FROM (SELECT b.key as k, count(1) as c | ||
| FROM src b | ||
| GROUP BY b.key) a | ||
|WHERE a.k >= 90 | ||
""".stripMargin, | ||
"subq2") | ||
} | ||
|
||
test("subquery_exists") { | ||
checkSQL( | ||
""" | ||
|select * | ||
|from src b | ||
|where exists (select a.key | ||
| from src a | ||
| where b.value = a.value and a.key = b.key and a.value > 'val_9') | ||
""".stripMargin, | ||
"subquery_exists_1") | ||
|
||
checkSQL( | ||
""" | ||
|select * | ||
|from (select * | ||
| from src b | ||
| where exists (select a.key | ||
| from src a | ||
| where b.value = a.value and a.key = b.key and a.value > 'val_9')) a | ||
""".stripMargin, | ||
"subquery_exists_2") | ||
} | ||
|
||
test("subquery_exists_having") { | ||
checkSQL( | ||
""" | ||
|select b.key, count(*) | ||
|from src b | ||
|group by b.key | ||
|having exists (select a.key | ||
| from src a | ||
| where a.key = b.key and a.value > 'val_9') | ||
""".stripMargin, | ||
"subquery_exists_having_1") | ||
|
||
checkSQL( | ||
""" | ||
|select * | ||
|from (select b.key, count(*) | ||
| from src b | ||
| group by b.key | ||
| having exists (select a.key | ||
| from src a | ||
| where a.key = b.key and a.value > 'val_9')) a | ||
""".stripMargin, | ||
"subquery_exists_having_2") | ||
|
||
checkSQL( | ||
""" | ||
|select b.key, min(b.value) | ||
|from src b | ||
|group by b.key | ||
|having exists (select a.key | ||
| from src a | ||
| where a.value > 'val_9' and a.value = min(b.value)) | ||
""".stripMargin, | ||
"subquery_exists_having_3") | ||
} | ||
|
||
test("subquery_notexists") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. subquery_not_exists There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same thing for the following few tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep. I will revise all the names according to the test context. |
||
checkSQL( | ||
""" | ||
|select * | ||
|from src b | ||
|where not exists (select a.key | ||
| from src a | ||
| where b.value = a.value and a.key = b.key and a.value > 'val_2') | ||
""".stripMargin, | ||
"subquery_notexists_1") | ||
|
||
checkSQL( | ||
""" | ||
|select * | ||
|from src b | ||
|where not exists (select a.key | ||
| from src a | ||
| where b.value = a.value and a.value > 'val_2') | ||
""".stripMargin, | ||
"subquery_notexists_2") | ||
} | ||
|
||
test("subquery_notexists_having") { | ||
checkSQL( | ||
""" | ||
|select * | ||
|from src b | ||
|group by key, value | ||
|having not exists (select a.key | ||
| from src a | ||
| where b.value = a.value and a.key = b.key and a.value > 'val_12') | ||
""".stripMargin, | ||
"subquery_notexists_having_1") | ||
|
||
checkSQL( | ||
""" | ||
|select * | ||
|from src b | ||
|group by key, value | ||
|having not exists (select distinct a.key | ||
| from src a | ||
| where b.value = a.value and a.value > 'val_12') | ||
""".stripMargin, | ||
"subquery_notexists_having_2") | ||
} | ||
|
||
test("subquery_in_having") { | ||
checkSQL( | ||
""" | ||
|select key, count(*) | ||
|from src | ||
|group by key | ||
|having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key) | ||
|order by key | ||
""".stripMargin, | ||
"subquery_in_having_1") | ||
|
||
checkSQL( | ||
""" | ||
|select b.key, min(b.value) | ||
|from src b | ||
|group by b.key | ||
|having b.key in (select a.key | ||
| from src a | ||
| where a.value > 'val_9' and a.value = min(b.value)) | ||
|order by b.key | ||
""".stripMargin, | ||
"subquery_in_having_2") | ||
} | ||
|
||
test("SPARK-14933 - select orc table") { | ||
withTable("orc_t") { | ||
sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
broadcast join
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure!