-
Notifications
You must be signed in to change notification settings - Fork 28.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-21366][SQL][TEST] Add sql test for window functions
## What changes were proposed in this pull request? Add sql test for window functions, also remove unnecessary test cases in `WindowQuerySuite`. ## How was this patch tested? Added `window.sql` and the corresponding output file. Author: Xingbo Jiang <xingbo.jiang@databricks.com> Closes #18591 from jiangxb1987/window.
- Loading branch information
1 parent
7514db1
commit 66d2168
Showing
3 changed files
with
273 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
-- Test data. | ||
-- Temporary view of nine (val, cate) rows read by every query in this file. | ||
-- The rows deliberately include NULL vals, a NULL cate group and a duplicated (1, "a") row. | ||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES | ||
(null, "a"), (1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, null), (3, null) | ||
AS testData(val, cate); | ||
|
||
-- RowsBetween | ||
-- Shorthand frame `ROWS CURRENT ROW`: each row's frame is just that row, so count(val) is 0 for a NULL val and 1 otherwise. | ||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData | ||
ORDER BY cate, val; | ||
-- Explicit ROWS frame: sum from the start of the partition through one physical row after the current row. | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val | ||
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val; | ||
|
||
-- RangeBetween | ||
-- Shorthand frame `RANGE 1 PRECEDING`: the frame is value-based on val (not row-based), ending at the current row. | ||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData | ||
ORDER BY cate, val; | ||
-- Explicit RANGE frame from the current row's val up to val + 1 within the partition. | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val | ||
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val; | ||
|
||
-- RangeBetween with reverse OrderBy | ||
-- Same RANGE frame as the ascending case, but the window is ordered by val DESC, so "1 FOLLOWING" reaches towards smaller vals. | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC | ||
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val; | ||
|
||
-- Window functions | ||
-- Runs a broad set of aggregate, ranking and value functions over one shared named window `w` | ||
-- (PARTITION BY cate ORDER BY val, no explicit frame) so they can be compared row by row. | ||
-- NOTE(review): `min(val) OVER w AS min` is selected twice below; the recorded expected output | ||
-- contains both `min` columns, so dropping the duplicate requires regenerating the results file. | ||
SELECT val, cate, | ||
max(val) OVER w AS max, | ||
min(val) OVER w AS min, | ||
min(val) OVER w AS min, | ||
count(val) OVER w AS count, | ||
sum(val) OVER w AS sum, | ||
avg(val) OVER w AS avg, | ||
stddev(val) OVER w AS stddev, | ||
first_value(val) OVER w AS first_value, | ||
first_value(val, true) OVER w AS first_value_ignore_null, | ||
first_value(val, false) OVER w AS first_value_contain_null, | ||
last_value(val) OVER w AS last_value, | ||
last_value(val, true) OVER w AS last_value_ignore_null, | ||
last_value(val, false) OVER w AS last_value_contain_null, | ||
rank() OVER w AS rank, | ||
dense_rank() OVER w AS dense_rank, | ||
cume_dist() OVER w AS cume_dist, | ||
percent_rank() OVER w AS percent_rank, | ||
ntile(2) OVER w AS ntile, | ||
row_number() OVER w AS row_number, | ||
var_pop(val) OVER w AS var_pop, | ||
var_samp(val) OVER w AS var_samp, | ||
approx_count_distinct(val) OVER w AS approx_count_distinct | ||
FROM testData | ||
WINDOW w AS (PARTITION BY cate ORDER BY val) | ||
ORDER BY cate, val; | ||
|
||
-- Null inputs | ||
-- Aggregating the untyped literal NULL over a window; every row is expected to yield NULL. | ||
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val; | ||
|
||
-- OrderBy not specified | ||
-- Negative test: row_number() over a window without ORDER BY is expected to be rejected with an AnalysisException. | ||
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val; | ||
|
||
-- Over clause is empty | ||
-- With OVER() the whole result set is a single window, so sum and avg return the same value on every row. | ||
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val; | ||
|
||
-- first_value()/last_value() over () | ||
-- Calls first_value/last_value on constant boolean inputs, with and without the second argument, | ||
-- over a named window `w` whose specification is empty (no partitioning, ordering or frame). | ||
SELECT val, cate, | ||
first_value(false) OVER w AS first_value, | ||
first_value(true, true) OVER w AS first_value_ignore_null, | ||
first_value(false, false) OVER w AS first_value_contain_null, | ||
last_value(false) OVER w AS last_value, | ||
last_value(true, true) OVER w AS last_value_ignore_null, | ||
last_value(false, false) OVER w AS last_value_contain_null | ||
FROM testData | ||
WINDOW w AS () | ||
ORDER BY cate, val; |
204 changes: 204 additions & 0 deletions
204
sql/core/src/test/resources/sql-tests/results/window.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- Number of queries: 11 | ||
|
||
|
||
-- !query 0 | ||
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES | ||
(null, "a"), (1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"), (null, null), (3, null) | ||
AS testData(val, cate) | ||
-- !query 0 schema | ||
struct<> | ||
-- !query 0 output | ||
|
||
|
||
|
||
-- !query 1 | ||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData | ||
ORDER BY cate, val | ||
-- !query 1 schema | ||
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN CURRENT ROW AND CURRENT ROW):bigint> | ||
-- !query 1 output | ||
NULL NULL 0 | ||
3 NULL 1 | ||
NULL a 0 | ||
1 a 1 | ||
1 a 1 | ||
2 a 1 | ||
1 b 1 | ||
2 b 1 | ||
3 b 1 | ||
|
||
|
||
-- !query 2 | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val | ||
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val | ||
-- !query 2 schema | ||
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING):bigint> | ||
-- !query 2 output | ||
NULL NULL 3 | ||
3 NULL 3 | ||
NULL a 1 | ||
1 a 2 | ||
1 a 4 | ||
2 a 4 | ||
1 b 3 | ||
2 b 6 | ||
3 b 6 | ||
|
||
|
||
-- !query 3 | ||
SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData | ||
ORDER BY cate, val | ||
-- !query 3 schema | ||
struct<val:int,cate:string,count(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN 1 PRECEDING AND CURRENT ROW):bigint> | ||
-- !query 3 output | ||
NULL NULL 0 | ||
3 NULL 1 | ||
NULL a 0 | ||
1 a 2 | ||
1 a 2 | ||
2 a 3 | ||
1 b 1 | ||
2 b 2 | ||
3 b 2 | ||
|
||
|
||
-- !query 4 | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val | ||
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val | ||
-- !query 4 schema | ||
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint> | ||
-- !query 4 output | ||
NULL NULL NULL | ||
3 NULL 3 | ||
NULL a NULL | ||
1 a 4 | ||
1 a 4 | ||
2 a 2 | ||
1 b 3 | ||
2 b 5 | ||
3 b 3 | ||
|
||
|
||
-- !query 5 | ||
SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC | ||
RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val | ||
-- !query 5 schema | ||
struct<val:int,cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING):bigint> | ||
-- !query 5 output | ||
NULL NULL NULL | ||
3 NULL 3 | ||
NULL a NULL | ||
1 a 2 | ||
1 a 2 | ||
2 a 4 | ||
1 b 1 | ||
2 b 3 | ||
3 b 5 | ||
|
||
|
||
-- !query 6 | ||
SELECT val, cate, | ||
max(val) OVER w AS max, | ||
min(val) OVER w AS min, | ||
min(val) OVER w AS min, | ||
count(val) OVER w AS count, | ||
sum(val) OVER w AS sum, | ||
avg(val) OVER w AS avg, | ||
stddev(val) OVER w AS stddev, | ||
first_value(val) OVER w AS first_value, | ||
first_value(val, true) OVER w AS first_value_ignore_null, | ||
first_value(val, false) OVER w AS first_value_contain_null, | ||
last_value(val) OVER w AS last_value, | ||
last_value(val, true) OVER w AS last_value_ignore_null, | ||
last_value(val, false) OVER w AS last_value_contain_null, | ||
rank() OVER w AS rank, | ||
dense_rank() OVER w AS dense_rank, | ||
cume_dist() OVER w AS cume_dist, | ||
percent_rank() OVER w AS percent_rank, | ||
ntile(2) OVER w AS ntile, | ||
row_number() OVER w AS row_number, | ||
var_pop(val) OVER w AS var_pop, | ||
var_samp(val) OVER w AS var_samp, | ||
approx_count_distinct(val) OVER w AS approx_count_distinct | ||
FROM testData | ||
WINDOW w AS (PARTITION BY cate ORDER BY val) | ||
ORDER BY cate, val | ||
-- !query 6 schema | ||
struct<val:int,cate:string,max:int,min:int,min:int,count:bigint,sum:bigint,avg:double,stddev:double,first_value:int,first_value_ignore_null:int,first_value_contain_null:int,last_value:int,last_value_ignore_null:int,last_value_contain_null:int,rank:int,dense_rank:int,cume_dist:double,percent_rank:double,ntile:int,row_number:int,var_pop:double,var_samp:double,approx_count_distinct:bigint> | ||
-- !query 6 output | ||
NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 | ||
3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 | ||
NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 | ||
1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 | ||
1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 | ||
2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 | ||
1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1 | ||
2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 | ||
3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 | ||
|
||
|
||
-- !query 7 | ||
SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val | ||
-- !query 7 schema | ||
struct<val:int,cate:string,avg(CAST(NULL AS DOUBLE)) OVER (PARTITION BY cate ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> | ||
-- !query 7 output | ||
NULL NULL NULL | ||
3 NULL NULL | ||
NULL a NULL | ||
1 a NULL | ||
1 a NULL | ||
2 a NULL | ||
1 b NULL | ||
2 b NULL | ||
3 b NULL | ||
|
||
|
||
-- !query 8 | ||
SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val | ||
-- !query 8 schema | ||
struct<> | ||
-- !query 8 output | ||
org.apache.spark.sql.AnalysisException | ||
Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; | ||
|
||
|
||
-- !query 9 | ||
SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val | ||
-- !query 9 schema | ||
struct<val:int,cate:string,sum(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):bigint,avg(CAST(val AS BIGINT)) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):double> | ||
-- !query 9 output | ||
NULL NULL 13 1.8571428571428572 | ||
3 NULL 13 1.8571428571428572 | ||
NULL a 13 1.8571428571428572 | ||
1 a 13 1.8571428571428572 | ||
1 a 13 1.8571428571428572 | ||
2 a 13 1.8571428571428572 | ||
1 b 13 1.8571428571428572 | ||
2 b 13 1.8571428571428572 | ||
3 b 13 1.8571428571428572 | ||
|
||
|
||
-- !query 10 | ||
SELECT val, cate, | ||
first_value(false) OVER w AS first_value, | ||
first_value(true, true) OVER w AS first_value_ignore_null, | ||
first_value(false, false) OVER w AS first_value_contain_null, | ||
last_value(false) OVER w AS last_value, | ||
last_value(true, true) OVER w AS last_value_ignore_null, | ||
last_value(false, false) OVER w AS last_value_contain_null | ||
FROM testData | ||
WINDOW w AS () | ||
ORDER BY cate, val | ||
-- !query 10 schema | ||
struct<val:int,cate:string,first_value:boolean,first_value_ignore_null:boolean,first_value_contain_null:boolean,last_value:boolean,last_value_ignore_null:boolean,last_value_contain_null:boolean> | ||
-- !query 10 output | ||
NULL NULL false true false false true false | ||
3 NULL false true false false true false | ||
NULL a false true false false true false | ||
1 a false true false false true false | ||
1 a false true false false true false | ||
2 a false true false false true false | ||
1 b false true false false true false | ||
2 b false true false false true false | ||
3 b false true false false true false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters