From 0330f9def5ebd6b7813dc4656f40edc717dbd0a3 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Mon, 17 Jun 2024 22:14:40 +0400 Subject: [PATCH] Support use of `BY NAME` quantifier across all set ops (#1309) Co-authored-by: Alexander Beedie Co-authored-by: Joey Hain --- README.md | 6 +++--- src/ast/data_type.rs | 4 ++-- src/ast/mod.rs | 6 +++--- src/parser/mod.rs | 15 +++------------ src/tokenizer.rs | 2 +- tests/sqlparser_common.rs | 6 ++++++ 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 512f5f6c0..3226b9549 100644 --- a/README.md +++ b/README.md @@ -114,13 +114,12 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], [Opteryx], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. +This parser is currently being used by the [DataFusion] query engine, [LocustDB], +[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. - ## Design The core expression parser uses the [Pratt Parser] design, which is a top-down @@ -210,6 +209,7 @@ licensed as above, without any additional terms or conditions. [Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[Polars]: https://pola.rs/ [PRQL]: https://github.com/PRQL/prql [Qrlew]: https://github.com/Qrlew/qrlew [JumpWire]: https://github.com/extragoodlabs/jumpwire diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 7d0aec8fc..6b1a542f4 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -168,7 +168,7 @@ pub enum DataType { UnsignedInt(Option), /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED UnsignedInt4(Option), - /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED + /// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), /// Unsigned integer type in [clickhouse] /// Note: UInt8 mean 8 bits in [clickhouse] @@ -699,7 +699,7 @@ pub enum CharacterLength { /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Miscrosoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) Max, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6e306b1e3..7af8efaec 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2265,7 +2265,7 @@ pub enum Statement { /// SET [ SESSION | LOCAL ] ROLE role_name /// ``` /// - /// Sets sesssion state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] + /// Sets session state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html @@ -2283,7 +2283,7 @@ pub enum Statement { /// ``` /// /// Note: this is not a standard SQL statement, but it is supported by at - /// least MySQL and PostgreSQL. Not all MySQL-specific syntatic forms are + /// least MySQL and PostgreSQL. Not all MySQL-specific syntactic forms are /// supported yet. SetVariable { local: bool, @@ -4750,7 +4750,7 @@ impl fmt::Display for FunctionArguments { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionArgumentList { - /// `[ ALL | DISTINCT ] + /// `[ ALL | DISTINCT ]` pub duplicate_treatment: Option, /// The function arguments. pub args: Vec, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c591b8116..e240441b9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8138,7 +8138,7 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { - Some(SetOperator::Union) => { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { SetQuantifier::DistinctByName } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { @@ -8155,15 +8155,6 @@ impl<'a> Parser<'a> { SetQuantifier::None } } - Some(SetOperator::Except) | Some(SetOperator::Intersect) => { - if self.parse_keyword(Keyword::ALL) { - SetQuantifier::All - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } _ => SetQuantifier::None, } } @@ -8547,10 +8538,10 @@ impl<'a> Parser<'a> { }) } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { - let snaphot_id = self.parse_value()?; + let snapshot_id = self.parse_value()?; return Ok(Statement::SetTransaction { modes: vec![], - snapshot: Some(snaphot_id), + snapshot: Some(snapshot_id), session: false, }); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bcc5478bc..4e64e0712 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -654,7 +654,7 @@ impl<'a> Tokenizer<'a> { Ok(()) } - // Tokenize the identifer or keywords in `ch` + // Tokenize the identifier or keywords in `ch` fn tokenize_identifier_or_keyword( &self, ch: impl IntoIterator, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f6518e276..a86858129 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6010,6 +6010,12 @@ fn parse_union_except_intersect() { verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); verified_stmt("(SELECT * FROM new EXCEPT SELECT * FROM old) UNION ALL (SELECT * FROM old EXCEPT SELECT * FROM new) ORDER BY 1"); verified_stmt("(SELECT * FROM new EXCEPT DISTINCT SELECT * FROM old) UNION DISTINCT (SELECT * FROM old EXCEPT DISTINCT SELECT * FROM new) ORDER BY 1"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); } #[test]