From 18e0309d22440c4226406afd00b7618189e380eb Mon Sep 17 00:00:00 2001 From: Alan Cai Date: Tue, 2 Jul 2024 16:14:46 -0700 Subject: [PATCH 1/2] Fix bag constructor parsing --- .../lang/syntax/impl/PartiQLPigParser.kt | 2 +- .../lang/syntax/impl/PartiQLPigVisitor.kt | 9 +++++ .../partiql/lang/syntax/PartiQLParserTest.kt | 34 +++++++++++++++++++ .../parser/internal/PartiQLParserDefault.kt | 9 ++++- 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigParser.kt b/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigParser.kt index 94fa824e5c..10b6a017b4 100644 --- a/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigParser.kt +++ b/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigParser.kt @@ -95,7 +95,7 @@ internal class PartiQLPigParser(val customTypes: List = listOf()) : val tokenStream = createTokenStream(queryStream) val parser = parserInit(tokenStream) val tree = parser.root() - val visitor = PartiQLPigVisitor(customTypes, tokenStream.parameterIndexes) + val visitor = PartiQLPigVisitor(tokenStream, customTypes, tokenStream.parameterIndexes) return visitor.visit(tree) as PartiqlAst.Statement } diff --git a/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigVisitor.kt b/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigVisitor.kt index 69971bed5c..b56599ab61 100644 --- a/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigVisitor.kt +++ b/partiql-lang/src/main/kotlin/org/partiql/lang/syntax/impl/PartiQLPigVisitor.kt @@ -33,6 +33,7 @@ import com.amazon.ionelement.api.ionNull import com.amazon.ionelement.api.ionString import com.amazon.ionelement.api.ionSymbol import com.amazon.ionelement.api.loadSingleElement +import org.antlr.v4.runtime.CommonTokenStream import org.antlr.v4.runtime.ParserRuleContext import org.antlr.v4.runtime.Token import org.antlr.v4.runtime.tree.TerminalNode @@ -62,6 +63,7 @@ import org.partiql.lang.util.getPrecisionFromTimeString import org.partiql.lang.util.unaryMinus import org.partiql.parser.internal.antlr.PartiQLParser import org.partiql.parser.internal.antlr.PartiQLParserBaseVisitor +import org.partiql.parser.internal.antlr.PartiQLTokens import org.partiql.pig.runtime.SymbolPrimitive import org.partiql.value.datetime.DateTimeException import org.partiql.value.datetime.TimeZone @@ -116,6 +118,7 @@ import java.time.format.DateTimeParseException * There could be clever ways of exploiting this, to avoid the dispatch via `visit()`. */ internal class PartiQLPigVisitor( + private val tokens: CommonTokenStream, val customTypes: List = listOf(), private val parameterIndexes: Map = mapOf(), ) : @@ -1507,6 +1510,12 @@ internal class PartiQLPigVisitor( */ override fun visitBag(ctx: PartiQLParser.BagContext) = PartiqlAst.build { + // Prohibit hidden characters between angle brackets + val startTokenIndex = ctx.start.tokenIndex + val endTokenIndex = ctx.stop.tokenIndex + if (tokens.getHiddenTokensToRight(startTokenIndex, PartiQLTokens.HIDDEN) != null || tokens.getHiddenTokensToLeft(endTokenIndex, PartiQLTokens.HIDDEN) != null) { + throw ParserException("Invalid bag expression", ErrorCode.PARSE_INVALID_QUERY) + } val exprList = ctx.expr().map { visitExpr(it) } bag(exprList, ctx.ANGLE_LEFT(0).getSourceMetaContainer()) } diff --git a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt index 9a8c14819e..8e577f7fb1 100644 --- a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt +++ b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt @@ -5019,4 +5019,38 @@ class PartiQLParserTest : PartiQLParserTestBase() { lit(ionInt(1)) ) } + + // regression tests for bag constructor angle bracket + @Test + fun testBagConstructor() = assertExpression("<<<<1>>>>") { + bag( + bag( + lit(ionInt(1)) + ) + ) + } + + @Test + fun testSpacesInBagConstructor() = checkInputThrowingParserException( + "< < < < 1 > > > >", + ErrorCode.PARSE_UNEXPECTED_TOKEN, + expectErrorContextValues = mapOf( + Property.LINE_NUMBER to 1L, + Property.COLUMN_NUMBER to 1L, + Property.TOKEN_DESCRIPTION to PartiQLParser.ANGLE_LEFT.getAntlrDisplayString(), + Property.TOKEN_VALUE to ION.newSymbol("<") + ) + ) + + @Test + fun testCommentsInBagConstructor() = checkInputThrowingParserException( + ">>>", + ErrorCode.PARSE_UNEXPECTED_TOKEN, + expectErrorContextValues = mapOf( + Property.LINE_NUMBER to 1L, + Property.COLUMN_NUMBER to 1L, + Property.TOKEN_DESCRIPTION to PartiQLParser.ANGLE_LEFT.getAntlrDisplayString(), + Property.TOKEN_VALUE to ION.newSymbol("<") + ) + ) } diff --git a/partiql-parser/src/main/kotlin/org/partiql/parser/internal/PartiQLParserDefault.kt b/partiql-parser/src/main/kotlin/org/partiql/parser/internal/PartiQLParserDefault.kt index 83b362e6bd..19bdef2d9e 100644 --- a/partiql-parser/src/main/kotlin/org/partiql/parser/internal/PartiQLParserDefault.kt +++ b/partiql-parser/src/main/kotlin/org/partiql/parser/internal/PartiQLParserDefault.kt @@ -425,6 +425,7 @@ internal class PartiQLParserDefault : PartiQLParser { */ @OptIn(PartiQLValueExperimental::class) private class Visitor( + private val tokens: CommonTokenStream, private val locations: SourceLocations.Mutable, private val parameters: Map = mapOf(), ) : PartiQLParserBaseVisitor() { @@ -442,7 +443,7 @@ internal class PartiQLParserDefault : PartiQLParser { tree: GeneratedParser.RootContext, ): PartiQLParser.Result { val locations = SourceLocations.Mutable() - val visitor = Visitor(locations, tokens.parameterIndexes) + val visitor = Visitor(tokens, locations, tokens.parameterIndexes) val root = visitor.visitAs(tree) as Statement return PartiQLParser.Result( source = source, @@ -2022,6 +2023,12 @@ internal class PartiQLParserDefault : PartiQLParser { */ override fun visitBag(ctx: GeneratedParser.BagContext) = translate(ctx) { + // Prohibit hidden characters between angle brackets + val startTokenIndex = ctx.start.tokenIndex + val endTokenIndex = ctx.stop.tokenIndex + if (tokens.getHiddenTokensToRight(startTokenIndex, GeneratedLexer.HIDDEN) != null || tokens.getHiddenTokensToLeft(endTokenIndex, GeneratedLexer.HIDDEN) != null) { + throw error(ctx, "Invalid bag expression") + } val expressions = visitOrEmpty(ctx.expr()) exprCollection(Expr.Collection.Type.BAG, expressions) } From cc8fdbc5867270285cb2ab4e1d9f3a2b9e29aec9 Mon Sep 17 00:00:00 2001 From: Alan Cai Date: Wed, 3 Jul 2024 12:42:19 -0700 Subject: [PATCH 2/2] Add clarification comments --- .../test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt | 4 ++-- .../kotlin/org/partiql/lang/syntax/PartiQLParserTestBase.kt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt index 8e577f7fb1..98fc988f04 100644 --- a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt +++ b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTest.kt @@ -5033,7 +5033,7 @@ class PartiQLParserTest : PartiQLParserTestBase() { @Test fun testSpacesInBagConstructor() = checkInputThrowingParserException( "< < < < 1 > > > >", - ErrorCode.PARSE_UNEXPECTED_TOKEN, + ErrorCode.PARSE_UNEXPECTED_TOKEN, // partiql-ast parser ErrorCode expectErrorContextValues = mapOf( Property.LINE_NUMBER to 1L, Property.COLUMN_NUMBER to 1L, @@ -5045,7 +5045,7 @@ class PartiQLParserTest : PartiQLParserTestBase() { @Test fun testCommentsInBagConstructor() = checkInputThrowingParserException( ">>>", - ErrorCode.PARSE_UNEXPECTED_TOKEN, + ErrorCode.PARSE_UNEXPECTED_TOKEN, // partiql-ast parser ErrorCode expectErrorContextValues = mapOf( Property.LINE_NUMBER to 1L, Property.COLUMN_NUMBER to 1L, diff --git a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTestBase.kt b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTestBase.kt index 802974a413..1f4896bdd8 100644 --- a/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTestBase.kt +++ b/partiql-lang/src/test/kotlin/org/partiql/lang/syntax/PartiQLParserTestBase.kt @@ -152,7 +152,6 @@ abstract class PartiQLParserTestBase : TestBase() { input: String, errorCode: ErrorCode, expectErrorContextValues: Map, - targets: Array = arrayOf(ParserTarget.DEFAULT), assertContext: Boolean = true, ): Unit = forEachTarget { softAssert { @@ -160,7 +159,8 @@ abstract class PartiQLParserTestBase : TestBase() { parser.parseAstStatement(input) fail("Expected ParserException but there was no Exception") } catch (ex: ParserException) { - // split parser target does not use ErrorCode + // NOTE: only perform error code and error context checks for `ParserTarget.EXPERIMENTAL` (partiql-ast + // parser). if (assertContext && (this@forEachTarget == ParserTarget.EXPERIMENTAL)) { checkErrorAndErrorContext(errorCode, ex, expectErrorContextValues) }