From 35d3f36d2d8661fa17eb3a374ee1f4c9d868d08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Ros?= Date: Tue, 7 Jan 2025 11:51:53 -0800 Subject: [PATCH] Fix escaped numbers parsing (#192) --- .github/workflows/build.yml | 3 +++ .github/workflows/publish.yml | 3 +++ src/Parlot/Scanner.cs | 18 +++++++++++------ test/Parlot.Tests/ScannerTests.cs | 33 ++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 43234a9..8a7ee02 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,6 +25,9 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-dotnet@v4 with: + dotnet-version: | + 6.0.x + 8.0.x global-json-file: global.json - name: Test diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f0ec025..fee65f1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -17,6 +17,9 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-dotnet@v4 with: + dotnet-version: | + 6.0.x + 8.0.x global-json-file: global.json - name: Test diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs index 68c76a6..273c415 100644 --- a/src/Parlot/Scanner.cs +++ b/src/Parlot/Scanner.cs @@ -556,6 +556,8 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan result) // We can read Eof if there is an escaped quote sequence and no actual end quote, e.g. 
"'abc\'def" if (Cursor.Eof) { + Cursor.ResetPosition(start); + result = []; return false; } @@ -585,11 +587,13 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan result) // https://stackoverflow.com/a/32175520/142772 // exactly 4 digits #if NET8_0_OR_GREATER - var lastHexIndex = Cursor.Span.Slice(0, 4).LastIndexOfAny(Character._hexDigits); - var isValidUnicode = lastHexIndex == 3; + var allHexDigits = Cursor.Span.Length > 4 && Cursor.Span.Slice(1, 4).IndexOfAnyExcept(Character._hexDigits) == -1; + var isValidUnicode = allHexDigits; if (!isValidUnicode) { + Cursor.ResetPosition(start); + result = []; return false; } @@ -629,19 +633,21 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan result) break; case 'x': - // At least two digits + // At least one digit #if NET8_0_OR_GREATER - lastHexIndex = Cursor.Span.Slice(0, 4).LastIndexOfAny(Character._hexDigits); - var isValidHex = lastHexIndex > 0; + var firstNonHexDigit = Cursor.Span.Length > 1 ? Cursor.Span.Slice(1).IndexOfAnyExcept(Character._hexDigits) : -1; + var isValidHex = firstNonHexDigit > 0; if (!isValidHex) { + Cursor.ResetPosition(start); + result = []; return false; } // Advance the cursor for the read digits - Cursor.Advance(lastHexIndex + 1); + Cursor.Advance(firstNonHexDigit); #else var isValidHex = false; diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs index 1af848b..c215346 100644 --- a/test/Parlot.Tests/ScannerTests.cs +++ b/test/Parlot.Tests/ScannerTests.cs @@ -277,10 +277,41 @@ public void ShouldReadStringsWithLineBreaks(string text, string expected) [InlineData("' \\xa0 ' ", "' \\xa0 '")] [InlineData("' \\xfh ' ", "' \\xfh '")] [InlineData("' \\u1234 ' ", "' \\u1234 '")] - public void ShouldReadUnicodeSequence(string text, string expected) { new Scanner(text).ReadQuotedString(out var result); Assert.Equal(expected, result); } + + [Theory] + [InlineData("'\\u'")] + [InlineData("'\\u1'")] + [InlineData("'\\u12'")] + 
[InlineData("'\\u123'")] + [InlineData("'\\ug'")] + [InlineData("'\\u1g'")] + [InlineData("'\\u12g'")] + [InlineData("'\\u123g'")] + [InlineData("'\\x'")] + [InlineData("'\\xg'")] + public void ShouldNotParseInvalidEscapedNumbers(string input) + { + var s = new Scanner(input); + Assert.False(s.ReadQuotedString()); + Assert.Equal(0, s.Cursor.Position.Offset); + } + + [Theory] + [InlineData("'\\u1234'")] + [InlineData("'\\u12345'")] + [InlineData("'\\x1'")] + [InlineData("'\\x12'")] + [InlineData("'\\x123'")] + [InlineData("'\\x1234'")] + [InlineData("'\\x1234g'")] + public void ShouldParseValidEscapedNumbers(string input) + { + var s = new Scanner(input); + Assert.True(s.ReadQuotedString()); + } }