Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement AnyOf using SearchValues #164

Merged
merged 6 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
[*.cs]
[*]
indent_style = space
indent_size = 2
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.cs]
indent_size = 4

[*.cs]

# IDE0022: Use block body for method
csharp_style_expression_bodied_methods = when_on_single_line:silent
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
steps:
- name: Checkout source code
uses: actions/checkout@v4

- uses: actions/setup-dotnet@v4
with:
dotnet-version: '9.0.x'

- name: Test
run: dotnet test --configuration Release
6 changes: 5 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ jobs:
steps:
- name: Checkout source code
uses: actions/checkout@v4


- uses: actions/setup-dotnet@v4
with:
dotnet-version: '9.0.x'

- name: Test
run: dotnet test --configuration Release

Expand Down
22 changes: 22 additions & 0 deletions docs/parsers.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,28 @@ Result:
abab
```

### AnyOf

Matches one or more consecutive characters, each of which belongs to a given set of characters.

```c#
Parser<TextSpan> AnyOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0)
Parser<TextSpan> AnyOf(SearchValues<char> searchValues, int minSize = 1, int maxSize = 0)
```

Usage:

```c#
var input = "ababcad";
var parser = Terms.AnyOf("ab");
```

Result:

```
abab
```

## Combining parsers

### Or
Expand Down
39 changes: 39 additions & 0 deletions src/Parlot/Fluent/Parsers.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
using System;
using System.Collections.Generic;
#if NET8_0_OR_GREATER
using System.Buffers;
using System.Numerics;
#endif

#pragma warning disable CA1822 // Mark members as static

Expand Down Expand Up @@ -167,6 +170,24 @@ public Parser<T> Number<T>(NumberOptions numberOptions = NumberOptions.Number, c
/// <param name="minSize">Minimum number of matches required for the parser to succeed. Defaults to 1.</param>
/// <param name="maxSize">Once this many matches have been read the parser returns <see langword="true"/>. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> Pattern(Func<char, bool> predicate, int minSize = 1, int maxSize = 0)
{
    return new PatternLiteral(predicate, minSize, maxSize);
}

#if NET8_0_OR_GREATER
/// <summary>
/// Creates a parser that matches consecutive chars contained in a <see cref="SearchValues{T}"/> set.
/// </summary>
/// <param name="searchValues">The <see cref="SearchValues{T}"/> set each char is tested against.</param>
/// <param name="minSize">Minimum number of matches required. Defaults to 1.</param>
/// <param name="maxSize">Maximum number of matches, forwarded to the underlying literal parser. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> AnyOf(SearchValues<char> searchValues, int minSize = 1, int maxSize = 0)
{
    return new SearchValuesLiteral(searchValues, minSize, maxSize);
}

/// <summary>
/// Creates a parser that matches consecutive chars taken from <paramref name="values"/>.
/// The chars are turned into a <see cref="SearchValues{T}"/> instance and passed to the
/// <see cref="SearchValues{T}"/>-based overload.
/// </summary>
/// <param name="values">The set of chars that are allowed to match.</param>
/// <param name="minSize">Minimum number of matches required. Defaults to 1.</param>
/// <param name="maxSize">Maximum number of matches, forwarded to the other overload. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> AnyOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0)
{
    var searchValues = SearchValues.Create(values);
    return AnyOf(searchValues, minSize, maxSize);
}
#endif
}

public class TermBuilder
Expand Down Expand Up @@ -234,4 +255,22 @@ public Parser<T> Number<T>(NumberOptions numberOptions = NumberOptions.Number, c
/// <param name="minSize">Minimum number of matches required for the parser to succeed. Defaults to 1.</param>
/// <param name="maxSize">Once this many matches have been read the parser returns <see langword="true"/>. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> Pattern(Func<char, bool> predicate, int minSize = 1, int maxSize = 0)
{
    var literal = new PatternLiteral(predicate, minSize, maxSize);
    return Parsers.SkipWhiteSpace(literal);
}

#if NET8_0_OR_GREATER
/// <summary>
/// Creates a parser that matches consecutive chars contained in a <see cref="SearchValues{T}"/> set.
/// The literal parser is wrapped with <c>Parsers.SkipWhiteSpace</c>.
/// </summary>
/// <param name="searchValues">The <see cref="SearchValues{T}"/> set each char is tested against.</param>
/// <param name="minSize">Minimum number of matches required. Defaults to 1.</param>
/// <param name="maxSize">Maximum number of matches, forwarded to the underlying literal parser. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> AnyOf(SearchValues<char> searchValues, int minSize = 1, int maxSize = 0)
{
    var literal = new SearchValuesLiteral(searchValues, minSize, maxSize);
    return Parsers.SkipWhiteSpace(literal);
}

/// <summary>
/// Creates a parser that matches consecutive chars taken from <paramref name="values"/>.
/// The chars are turned into a <see cref="SearchValues{T}"/> instance and passed to the
/// <see cref="SearchValues{T}"/>-based overload.
/// </summary>
/// <param name="values">The set of chars that are allowed to match.</param>
/// <param name="minSize">Minimum number of matches required. Defaults to 1.</param>
/// <param name="maxSize">Maximum number of matches, forwarded to the other overload. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> AnyOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0)
{
    var searchValues = SearchValues.Create(values);
    return AnyOf(searchValues, minSize, maxSize);
}
#endif
}
53 changes: 53 additions & 0 deletions src/Parlot/Fluent/SearchValuesLiteral.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#if NET8_0_OR_GREATER
using System;
using System.Buffers;

namespace Parlot.Fluent;

/// <summary>
/// Parser that consumes a run of consecutive chars that all belong to a
/// <see cref="SearchValues{T}"/> set and yields the consumed range as a <see cref="TextSpan"/>.
/// </summary>
internal sealed class SearchValuesLiteral : Parser<TextSpan>
{
    private readonly SearchValues<char> _searchValues;
    private readonly int _minSize;
    private readonly int _maxSize;

    /// <summary>
    /// Initializes the parser.
    /// </summary>
    /// <param name="searchValues">The set of chars that are allowed to match.</param>
    /// <param name="minSize">The minimum number of matching chars required. Defaults to 1.</param>
    /// <param name="maxSize">The maximum number of chars to consume. Defaults to 0, i.e. no maximum size.</param>
    /// <exception cref="ArgumentNullException"><paramref name="searchValues"/> is <see langword="null"/>.</exception>
    public SearchValuesLiteral(SearchValues<char> searchValues, int minSize = 1, int maxSize = 0)
    {
        _searchValues = searchValues ?? throw new ArgumentNullException(nameof(searchValues));
        _minSize = minSize;
        _maxSize = maxSize;
    }

    /// <summary>
    /// Reads matching chars from the span exposed by the scanner's cursor. On success the cursor
    /// is advanced past the consumed chars and <paramref name="result"/> is set to that range.
    /// </summary>
    /// <returns>
    /// <see langword="false"/> when the first char does not match or fewer than the minimum number
    /// of chars match; otherwise <see langword="true"/>.
    /// </returns>
    public override bool Parse(ParseContext context, ref ParseResult<TextSpan> result)
    {
        context.EnterParser(this);

        var span = context.Scanner.Cursor.Span;

        // Index of the first char that is NOT one of the searched values
        var index = span.IndexOfAnyExcept(_searchValues);

        // If index == -1 the whole input is a match
        var size = index == -1 ? span.Length : index;

        // Too small? The minimum applies whether the run is ended by a non-matching char
        // or by the end of the input.
        if (index == 0 || size < _minSize)
        {
            return false;
        }

        // Too large? Stop at the maximum and succeed; the remaining chars are left unread.
        if (_maxSize > 0 && size > _maxSize)
        {
            size = _maxSize;
        }

        var start = context.Scanner.Cursor.Position.Offset;
        context.Scanner.Cursor.Advance(size);
        result.Set(start, start + size, new TextSpan(context.Scanner.Buffer, start, size));
        return true;
    }
}
#endif
49 changes: 38 additions & 11 deletions src/Parlot/Scanner.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using System;
using Parlot.Fluent;
#if NET8_0_OR_GREATER
using System.Buffers;
#endif
using System.Runtime.CompilerServices;

namespace Parlot;
Expand All @@ -12,6 +14,7 @@ public class Scanner
{
#if NET8_0_OR_GREATER
// Chars accepted as decimal digits.
private static readonly SearchValues<char> _decimalDigits = SearchValues.Create("0123456789");
// Chars accepted as hexadecimal digits, in lower and upper case.
private static readonly SearchValues<char> _hexDigits = SearchValues.Create("0123456789abcdefABCDEF");
#endif

public readonly string Buffer;
Expand Down Expand Up @@ -63,7 +66,7 @@ public bool SkipWhiteSpace()
{
// Fast path if we know the current char is not a whitespace
var current = Cursor.Current;
if (current > ' ' && current < 256)
if (current is > ' ' and < (char)256)
{
return false;
}
Expand Down Expand Up @@ -146,7 +149,7 @@ public bool ReadDecimal(bool allowLeadingSign, bool allowDecimalSeparator, bool

if (allowLeadingSign)
{
if (Cursor.Current == '-' || Cursor.Current == '+')
if (Cursor.Current is '-' or '+')
{
Cursor.AdvanceNoNewLines(1);
}
Expand Down Expand Up @@ -190,11 +193,11 @@ public bool ReadDecimal(bool allowLeadingSign, bool allowDecimalSeparator, bool
}
}

if (allowExponent && (Cursor.Current == 'e' || Cursor.Current == 'E'))
if (allowExponent && (Cursor.Current is 'e' or 'E'))
{
Cursor.AdvanceNoNewLines(1);

if (Cursor.Current == '-' || Cursor.Current == '+')
if (Cursor.Current is '-' or '+')
{
Cursor.AdvanceNoNewLines(1);
}
Expand Down Expand Up @@ -362,7 +365,7 @@ public bool ReadText(ReadOnlySpan<char> text, StringComparison comparisonType, o
return false;
}

int start = Cursor.Offset;
var start = Cursor.Offset;
Cursor.Advance(text.Length);
result = Buffer.AsSpan(start, Cursor.Offset - start);

Expand All @@ -385,7 +388,7 @@ public bool ReadAnyOf(ReadOnlySpan<char> chars, StringComparison comparisonType,
return false;
}

int start = Cursor.Offset;
var start = Cursor.Offset;
Cursor.Advance(index + 1);
result = Cursor.Buffer.AsSpan(start, index + 1);

Expand Down Expand Up @@ -502,7 +505,7 @@ public bool ReadQuotedString(out ReadOnlySpan<char> result)
{
var startChar = Cursor.Current;

if (startChar != '\'' && startChar != '\"')
if (startChar is not '\'' and not '\"')
{
result = [];
return false;
Expand Down Expand Up @@ -584,7 +587,19 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)

// https://stackoverflow.com/a/32175520/142772
// exactly 4 digits
#if NET8_0_OR_GREATER
var lastHexIndex = Cursor.Span.Slice(0, 4).LastIndexOfAny(_hexDigits);
var isValidUnicode = lastHexIndex == 3;

if (!isValidUnicode)
{
result = [];
return false;
}

// Advance the cursor by the 4 digits
Cursor.Advance(4);
#else
var isValidUnicode = false;

Cursor.Advance();
Expand Down Expand Up @@ -613,14 +628,25 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)
result = [];
return false;
}

#endif
break;
case 'x':

// https://stackoverflow.com/a/32175520/142772
// exactly 4 digits
// At least two digits
#if NET8_0_OR_GREATER
lastHexIndex = Cursor.Span.Slice(0, 4).LastIndexOfAny(_hexDigits);
var isValidHex = lastHexIndex > 0;

if (!isValidHex)
{
result = [];
return false;
}

bool isValidHex = false;
// Advance the cursor for the read digits
Cursor.Advance(lastHexIndex + 1);
#else
var isValidHex = false;

Cursor.Advance();

Expand Down Expand Up @@ -651,6 +677,7 @@ private bool ReadQuotedString(char quoteChar, out ReadOnlySpan<char> result)
result = [];
return false;
}
#endif

break;
default:
Expand Down
2 changes: 1 addition & 1 deletion test/Parlot.Benchmarks/Parlot.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>net8.0</TargetFrameworks>
<IsPackable>false</IsPackable>
<AnalysisLevel>latest-Default</AnalysisLevel>
</PropertyGroup>
Expand Down
1 change: 1 addition & 0 deletions test/Parlot.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ static void Main(string[] args)
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);
}
}

Loading