-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use Contains{Any} in Regex source generator #112065
Conversation
Tagging subscribers to this area: @dotnet/area-system-text-regularexpressions |
@MihuBot regexdiff |
118 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs:
"^[a-f0-9]{32}$" (4920 uses)
[GeneratedRegex("^[a-f0-9]{32}$")]
return false; // The input didn't match.
}
- if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+ if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
{
return false; // The input didn't match.
}
"\"([a-fA-F0-9-\\{\\}]{36})\"" (569 uses)
[GeneratedRegex("\"([a-fA-F0-9-\\{\\}]{36})\"", RegexOptions.CultureInvariant)]
return false; // The input didn't match.
}
- if (slice.Slice(0, 36).IndexOfAnyExcept(Utilities.s_ascii_20FF037E0000007E000028) >= 0)
+ if (slice.Slice(0, 36).ContainsAnyExcept(Utilities.s_ascii_20FF037E0000007E000028))
{
UncaptureUntil(0);
return false; // The input didn't match.
"^[a-z0-9]{24}$" (285 uses)
[GeneratedRegex("^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
return false; // The input didn't match.
}
- if (slice.Slice(0, 24).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign) >= 0)
+ if (slice.Slice(0, 24).ContainsAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign))
{
return false; // The input didn't match.
}
"^[0-9a-f]{40}$" (202 uses)
[GeneratedRegex("^[0-9a-f]{40}$", RegexOptions.IgnoreCase)]
return false; // The input didn't match.
}
- if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+ if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
{
return false; // The input didn't match.
}
"\\A(?:[A-Z0-9]{17})\\z" (182 uses)
[GeneratedRegex("\\A(?:[A-Z0-9]{17})\\z")]
return false; // The input didn't match.
}
- if (slice.Slice(0, 17).IndexOfAnyExcept(Utilities.s_asciiLettersUpperAndDigits) >= 0)
+ if (slice.Slice(0, 17).ContainsAnyExcept(Utilities.s_asciiLettersUpperAndDigits))
{
return false; // The input didn't match.
}
"^\\\\((?<StoreLocation>CurrentUser|LocalMach ..." (167 uses)
[GeneratedRegex("^\\\\((?<StoreLocation>CurrentUser|LocalMachine)(\\\\(?<StoreName>[a-zA-Z]+)(\\\\(?<Thumbprint>[0-9a-f]{40}))?)?)?$")]
goto LoopIterationNoMatch2;
}
- if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+ if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
{
goto LoopIterationNoMatch2;
}
"IR[0-9]{24}" (144 uses)
[GeneratedRegex("IR[0-9]{24}", RegexOptions.IgnoreCase)]
// Match a character in the set [0-9] exactly 24 times.
{
- if (slice.Slice(2, 24).IndexOfAnyExceptInRange('0', '9') >= 0)
+ if (slice.Slice(2, 24).ContainsAnyExceptInRange('0', '9'))
{
return false; // The input didn't match.
}
"^committed\\s+changeset\\s+\\d+:(?<hash>[0-9 ..." (132 uses)
[GeneratedRegex("^committed\\s+changeset\\s+\\d+:(?<hash>[0-9a-f]{40})$", RegexOptions.IgnoreCase)]
return false; // The input didn't match.
}
- if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+ if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
{
UncaptureUntil(0);
return false; // The input didn't match.
"^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([ ..." (130 uses)
[GeneratedRegex("^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}(}|\\))?$|^({)?[0xA-Fa-f0-9]{3,10}(, {0,1}[0xA-Fa-f0-9]{3,6}){2}, {0,1}({)([0xA-Fa-f0-9]{3,4}, {0,1}){7}[0xA-Fa-f0-9]{3,4}(}})$")]
goto AlternationBranch;
}
- if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+ if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
{
goto AlternationBranch;
}
"asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9 ..." (99 uses)
[GeneratedRegex("asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9]+)(/(?<flags>[a-zA-Z0-9]*))?", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture)]
return false; // The input didn't match.
}
- if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+ if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
{
UncaptureUntil(0);
return false; // The input didn't match.

For more diff examples, see https://gist.github.com/MihuBot/47c9e3e7aadcb5bd85d2a8b35243f90d
For a list of JIT diff improvements, see Improvements.md.

Sample source code for further analysis:

const string JsonPath = "RegexResults-978.json";
// Download the results archive and cache the extracted JSON locally.
// The download is skipped when the JSON file already exists on disk.
if (!File.Exists(JsonPath))
{
    // Bind the HttpClient to a using declaration so it is disposed once the
    // archive has been read, instead of being created inline and leaked.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/EogtjSRA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First(...) throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// IncludeFields is enabled because RegexEntry exposes value-tuple typed properties,
// and value-tuple elements are fields rather than properties.
using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");
/// <summary>
/// A regex pattern together with the options it is constructed with.
/// NOTE(review): Count is presumably the number of uses found for the pattern
/// (the diff listing reports "(N uses)" per pattern) — confirm against the data.
/// </summary>
record KnownPattern(string Pattern, RegexOptions Options, int Count);
/// <summary>
/// One element of the array deserialized from the results JSON file.
/// </summary>
sealed class RegexEntry
{
/// <summary>The pattern, options and count this entry describes.</summary>
public required KnownPattern Regex { get; set; }
/// <summary>NOTE(review): presumably the source generated for the pattern on the main branch — confirm.</summary>
public required string MainSource { get; set; }
/// <summary>NOTE(review): presumably the source generated for the pattern with the PR applied — confirm.</summary>
public required string PrSource { get; set; }
/// <summary>Optional diff text; may be null. NOTE(review): presumably the complete MainSource/PrSource diff — confirm.</summary>
public string? FullDiff { get; set; }
/// <summary>Optional diff text; may be null. NOTE(review): presumably a condensed form of the diff — confirm.</summary>
public string? ShortDiff { get; set; }
/// <summary>Optional array of (Name, Values) string pairs; may be null. NOTE(review): presumably the SearchValues&lt;char&gt; fields emitted for the pattern — confirm.</summary>
public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
/// <summary>Optional array of (Values, ComparisonType) pairs; may be null. NOTE(review): presumably the SearchValues&lt;string&gt; fields emitted for the pattern — confirm.</summary>
public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks
* main:
  System.Net.Http.WinHttpHandler.StartRequestAsync assertion failed (dotnet#109799)
  Keep test PDB in helix payload for native AOT (dotnet#111949)
  Build the RID-specific System.IO.Ports packages in the VMR (dotnet#112054)
  Always inline number conversions (dotnet#112061)
  Use Contains{Any} in Regex source generator (dotnet#112065)
  Update dependencies from https://github.com/dotnet/arcade build 20250130.5 (dotnet#112013)
  JIT: Transform single-reg args to FIELD_LIST in physical promotion (dotnet#111590)
  Ensure that math calls into the CRT are tracked as needing vzeroupper (dotnet#112011)
  Use double.ConvertToIntegerNative where safe to do in System.Random (dotnet#112046)
  JIT: Compute `fgCalledCount` after synthesis (dotnet#112041)
  Simplify boolean logic in `TimeZoneInfo` (dotnet#112062)
  JIT: Update type when return temp is freshly created (dotnet#111948)
  Remove unused build controls and simplify DotNetBuild.props (dotnet#111986)
  Fix case-insensitive JSON deserialization of enum member names (dotnet#112028)
  WasmAppBuilder: Remove double computation of a value (dotnet#112047)
  Disable LTCG for brotli and zlibng. (dotnet#111805)
  JIT: Improve x86 unsigned to floating cast codegen (dotnet#111595)
  simplify x86 special intrinsic imports (dotnet#111836)
  JIT: Try to retain entry weight during profile synthesis (dotnet#111971)
  Fix explicit offset of ByRefLike fields. (dotnet#111584)
The compiler would be more annoying to match here since it can't be a simple string replace.
Replaces `IndexOf{Any}{Except}{InRange}(...) >= 0` with `Contains{Any}{Except}{InRange}(...)` in the one place where we aren't making further use of the actual index.