From af9fd59c47003b1fddecfc0bca61fb0e42087b17 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 9 Nov 2022 09:08:30 -0500 Subject: [PATCH 1/2] Fix non-determinism in Regex source generator The source generator enumerates a Hashtable to write out its contents. When the keys of the Hashtable are strings, string hash code randomization may result in the order of that enumeration being different in different processes, leading to non-deterministic ordering of values written out and thus non-deterministic source generator output. --- .../gen/RegexGenerator.Emitter.cs | 9 +++-- .../RegexGeneratorHelper.netcoreapp.cs | 26 +++++++++++++-- .../RegexGeneratorParserTests.cs | 33 +++++++++++++++++++ 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 62fb094cf0f569..09e6ce8759a156 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -126,13 +126,13 @@ private static void EmitRegexDerivedImplementation( if (rm.Tree.CaptureNumberSparseMapping is not null) { writer.Write(" base.Caps = new Hashtable {"); - AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping); + AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast()); writer.WriteLine($" }};"); } if (rm.Tree.CaptureNameToNumberMapping is not null) { writer.Write(" base.CapNames = new Hashtable {"); - AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping); + AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping.Cast().OrderBy(de => de.Key as string, StringComparer.Ordinal)); writer.WriteLine($" }};"); } if (rm.Tree.CaptureNames is not null) @@ -152,11 +152,10 @@ private static void EmitRegexDerivedImplementation( writer.WriteLine(runnerFactoryImplementation); writer.WriteLine($"}}"); - static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht) + static void AppendHashtableContents(IndentedTextWriter writer, IEnumerable contents) { - IDictionaryEnumerator en = ht.GetEnumerator(); string separator = ""; - while (en.MoveNext()) + foreach (DictionaryEntry en in contents) { writer.Write(separator); separator = ", "; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs index d3a54c331f3d3e..04e9683bdd2ccf 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs @@ -67,8 +67,8 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName) throw new InvalidOperationException(); } - internal static async Task> RunGenerator( - string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore( + string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) { var proj = new AdhocWorkspace() .AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create())) @@ -87,7 +87,13 @@ internal static async Task> RunGenerator( var generator = new RegexGenerator(); CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion)); GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken); - GeneratorDriverRunResult generatorResults = gd.GetRunResult(); + return (comp, gd.GetRunResult()); + } + + internal static async Task> RunGenerator( + string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + { + (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken); if (!compile) { return generatorResults.Diagnostics; @@ -107,6 +113,20 @@ internal static async Task> RunGenerator( return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray(); } + internal static async Task GenerateSourceText( + string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + { + (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken); + string generatedSource = string.Concat(generatorResults.GeneratedTrees.Select(t => t.ToString())); + + if (generatorResults.Diagnostics.Length != 0) + { + throw new ArgumentException(string.Join(Environment.NewLine, generatorResults.Diagnostics) + Environment.NewLine + generatedSource); + } + + return generatedSource; + } + internal static async Task SourceGenRegexAsync( string pattern, CultureInfo? culture, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs index f9ccaaf568a382..642bba87ad050e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs @@ -3,7 +3,9 @@ using Microsoft.CodeAnalysis; using Microsoft.CodeAnalysis.CSharp; +using Microsoft.DotNet.RemoteExecutor; using System.Collections.Generic; +using System.Diagnostics; using System.Globalization; using System.Threading.Tasks; using Xunit; @@ -839,5 +841,36 @@ partial class C public static partial Regex Valid(); }", compile: true)); } + + [ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))] + [OuterLoop("Takes several seconds")] + public void Deterministic_SameRegexProducesSameSource() + { + string first = Generate(); + for (int trials = 0; trials < 3; trials++) + { + Assert.Equal(first, Generate()); + } + + static string Generate() + { + const string Code = + @"using System.Text.RegularExpressions; + partial class C + { + [GeneratedRegex(""(?\w+) (?\w+), (?\w+) (?[A-Z]{2}) (?[0-9]{5})"")] + public static partial Regex Valid(); + }"; + + // Generate the source in a new process so that any process-specific randomization is different between runs, + // e.g. hash code randomization for strings. + + using RemoteInvokeHandle handle = RemoteExecutor.Invoke( + async () => Console.WriteLine(await RegexGeneratorHelper.GenerateSourceText(Code)), + new RemoteInvokeOptions { StartInfo = new ProcessStartInfo { RedirectStandardOutput = true } }); + + return handle.Process.StandardOutput.ReadToEnd(); + } + } } } From fc4d087be85a817837fc01546ffd936842e34d07 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 9 Nov 2022 21:07:07 -0500 Subject: [PATCH 2/2] Address PR feedback --- .../gen/RegexGenerator.Emitter.cs | 2 +- .../src/System/Text/RegularExpressions/Regex.cs | 1 + .../tests/FunctionalTests/Regex.GetGroupNames.Tests.cs | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 09e6ce8759a156..cacb2211be5c7f 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -126,7 +126,7 @@ private static void EmitRegexDerivedImplementation( if (rm.Tree.CaptureNumberSparseMapping is not null) { writer.Write(" base.Caps = new Hashtable {"); - AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast()); + AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast().OrderBy(de => de.Key as int?)); writer.WriteLine($" }};"); } if (rm.Tree.CaptureNameToNumberMapping is not null) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index 1598c7e380194d..6516af531e2a6f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -322,6 +322,7 @@ public int[] GetGroupNumbers() { result[(int)de.Value!] = (int)de.Key; } + Array.Sort(result); } return result; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs index b270ea509ec884..8b1d21f250494e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs @@ -149,6 +149,10 @@ public void GroupNamesAndNumbers(string pattern, string input, string[] expected int[] numbers = regex.GetGroupNumbers(); Assert.Equal(expectedNumbers.Length, numbers.Length); + for (int i = 0; i < numbers.Length - 1; i++) + { + Assert.True(numbers[i] <= numbers[i + 1]); + } string[] names = regex.GetGroupNames(); Assert.Equal(expectedNames.Length, names.Length);