Skip to content

Commit abecfbe

Browse files
[release/7.0] Fix non-determinism in Regex source generator (#78149)
* Fix non-determinism in Regex source generator The source generator enumerates a Hashtable to write out its contents. When the keys of the Hashtable are strings, string hash code randomization may result in the order of that enumeration being different in different processes, leading to non-deterministic ordering of values written out and thus non-deterministic source generator output. * Address PR feedback Co-authored-by: Stephen Toub <[email protected]>
1 parent 787d5dd commit abecfbe

File tree

5 files changed

+65
-8
lines changed

5 files changed

+65
-8
lines changed

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ private static void EmitRegexDerivedImplementation(
126126
if (rm.Tree.CaptureNumberSparseMapping is not null)
127127
{
128128
writer.Write(" base.Caps = new Hashtable {");
129-
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
129+
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping.Cast<DictionaryEntry>().OrderBy(de => de.Key as int?));
130130
writer.WriteLine($" }};");
131131
}
132132
if (rm.Tree.CaptureNameToNumberMapping is not null)
133133
{
134134
writer.Write(" base.CapNames = new Hashtable {");
135-
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
135+
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping.Cast<DictionaryEntry>().OrderBy(de => de.Key as string, StringComparer.Ordinal));
136136
writer.WriteLine($" }};");
137137
}
138138
if (rm.Tree.CaptureNames is not null)
@@ -152,11 +152,10 @@ private static void EmitRegexDerivedImplementation(
152152
writer.WriteLine(runnerFactoryImplementation);
153153
writer.WriteLine($"}}");
154154

155-
static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
155+
static void AppendHashtableContents(IndentedTextWriter writer, IEnumerable<DictionaryEntry> contents)
156156
{
157-
IDictionaryEnumerator en = ht.GetEnumerator();
158157
string separator = "";
159-
while (en.MoveNext())
158+
foreach (DictionaryEntry en in contents)
160159
{
161160
writer.Write(separator);
162161
separator = ", ";

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ public int[] GetGroupNumbers()
322322
{
323323
result[(int)de.Value!] = (int)de.Key;
324324
}
325+
Array.Sort(result);
325326
}
326327

327328
return result;

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.GetGroupNames.Tests.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ public void GroupNamesAndNumbers(string pattern, string input, string[] expected
149149

150150
int[] numbers = regex.GetGroupNumbers();
151151
Assert.Equal(expectedNumbers.Length, numbers.Length);
152+
for (int i = 0; i < numbers.Length - 1; i++)
153+
{
154+
Assert.True(numbers[i] <= numbers[i + 1]);
155+
}
152156

153157
string[] names = regex.GetGroupNames();
154158
Assert.Equal(expectedNames.Length, names.Length);

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName)
6767
throw new InvalidOperationException();
6868
}
6969

70-
internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
71-
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
70+
private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore(
71+
string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
7272
{
7373
var proj = new AdhocWorkspace()
7474
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
@@ -87,7 +87,13 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
8787
var generator = new RegexGenerator();
8888
CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion));
8989
GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken);
90-
GeneratorDriverRunResult generatorResults = gd.GetRunResult();
90+
return (comp, gd.GetRunResult());
91+
}
92+
93+
internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
94+
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
95+
{
96+
(Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
9197
if (!compile)
9298
{
9399
return generatorResults.Diagnostics;
@@ -107,6 +113,20 @@ internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
107113
return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray();
108114
}
109115

116+
internal static async Task<string> GenerateSourceText(
117+
string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
118+
{
119+
(Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken);
120+
string generatedSource = string.Concat(generatorResults.GeneratedTrees.Select(t => t.ToString()));
121+
122+
if (generatorResults.Diagnostics.Length != 0)
123+
{
124+
throw new ArgumentException(string.Join(Environment.NewLine, generatorResults.Diagnostics) + Environment.NewLine + generatedSource);
125+
}
126+
127+
return generatedSource;
128+
}
129+
110130
internal static async Task<Regex> SourceGenRegexAsync(
111131
string pattern, CultureInfo? culture, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default)
112132
{

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
using Microsoft.CodeAnalysis;
55
using Microsoft.CodeAnalysis.CSharp;
6+
using Microsoft.DotNet.RemoteExecutor;
67
using System.Collections.Generic;
8+
using System.Diagnostics;
79
using System.Globalization;
810
using System.Threading.Tasks;
911
using Xunit;
@@ -839,5 +841,36 @@ partial class C
839841
public static partial Regex Valid();
840842
}", compile: true));
841843
}
844+
845+
[ConditionalFact(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
846+
[OuterLoop("Takes several seconds")]
847+
public void Deterministic_SameRegexProducesSameSource()
848+
{
849+
string first = Generate();
850+
for (int trials = 0; trials < 3; trials++)
851+
{
852+
Assert.Equal(first, Generate());
853+
}
854+
855+
static string Generate()
856+
{
857+
const string Code =
858+
@"using System.Text.RegularExpressions;
859+
partial class C
860+
{
861+
[GeneratedRegex(""(?<Name>\w+) (?<Street>\w+), (?<City>\w+) (?<State>[A-Z]{2}) (?<Zip>[0-9]{5})"")]
862+
public static partial Regex Valid();
863+
}";
864+
865+
// Generate the source in a new process so that any process-specific randomization is different between runs,
866+
// e.g. hash code randomization for strings.
867+
868+
using RemoteInvokeHandle handle = RemoteExecutor.Invoke(
869+
async () => Console.WriteLine(await RegexGeneratorHelper.GenerateSourceText(Code)),
870+
new RemoteInvokeOptions { StartInfo = new ProcessStartInfo { RedirectStandardOutput = true } });
871+
872+
return handle.Process.StandardOutput.ReadToEnd();
873+
}
874+
}
842875
}
843876
}

0 commit comments

Comments
 (0)