Skip to content

Commit ba54d0a

Browse files
[release/7.0] React to CheckForOverflowUnderflow in regex source generator (#78256)
* React to CheckForOverflowUnderflow in regex source generator The regex source generator uses code patterns that might have arithmetic overflows, e.g. a bounds check with `(uint)index < span.Length`. These are intentional, and they're benign... unless the project/compilation has opted-in to overflow/underflow checking (CheckForOverflowUnderflow). In that case, the code for many patterns can start throwing false positive overflow exceptions, making the source generator unusable. This commit causes the generator to look at the CheckOverflow setting in the compilation options, and if it's set, to emit `unchecked { ... }` around all the relevant code. * Address PR feedback Co-authored-by: Stephen Toub <[email protected]>
1 parent 0b558e8 commit ba54d0a

File tree

4 files changed

+105
-66
lines changed

4 files changed

+105
-66
lines changed

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,27 @@ static void AppendHashtableContents(IndentedTextWriter writer, IEnumerable<Dicti
175175
}
176176

177177
/// <summary>Emits the code for the RunnerFactory. This is the actual logic for the regular expression.</summary>
178-
private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
178+
private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
179179
{
180+
void EnterCheckOverflow()
181+
{
182+
if (checkOverflow)
183+
{
184+
writer.WriteLine($"unchecked");
185+
writer.WriteLine($"{{");
186+
writer.Indent++;
187+
}
188+
}
189+
190+
void ExitCheckOverflow()
191+
{
192+
if (checkOverflow)
193+
{
194+
writer.Indent--;
195+
writer.WriteLine($"}}");
196+
}
197+
}
198+
180199
writer.WriteLine($"/// <summary>Provides a factory for creating <see cref=\"RegexRunner\"/> instances to be used by methods on <see cref=\"Regex\"/>.</summary>");
181200
writer.WriteLine($"private sealed class RunnerFactory : RegexRunnerFactory");
182201
writer.WriteLine($"{{");
@@ -211,7 +230,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer,
211230
writer.WriteLine($" protected override void Scan(ReadOnlySpan<char> inputSpan)");
212231
writer.WriteLine($" {{");
213232
writer.Indent += 3;
233+
EnterCheckOverflow();
214234
(bool needsTryFind, bool needsTryMatch) = EmitScan(writer, rm);
235+
ExitCheckOverflow();
215236
writer.Indent -= 3;
216237
writer.WriteLine($" }}");
217238
if (needsTryFind)
@@ -223,7 +244,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer,
223244
writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)");
224245
writer.WriteLine($" {{");
225246
writer.Indent += 3;
247+
EnterCheckOverflow();
226248
EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers);
249+
ExitCheckOverflow();
227250
writer.Indent -= 3;
228251
writer.WriteLine($" }}");
229252
}
@@ -236,7 +259,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer,
236259
writer.WriteLine($" private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)");
237260
writer.WriteLine($" {{");
238261
writer.Indent += 3;
239-
EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers);
262+
EnterCheckOverflow();
263+
EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers, checkOverflow);
264+
ExitCheckOverflow();
240265
writer.Indent -= 3;
241266
writer.WriteLine($" }}");
242267
}
@@ -291,51 +316,52 @@ private static void AddIsWordCharHelper(Dictionary<string, string[]> requiredHel
291316
}
292317

293318
/// <summary>Adds the IsBoundary helper to the required helpers collection.</summary>
294-
private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers)
319+
private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
295320
{
296321
const string IsBoundary = nameof(IsBoundary);
297322
if (!requiredHelpers.ContainsKey(IsBoundary))
298323
{
324+
string uncheckedKeyword = checkOverflow ? "unchecked" : "";
299325
requiredHelpers.Add(IsBoundary, new string[]
300326
{
301-
"/// <summary>Determines whether the specified index is a boundary.</summary>",
302-
"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
303-
"internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
304-
"{",
305-
" int indexMinus1 = index - 1;",
306-
" return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
307-
" ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
308-
"",
309-
" static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
310-
"}",
327+
$"/// <summary>Determines whether the specified index is a boundary.</summary>",
328+
$"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
329+
$"internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
330+
$"{{",
331+
$" int indexMinus1 = index - 1;",
332+
$" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
333+
$" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
334+
$"",
335+
$" static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
336+
$"}}",
311337
});
312338

313339
AddIsWordCharHelper(requiredHelpers);
314340
}
315341
}
316342

317343
/// <summary>Adds the IsECMABoundary helper to the required helpers collection.</summary>
318-
private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers)
344+
private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
319345
{
320346
const string IsECMABoundary = nameof(IsECMABoundary);
321347
if (!requiredHelpers.ContainsKey(IsECMABoundary))
322348
{
349+
string uncheckedKeyword = checkOverflow ? "unchecked" : "";
323350
requiredHelpers.Add(IsECMABoundary, new string[]
324351
{
325-
"/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
326-
"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
327-
"internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
328-
"{",
329-
" int indexMinus1 = index - 1;",
330-
" return ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
331-
" ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
332-
"",
333-
" static bool IsECMAWordChar(char ch) =>",
334-
" ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter",
335-
" (((uint)ch - '0') < 10) || // digit",
336-
" ch == '_' || // underscore",
337-
" ch == '\\u0130'; // latin capital letter I with dot above",
338-
"}",
352+
$"/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
353+
$"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
354+
$"internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
355+
$"{{",
356+
$" int indexMinus1 = index - 1;",
357+
$" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
358+
$" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
359+
$"",
360+
$" static bool IsECMAWordChar(char ch) =>",
361+
$" char.IsAsciiLetterOrDigit(ch) ||",
362+
$" ch == '_' ||",
363+
$" ch == '\\u0130'; // latin capital letter I with dot above",
364+
$"}}",
339365
});
340366
}
341367
}
@@ -429,7 +455,7 @@ FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start or
429455
/// <summary>Emits the body of the TryFindNextPossibleStartingPosition.</summary>
430456
private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
431457
{
432-
RegexOptions options = (RegexOptions)rm.Options;
458+
RegexOptions options = rm.Options;
433459
RegexTree regexTree = rm.Tree;
434460
bool rtl = (options & RegexOptions.RightToLeft) != 0;
435461

@@ -1025,7 +1051,7 @@ void EmitLiteralAfterAtomicLoop()
10251051
}
10261052

10271053
/// <summary>Emits the body of the TryMatchAtCurrentPosition.</summary>
1028-
private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
1054+
private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers, bool checkOverflow)
10291055
{
10301056
// In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
10311057
// version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
@@ -2668,14 +2694,14 @@ void EmitBoundary(RegexNode node)
26682694
call = node.Kind is RegexNodeKind.Boundary ?
26692695
$"!{HelpersTypeName}.IsBoundary" :
26702696
$"{HelpersTypeName}.IsBoundary";
2671-
AddIsBoundaryHelper(requiredHelpers);
2697+
AddIsBoundaryHelper(requiredHelpers, checkOverflow);
26722698
}
26732699
else
26742700
{
26752701
call = node.Kind is RegexNodeKind.ECMABoundary ?
26762702
$"!{HelpersTypeName}.IsECMABoundary" :
26772703
$"{HelpersTypeName}.IsECMABoundary";
2678-
AddIsECMABoundaryHelper(requiredHelpers);
2704+
AddIsECMABoundaryHelper(requiredHelpers, checkOverflow);
26792705
}
26802706

26812707
using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,24 @@ public partial class RegexGenerator : IIncrementalGenerator
3636
"#pragma warning disable CS0219 // Variable assigned but never used",
3737
};
3838

39+
private record struct CompilationData(bool AllowUnsafe, bool CheckOverflow);
40+
3941
public void Initialize(IncrementalGeneratorInitializationContext context)
4042
{
43+
// To avoid invalidating every regex's output when anything from the compilation changes,
44+
// we extract from it the only things we care about.
45+
IncrementalValueProvider<CompilationData> compilationDataProvider =
46+
context.CompilationProvider
47+
.Select((x, _) =>
48+
x.Options is CSharpCompilationOptions options ?
49+
new CompilationData(options.AllowUnsafe, options.CheckOverflow) :
50+
default);
51+
4152
// Produces one entry per generated regex. This may be:
4253
// - Diagnostic in the case of a failure that should end the compilation
4354
// - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
4455
// - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex
45-
IncrementalValueProvider<ImmutableArray<object>> codeOrDiagnostics =
56+
IncrementalValueProvider<ImmutableArray<object>> results =
4657
context.SyntaxProvider
4758

4859
// Find all MethodDeclarationSyntax nodes attributed with GeneratedRegex and gather the required information.
@@ -52,9 +63,13 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
5263
GetSemanticTargetForGeneration)
5364
.Where(static m => m is not null)
5465

66+
// Incorporate the compilation data, as it impacts code generation.
67+
.Combine(compilationDataProvider)
68+
5569
// Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs.
56-
.Select((state, _) =>
70+
.Select((methodOrDiagnosticAndCompilationData, _) =>
5771
{
72+
object? state = methodOrDiagnosticAndCompilationData.Left;
5873
if (state is not RegexMethod regexMethod)
5974
{
6075
Debug.Assert(state is Diagnostic);
@@ -74,26 +89,16 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
7489
var writer = new IndentedTextWriter(sw);
7590
writer.Indent += 2;
7691
writer.WriteLine();
77-
EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers);
92+
EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers, methodOrDiagnosticAndCompilationData.Right.CheckOverflow);
7893
writer.Indent -= 2;
79-
return (regexMethod, sw.ToString(), requiredHelpers);
94+
return (regexMethod, sw.ToString(), requiredHelpers, methodOrDiagnosticAndCompilationData.Right);
8095
})
8196
.Collect();
8297

83-
// To avoid invalidating every regex's output when anything from the compilation changes,
84-
// we extract from it the only things we care about: whether unsafe code is allowed,
85-
// and a name based on the assembly's name, and only that information is then fed into
86-
// RegisterSourceOutput along with all of the cached generated data from each regex.
87-
IncrementalValueProvider<(bool AllowUnsafe, string? AssemblyName)> compilationDataProvider =
88-
context.CompilationProvider
89-
.Select((x, _) => (x.Options is CSharpCompilationOptions { AllowUnsafe: true }, x.AssemblyName));
90-
9198
// When there something to output, take all the generated strings and concatenate them to output,
9299
// and raise all of the created diagnostics.
93-
context.RegisterSourceOutput(codeOrDiagnostics.Combine(compilationDataProvider), static (context, compilationDataAndResults) =>
100+
context.RegisterSourceOutput(results, static (context, results) =>
94101
{
95-
ImmutableArray<object> results = compilationDataAndResults.Left;
96-
97102
// Report any top-level diagnostics.
98103
bool allFailures = true;
99104
foreach (object result in results)
@@ -150,7 +155,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
150155
context.ReportDiagnostic(limitedSupportResult.Item3);
151156
regexMethod = limitedSupportResult.Item1;
152157
}
153-
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
158+
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>, CompilationData> regexImpl)
154159
{
155160
foreach (KeyValuePair<string, string[]> helper in regexImpl.Item3)
156161
{
@@ -214,11 +219,11 @@ public void Initialize(IncrementalGeneratorInitializationContext context)
214219
writer.WriteLine();
215220
}
216221
}
217-
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
222+
else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>, CompilationData> regexImpl)
218223
{
219224
if (!regexImpl.Item1.IsDuplicate)
220225
{
221-
EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, compilationDataAndResults.Right.AllowUnsafe);
226+
EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, regexImpl.Item4.AllowUnsafe);
222227
writer.WriteLine();
223228
}
224229
}

0 commit comments

Comments
 (0)