Skip to content

Commit d18ff87

Browse files
[release/7.0] Fix RegexOptions.NonBacktracking matching end anchors at timeout check boundaries (#75308)
* Test for exposing timeout check bug
* Fix timeout check bug
* Improve naming
* Simplify test
* Make test search range higher
* Simplify test for timeout check bug

Co-authored-by: Olli Saarikivi <[email protected]>
1 parent acf0dd2 commit d18ff87

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -448,13 +448,13 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
448448
// still check the timeout now and again to provide some semblance of the behavior a developer experiences with
449449
// the backtracking engines. We can, however, choose a large number here, since it's not actually needed for security.
450450
const int CharsPerTimeoutCheck = 1_000;
451-
ReadOnlySpan<char> inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
452-
input.Slice(0, pos + CharsPerTimeoutCheck) :
453-
input;
451+
int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ?
452+
pos + CharsPerTimeoutCheck :
453+
input.Length;
454454

455455
bool done = currentState.NfaState is not null ?
456-
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
457-
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
456+
FindEndPositionDeltas<NfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) :
457+
FindEndPositionDeltas<DfaStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate);
458458

459459
// If the inner loop indicates that the search finished (for example due to reaching a deadend state) or
460460
// there is no more input available, then the whole search is done.
@@ -466,7 +466,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
466466
// The search did not finish, so we either failed to transition (which should only happen if we were in DFA mode and
467467
// need to switch over to NFA mode) or ran out of input in the inner loop. Check if the inner loop still had more
468468
// input available.
469-
if (pos < inputForInnerLoop.Length)
469+
if (pos < innerLoopLength)
470470
{
471471
// Because there was still more input available, a failure to transition in DFA mode must be the cause
472472
// of the early exit. Upgrade to NFA mode.
@@ -505,7 +505,7 @@ private int FindEndPosition<TInputReader, TFindOptimizationsHandler, TNullabilit
505505
/// 0 if iteration completed because we reached an initial state.
506506
/// A negative value if iteration completed because we ran out of input or we failed to transition.
507507
/// </returns>
508-
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, RegexRunnerMode mode,
508+
private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizationsHandler, TNullabilityHandler>(ReadOnlySpan<char> input, int length, RegexRunnerMode mode,
509509
ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef)
510510
where TStateHandler : struct, IStateHandler
511511
where TInputReader : struct, IInputReader
@@ -561,7 +561,7 @@ private bool FindEndPositionDeltas<TStateHandler, TInputReader, TFindOptimizatio
561561
}
562562

563563
// If there is more input available try to transition with the next character.
564-
if (!IsMintermId(positionId) || !TStateHandler.TryTakeTransition(this, ref state, positionId))
564+
if (pos >= length || !TStateHandler.TryTakeTransition(this, ref state, positionId))
565565
{
566566
return false;
567567
}

src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,6 +1278,22 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options)
12781278
Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout
12791279
}
12801280

1281+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))]
1282+
public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck()
1283+
{
1284+
// Regression test: a NonBacktracking end anchor must not match at a timeout check boundary. This constant must be at least as large as the one in the implementation that sets the maximum number
1285+
// of innermost loop iterations between timeout checks.
1286+
const int CharsToTriggerTimeoutCheck = 10000;
1287+
// Check that it is indeed large enough to trigger timeouts: with a 1-tick timeout, matching a* over that many 'a's must throw. If this fails the constant above needs to be larger.
1288+
Assert.Throws<RegexMatchTimeoutException>(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1))
1289+
.Match(new string('a', CharsToTriggerTimeoutCheck)));
1290+
1291+
// The actual test: ^a*$ shouldn't match in a string ending in 'b'. The input is CharsToTriggerTimeoutCheck 'a's followed by one 'b'; the one-hour timeout presumably keeps timeout checking on without ever firing.
1292+
Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1));
1293+
string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b');
1294+
Assert.False(testPattern.IsMatch(input));
1295+
}
1296+
12811297
public static IEnumerable<object[]> Match_Advanced_TestData()
12821298
{
12831299
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)

0 commit comments

Comments
 (0)