Skip to content

Commit ec31d60

Browse files
authored
Fix auto-atomicity for loops around required one/notone/set loops (#56735)
Given an expression like `(?:a{2,3}){2}`, we currently erroneously convert that to `(?>a{2,3}){2}`, i.e. making the inner loop atomic, even though that then causes this to fail to match `aaaa` when it should match it (it fails because the first iteration will atomically match `aaa` and thus won't given any back, and then the next iteration will fail to match at least two `a`s). The simple fix is in FindLastExpressionInLoopForAutoAtomic, removing the special-case that tries to make the body of a loop that's a one/notone/set loop atomic. There are likely special-cases of this special-case that are still valid, but I'm not convinced it's worth trying to maintain and risk other gaps.
1 parent 822d877 commit ec31d60

File tree

3 files changed

+14
-12
lines changed

3 files changed

+14
-12
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,12 +1398,6 @@ static void ProcessNode(RegexNode node, RegexNode subsequent, uint maxDepth)
13981398
node = node.Child(0);
13991399
}
14001400

1401-
// If the loop's body terminates with a {one/notone/set} loop, return it.
1402-
if (node.Type == Oneloop || node.Type == Notoneloop || node.Type == Setloop)
1403-
{
1404-
return node;
1405-
}
1406-
14071401
// If the loop's body is a concatenate, we can skip to its last child iff that
14081402
// last child doesn't conflict with the first child, since this whole concatenation
14091403
// could be repeated, such that the first node ends up following the last. For

src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,12 @@ public static IEnumerable<object[]> Match_Basic_TestData()
128128
yield return new object[] { @"(?>\w+)(?<!a)", "aa", RegexOptions.None, 0, 2, false, string.Empty };
129129
yield return new object[] { @".+a", "baa", RegexOptions.None, 0, 3, true, "baa" };
130130
yield return new object[] { @"[ab]+a", "cacbaac", RegexOptions.None, 0, 7, true, "baa" };
131+
yield return new object[] { @"^(\d{2,3}){2}$", "1234", RegexOptions.None, 0, 4, true, "1234" };
132+
yield return new object[] { @"(\d{2,3}){2}", "1234", RegexOptions.None, 0, 4, true, "1234" };
133+
yield return new object[] { @"((\d{2,3})){2}", "1234", RegexOptions.None, 0, 4, true, "1234" };
134+
yield return new object[] { @"(\d{2,3})+", "1234", RegexOptions.None, 0, 4, true, "123" };
135+
yield return new object[] { @"(\d{2,3})*", "123456", RegexOptions.None, 0, 4, true, "123" };
136+
yield return new object[] { @"(abc\d{2,3}){2}", "abc123abc4567", RegexOptions.None, 0, 12, true, "abc123abc456" };
131137
foreach (RegexOptions lineOption in new[] { RegexOptions.None, RegexOptions.Singleline, RegexOptions.Multiline })
132138
{
133139
yield return new object[] { @".*", "abc", lineOption, 1, 2, true, "bc" };
@@ -1213,10 +1219,10 @@ public void IsMatch_Invalid()
12131219

12141220
[SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] // take too long due to backtracking
12151221
[Theory]
1216-
[InlineData(@"(\w*)+\.", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)]
1217-
[InlineData(@"(a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)]
1218-
[InlineData(@"(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false)]
1219-
public void IsMatch_SucceedQuicklyDueToAutoAtomicity(string regex, string input, bool expected)
1222+
[InlineData(@"(?:\w*)+\.", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)]
1223+
[InlineData(@"(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false)]
1224+
[InlineData(@"(?:x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", false)]
1225+
public void IsMatch_SucceedQuicklyDueToLoopReduction(string regex, string input, bool expected)
12201226
{
12211227
Assert.Equal(expected, Regex.IsMatch(input, regex, RegexOptions.None));
12221228
Assert.Equal(expected, Regex.IsMatch(input, regex, RegexOptions.Compiled));

src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,8 @@ private static int GetMinRequiredLength(Regex r)
379379
[InlineData("(?:a[ce]*|b*)c", "(?:a[ce]*|(?>b*))c")]
380380
[InlineData("apple|(?:orange|pear)|grape", "apple|orange|pear|grape")]
381381
[InlineData("(?>(?>(?>(?:abc)*)))", "(?:abc)*")]
382-
[InlineData("(w*)+", "((?>w*))+")]
383-
[InlineData("(w*)+\\.", "((?>w*))+\\.")]
382+
[InlineData("(?:w*)+", "(?>w*)+")]
383+
[InlineData("(?:w*)+\\.", "(?>w*)+\\.")]
384384
[InlineData("(a[bcd]e*)*fg", "(a[bcd](?>e*))*fg")]
385385
[InlineData("(\\w[bcd]\\s*)*fg", "(\\w[bcd](?>\\s*))*fg")]
386386
public void PatternsReduceIdentically(string pattern1, string pattern2)
@@ -457,6 +457,8 @@ public void PatternsReduceIdentically(string pattern1, string pattern2)
457457
[InlineData("[a-z]*[\x0000-\xFFFF]+", "(?>[a-z]*)[\x0000-\xFFFF]+")]
458458
[InlineData("[^a-c]*[e-g]", "(?>[^a-c]*)[e-g]")]
459459
[InlineData("[^a-c]*[^e-g]", "(?>[^a-c]*)[^e-g]")]
460+
[InlineData("(w+)+", "((?>w+))+")]
461+
[InlineData("(w{1,2})+", "((?>w{1,2}))+")]
460462
public void PatternsReduceDifferently(string pattern1, string pattern2)
461463
{
462464
var r1 = new Regex(pattern1);

0 commit comments

Comments
 (0)