Skip to content

Commit c9f9697

Browse files
authored
Add parser tests from nim-regex (#62093)
* new parser tests * baseline * Nim tests * typos * positive cases * new parser tests * change to \u
1 parent eff6546 commit c9f9697

File tree

4 files changed

+166
-7
lines changed

4 files changed

+166
-7
lines changed

src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -714,18 +714,40 @@ private static void Parse(string pattern, RegexOptions options, RegexParseError?
714714
if (error != null)
715715
{
716716
Assert.InRange(offset, 0, int.MaxValue);
717-
Throws(error.Value, offset, () => new Regex(pattern, options));
717+
Throws(pattern, options, error.Value, offset, () => new Regex(pattern, options));
718718
return;
719719
}
720720

721721
Assert.Equal(-1, offset);
722+
LogActual(pattern, options, RegexParseError.Unknown, -1);
722723

723724
// Nothing to assert here without having access to internals.
724725
new Regex(pattern, options); // Does not throw
725726

726727
ParsePatternFragments(pattern, options);
727728
}
728729

730+
private static void LogActual(string pattern, RegexOptions options, RegexParseError error, int offset)
731+
{
732+
// To conveniently add new interesting patterns to these tests, add them to the code in the format:
733+
//
734+
// [InlineData("SOMEREGEX1", RegexOptions.None, null)]
735+
// [InlineData("SOMEREGEX2", RegexOptions.None, null)]
736+
// ...
737+
//
738+
// then uncomment the lines below, and the actual baseline for each pattern will be appended to /tmp/out.cs, e.g.
739+
//
740+
// [InlineData(@"SOMEREGEX1", RegexOptions.None, RegexParseError.UnrecognizedEscape, 3)]
741+
// [InlineData(@"SOMEREGEX2", RegexOptions.None, RegexParseError.InsufficientClosingParentheses, 2)]
742+
// ...
743+
//
744+
//string s = (error == RegexParseError.Unknown) ?
745+
// @$" [InlineData(@""{pattern}"", RegexOptions.{options.ToString()}, null)]" :
746+
// @$" [InlineData(@""{pattern}"", RegexOptions.{options.ToString()}, RegexParseError.{error.ToString()}, {offset})]";
747+
748+
// File.AppendAllText(@"/tmp/out.cs", s + "\n");
749+
}
750+
729751
private static void ParsePatternFragments(string pattern, RegexOptions options)
730752
{
731753
// Trim the input in various places and parse.
@@ -755,7 +777,7 @@ private static void ParsePatternFragments(string pattern, RegexOptions options)
755777
/// </summary>
756778
/// <param name="error">The expected parse error</param>
757779
/// <param name="action">The action to invoke.</param>
758-
static partial void Throws(RegexParseError error, int offset, Action action);
780+
static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action);
759781

760782
/// <summary>
761783
/// Checks that action succeeds or throws either a RegexParseException or an ArgumentException depending on the

src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netcoreapp.cs

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ public partial class RegexParserTests
110110
[InlineData(@"(?P<a>.)(?P<a>.)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)]
111111
[InlineData(@"[a-\A]", RegexOptions.None, RegexParseError.UnrecognizedEscape, 5)]
112112
[InlineData(@"[a-\z]", RegexOptions.None, RegexParseError.UnrecognizedEscape, 5)]
113-
[InlineData(@"[a-\b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)]
113+
[InlineData(@"[a-\b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)] // Nim: not an error
114114
[InlineData(@"[a-\-]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)]
115115
[InlineData(@"[a-\-b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)]
116116
[InlineData(@"[a-\-\-b]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 5)]
@@ -127,6 +127,115 @@ public partial class RegexParserTests
127127
[InlineData(@"[a-[:lower:]]", RegexOptions.None, null)] // errors in rust: range_end_no_class
128128
// End of Rust parser tests ==============
129129

130+
// Following are borrowed from Nim tests
131+
// https://github.com/nitely/nim-regex/blob/eeefb4f51264ff3bc3b36caf55672a74f52f5ef5/tests/tests.nim
132+
[InlineData(@"?", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
133+
[InlineData(@"?|?", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
134+
[InlineData(@"?abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
135+
[InlineData(@"(?P<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_>abc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)] // Nim: not an error
136+
[InlineData(@"(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)]
137+
[InlineData(@"(?u-q)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)]
138+
[InlineData(@"(?uq)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 3)]
139+
[InlineData(@"(\b)", RegexOptions.None, null)]
140+
[InlineData(@"(+)", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 2)]
141+
[InlineData(@"(a)b)", RegexOptions.None, RegexParseError.InsufficientOpeningParentheses, 5)]
142+
[InlineData(@"(b(a)", RegexOptions.None, RegexParseError.InsufficientClosingParentheses, 5)]
143+
[InlineData(@"[-", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)]
144+
[InlineData(@"[-a", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)]
145+
[InlineData(@"[[:abc:]]", RegexOptions.None, null)] // Nim: "Invalid ascii set. `abc` is not a valid name"
146+
[InlineData(@"[[:alnum:", RegexOptions.None, RegexParseError.UnterminatedBracket, 9)]
147+
[InlineData(@"[[:alnum]]", RegexOptions.None, null)] // Nim: "Invalid ascii set. Expected [:name:]"
148+
[InlineData(@"[]", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)]
149+
[InlineData(@"[]a", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)]
150+
[InlineData(@"[]abc", RegexOptions.None, RegexParseError.UnterminatedBracket, 5)]
151+
[InlineData(@"[\\", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)]
152+
[InlineData(@"[^]", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)]
153+
[InlineData(@"[a-", RegexOptions.None, RegexParseError.UnterminatedBracket, 3)]
154+
[InlineData(@"[a-\w]", RegexOptions.None, RegexParseError.ShorthandClassInCharacterRange, 5)]
155+
[InlineData(@"[a", RegexOptions.None, RegexParseError.UnterminatedBracket, 2)]
156+
[InlineData(@"[abc", RegexOptions.None, RegexParseError.UnterminatedBracket, 4)]
157+
[InlineData(@"[d-c]", RegexOptions.None, RegexParseError.ReversedCharacterRange, 4)]
158+
[InlineData(@"[z-[:alnum:]]", RegexOptions.None, null)] // Nim: "Invalid set range. Start must be lesser than end"
159+
[InlineData(@"{10}", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
160+
[InlineData(@"*abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
161+
[InlineData(@"\12", RegexOptions.None, null)] // Nim: "Invalid octal literal. Expected 3 octal digits, but found 2"
162+
[InlineData(@"\12@", RegexOptions.None, null)] // Nim: "Invalid octal literal. Expected octal digit, but found @"
163+
[InlineData(@"\b?", RegexOptions.None, null)]
164+
[InlineData(@"\b*", RegexOptions.None, null)]
165+
[InlineData(@"\b+", RegexOptions.None, null)]
166+
[InlineData(@"\p{11", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 5)]
167+
[InlineData(@"\p{11}", RegexOptions.None, RegexParseError.UnrecognizedUnicodeProperty, 6)]
168+
[InlineData(@"\p{Bb}", RegexOptions.None, RegexParseError.UnrecognizedUnicodeProperty, 6)]
169+
[InlineData(@"\p11", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 2)]
170+
[InlineData(@"\pB", RegexOptions.None, RegexParseError.InvalidUnicodePropertyEscape, 2)]
171+
[InlineData(@"\u123", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 2)]
172+
[InlineData(@"\U123", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)]
173+
[InlineData(@"\U123@a", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)]
174+
[InlineData(@"\u123@abc", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 6)]
175+
[InlineData(@"\UFFFFFFFF", RegexOptions.None, RegexParseError.UnrecognizedEscape, 2)]
176+
[InlineData(@"\x{00000000A}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)]
177+
[InlineData(@"\x{2f894", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)]
178+
[InlineData(@"\x{61@}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)]
179+
[InlineData(@"\x{7fffffff}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)] // Nim: not an error (supports Unicode beyond basic multilingual plane)
180+
[InlineData(@"\x{FFFFFFFF}", RegexOptions.None, RegexParseError.InsufficientOrInvalidHexDigits, 3)]
181+
[InlineData(@"+", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
182+
[InlineData(@"+abc", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 1)]
183+
[InlineData(@"a???", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)]
184+
[InlineData(@"a??*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)]
185+
[InlineData(@"a??+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)]
186+
[InlineData(@"a?*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
187+
[InlineData(@"a?+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
188+
[InlineData(@"a(?P<>abc)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 4)]
189+
[InlineData(@"a(?P<asd)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 4)]
190+
[InlineData(@"a{,}", RegexOptions.None, null)] // Nim error
191+
[InlineData(@"a{,1}", RegexOptions.None, null)] // Nim error
192+
[InlineData(@"a{0,101}", RegexOptions.None, null)] // Nim error: "Invalid repetition range. Expected 100 repetitions or less, but found: 101"
193+
[InlineData(@"a{0,a}", RegexOptions.None, null)] // Nim error
194+
[InlineData(@"a{0,bad}", RegexOptions.None, null)] // Nim error: "Invalid repetition range. Range can only contain digits"
195+
[InlineData(@"a{1,,,2}", RegexOptions.None, null)] // Nim error
196+
[InlineData(@"a{1,,}", RegexOptions.None, null)] // Nim error
197+
[InlineData(@"a{1,,2}", RegexOptions.None, null)] // Nim error
198+
[InlineData(@"a{1,", RegexOptions.None, null)] // Nim error
199+
[InlineData(@"a{1,}??", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 7)]
200+
[InlineData(@"a{1,}?", RegexOptions.None, null)]
201+
[InlineData(@"a{1,}*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 6)]
202+
[InlineData(@"a{1,}+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 6)]
203+
[InlineData(@"a{1,101}", RegexOptions.None, null)]
204+
[InlineData(@"a{1,x}", RegexOptions.None, null)] // Nim error
205+
[InlineData(@"a{1", RegexOptions.None, null)] // Nim error
206+
[InlineData(@"a{1}??", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 6)]
207+
[InlineData(@"a{1}?", RegexOptions.None, null)]
208+
[InlineData(@"a{1}*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 5)]
209+
[InlineData(@"a{1}+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 5)]
210+
[InlineData(@"a{1111111111}", RegexOptions.None, null)] // Nim error: "Invalid repetition range. Max value is 32767."
211+
[InlineData(@"a{1x}", RegexOptions.None, null)] // Nim error
212+
[InlineData(@"a*??", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)]
213+
[InlineData(@"a*{,}", RegexOptions.None, null)] // Nim error
214+
[InlineData(@"a*{0}", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
215+
[InlineData(@"a*{1}", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
216+
[InlineData(@"a**", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
217+
[InlineData(@"a*****", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
218+
[InlineData(@"a*+", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
219+
[InlineData(@"a+??", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 4)]
220+
[InlineData(@"a+*", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
221+
[InlineData(@"a++", RegexOptions.None, RegexParseError.NestedQuantifiersNotParenthesized, 3)]
222+
[InlineData(@"a|?", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
223+
[InlineData(@"a|?b", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
224+
[InlineData(@"a|*", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
225+
[InlineData(@"a|*b", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
226+
[InlineData(@"a|+", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
227+
[InlineData(@"a|+b", RegexOptions.None, RegexParseError.QuantifierAfterNothing, 3)]
228+
[InlineData(@"aaa(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 6)]
229+
[InlineData(@"abc(?P<abc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 6)]
230+
[InlineData(@"abc(?Pabc)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 6)]
231+
[InlineData(@"abc(?q)", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 6)]
232+
[InlineData(@"abc[]", RegexOptions.None, RegexParseError.UnterminatedBracket, 5)]
233+
[InlineData(@"abc\A{10}", RegexOptions.None, null)] // Nim error: "Invalid repetition range, either char, shorthand (i.e: \\w), group, or set expected before repetition range"
234+
[InlineData(@"\uD87E\uDC94(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 15)]
235+
[InlineData(@"\uD87E\uDC94aaa(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 18)]
236+
[InlineData(@"\uD87E\uDC94\uD87E\uDC94\uD87E\uDC94(?Pabc", RegexOptions.None, RegexParseError.InvalidGroupingConstruct, 39)]
237+
// End of Nim parser tests ==============
238+
130239
[SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
131240
public void Parse_Netcoreapp(string pattern, RegexOptions options, RegexParseError? error, int offset = -1)
132241
{
@@ -157,7 +266,7 @@ public void RegexParseException_Serializes()
157266
/// </summary>
158267
/// <param name="error">The expected parse error</param>
159268
/// <param name="action">The action to invoke.</param>
160-
static partial void Throws(RegexParseError error, int offset, Action action)
269+
static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action)
161270
{
162271
try
163272
{
@@ -171,16 +280,19 @@ static partial void Throws(RegexParseError error, int offset, Action action)
171280
if (error == regexParseError)
172281
{
173282
Assert.Equal(offset, e.Offset);
283+
LogActual(pattern, options, regexParseError, e.Offset);
174284
return;
175285
}
176286

287+
LogActual(pattern, options, regexParseError, e.Offset);
177288
throw new XunitException($"Expected RegexParseException with error {error} offset {offset} -> Actual error: {regexParseError} offset {e.Offset})");
178289
}
179290
catch (Exception e)
180291
{
181-
throw new XunitException($"Expected RegexParseException -> Actual: ({e})");
292+
throw new XunitException($"Expected RegexParseException for pattern '{pattern}' -> Actual: ({e})");
182293
}
183294

295+
LogActual(pattern, options, RegexParseError.Unknown, -1);
184296
throw new XunitException($"Expected RegexParseException with error: ({error}) -> Actual: No exception thrown");
185297
}
186298

src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.netfx.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public partial class RegexParserTests
1616
/// </summary>
1717
/// <param name="error">The expected parse error</param>
1818
/// <param name="action">The action to invoke.</param>
19-
static partial void Throws(RegexParseError error, int offset, Action action)
19+
static partial void Throws(string pattern, RegexOptions options, RegexParseError error, int offset, Action action)
2020
{
2121
try
2222
{

src/libraries/System.Text.RegularExpressions/tests/THIRD-PARTY-NOTICES.TXT

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,29 @@ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
3535
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
3636
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
3737
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
38-
DEALINGS IN THE SOFTWARE.
38+
DEALINGS IN THE SOFTWARE.
39+
40+
License notice for https://github.com/nitely/nim-regex
41+
-------------------------------
42+
43+
MIT License
44+
45+
Copyright (c) 2017 Esteban Castro Borsani
46+
47+
Permission is hereby granted, free of charge, to any person obtaining a copy
48+
of this software and associated documentation files (the "Software"), to deal
49+
in the Software without restriction, including without limitation the rights
50+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
51+
copies of the Software, and to permit persons to whom the Software is
52+
furnished to do so, subject to the following conditions:
53+
54+
The above copyright notice and this permission notice shall be included in all
55+
copies or substantial portions of the Software.
56+
57+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
58+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
59+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
60+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
61+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
62+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
63+
SOFTWARE.

0 commit comments

Comments
 (0)