Skip to content

Commit

Permalink
Merge pull request #16 from AntonLydike/anton/fix-capture-syntax
Browse files (browse the repository at this point in the history)
regex: Syntax fixes for a bunch of stuff (fixes #4, #12)
  • Loading branch information
AntonLydike authored Jul 12, 2024
2 parents acaaae0 + d3d6b07 commit 716e954
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 14 deletions.
12 changes: 8 additions & 4 deletions filecheck/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def pattern_for_opts(opts: Options) -> tuple[re.Pattern[str], re.Pattern[str]]:
)


LINE_SPLIT_RE = split = re.compile(r"(\{\{|\[\[|]]|}})")
LINE_SPLIT_RE = split = re.compile(r"(\{\{|\[\[\$?[#a-zA-Z_]|]|})")


@dataclass
Expand Down Expand Up @@ -150,17 +150,21 @@ def parse_args(self, arg: str, line: str) -> list[UOp]:
offset = len(line) - len(arg)
while parts:
part = parts.pop(0)
if part == "[[":
if part.startswith("[["):
brackets = 2
# grab parts greedily until we hit a ]]
while not part.endswith("]]"):
while brackets > 0:
if not parts:
raise ParseError(
"Invalid substitution block, no ]]",
self.line_no,
offset,
line,
)
part += parts.pop(0)
addition = parts.pop(0)
brackets += addition.count("[") - addition.count("\\[")
brackets -= addition.count("]") + addition.count("\\]")
part += addition
# check if we are a simple capture pattern [[<name>:<regex>]]
if match := VAR_CAPTURE_PATTERN.fullmatch(part):
re_expr = posix_to_python_regex(match.group(2))
Expand Down
15 changes: 7 additions & 8 deletions filecheck/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
r"\[:(alpha|upper|lower|digit|alnum|xdigit|space|blank|print|punct|graph|word|ascii|cntrl):]"
)
POSIX_REGEXP_REPLACEMENTS = {
"alpha": "[A-Za-z]",
"upper": "[A-Z]",
"lower": "[a-z]",
"digit": "[0-9]",
"alnum": "[A-Za-z0-9]",
"xdigit": "[A-Fa-f0-9]",
"alpha": "A-Za-z",
"upper": "A-Z",
"lower": "a-z",
"digit": "0-9",
"alnum": "A-Za-z0-9",
"xdigit": "A-Fa-f0-9",
"space": r"\s",
"blank": r"[ \t]",
"word": r"\w+",
"blank": r" \t",
}

NEGATED_SET_WITHOUT_NEWLINES = re.compile(r"([^\\]|^)\[\^((?!\\n))")
Expand Down
2 changes: 1 addition & 1 deletion tests/filecheck/regex.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ sample text with a number: 144
// CHECK: sample text with a number: {{\d+}}

sample text with another number: 12*12
// CHECK: sample text with another number: {{([:digit:]{2})}}*{{([:digit:]{2})}}
// CHECK: sample text with another number: {{([[:digit:]]{2})}}*{{([[:digit:]]{2})}}

// make sure that negative capturing groups don't capture newlines
test 123
Expand Down
9 changes: 8 additions & 1 deletion tests/filecheck/variables.test
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ print z

// check for posix style regex
same FF77 FF77
// CHECK: [[VAR:[:xdigit:]+]] [[VAR]]
// CHECK: [[VAR:[[:xdigit:]]+]] [[VAR]]

// numeric captures:
print 0xFF00FF00
Expand All @@ -42,3 +42,10 @@ commutativity states that (x * y) = (y * x)
// CHECK: ([[a:(\w+)(x?)]] * [[b:(\w+)(x?)]]) = ([[b]] * [[a]])
again: (x * y) = (y * x)
// CHECK: ([[a]] * [[b]]) = ([[b]] * [[a]])


// check that we parse ambiguous syntax correctly:
test %arg1
// CHECK: test [[ARG:%[[:alnum:]]+]]
test [%arg1][0]
// CHECK-NEXT: test [[[ARG]]][0]

0 comments on commit 716e954

Please sign in to comment.