AntonLydike · AntonLydike · Jul 12, 2024 · Jul 12, 2024 · Jul 12, 2024
diff --git a/filecheck/compiler.py b/filecheck/compiler.py
@@ -75,5 +75,7 @@ def compile_uops(
         elif isinstance(uop, NumSubst):
             # we don't do numerical substitutions yet
             raise NotImplementedError("Numerical substitutions not supported!")
-
-    return re.compile("".join(expr)), captures
+    try:
+        return re.compile("".join(expr)), captures
+    except re.error:
+        raise CheckError(f"Malformed regex expression: '{''.join(expr)}'", check)
diff --git a/filecheck/regex.py b/filecheck/regex.py
@@ -16,11 +16,28 @@
     "word": r"\w+",
 }
 
+NEGATED_SET_WITHOUT_NEWLINES = re.compile(r"([^\\]|^)\[\^((?!\\n))")
+
 
 def posix_to_python_regex(expr: str) -> str:
     """
     We need to translate things like `[:alpha:]` to `[A-Za-z]`, etc.
 
+    This also takes care of a little-known fact about the llvm::Regex implementation:
+
+    ```
+    enum llvm::Regex::RegexFlags::Newline = 2U
+
+    Compile for newline-sensitive matching. With this flag '[^' bracket
+    expressions and '.' never match newline. A ^ anchor matches the
+    null string after any newline in the string in addition to its normal
+    function, and the $ anchor matches the null string before any
+    newline in the string in addition to its normal function.
+    ```
+
+    This flag is enabled in all FileCheck cases, meaning we also need to add `\n` to all
+    negated bracket expressions, otherwise they would match across newlines.
+
     LLVM supports them, but pythons regex doesn't.
     """
     while (match := POSIX_REGEXP_PATTERN.search(expr)) is not None:
@@ -29,6 +46,9 @@ def posix_to_python_regex(expr: str) -> str:
                 f"Can't translate posix regex, unknown character set: {match.group(1)}"
             )
         expr = expr.replace(match.group(0), POSIX_REGEXP_REPLACEMENTS[match.group(1)])
+
+    expr = NEGATED_SET_WITHOUT_NEWLINES.sub(r"\1[^\\n\2", expr)
+
     return expr
 
 

diff --git a/tests/filecheck/regex.test b/tests/filecheck/regex.test
@@ -5,3 +5,9 @@ sample text with a number: 144
 
 sample text with another number: 12*12
 // CHECK: sample text with another number: {{([:digit:]{2})}}*{{([:digit:]{2})}}
+
+// make sure that negated bracket expressions don't match newlines
+test 123
+// CHECK: test [[VAR:[^ ,]+]]
+test 123, 123
+// CHECK: test [[VAR]], [[VAR]]