Skip to content

Commit

Permalink
Verify that patterns are anchored (#499)
Browse files Browse the repository at this point in the history
Some of the systems we transpile to always expect regular expressions to
be anchored at the start and at the end (*e.g.*, XSD engines). This
patch therefore verifies that all verification patterns are appropriately
anchored, which ensures that the patterns will also be transpiled
correctly down the line.
  • Loading branch information
mristin authored Jun 20, 2024
1 parent 3c69322 commit b6d0a60
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 37 deletions.
86 changes: 85 additions & 1 deletion aas_core_codegen/intermediate/_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@
TranspilableVerification,
type_annotations_equal,
)
from aas_core_codegen.parse import tree as parse_tree
from aas_core_codegen.parse import (
retree as parse_retree,
tree as parse_tree,
)

# pylint: disable=unused-argument

Expand Down Expand Up @@ -4403,6 +4406,85 @@ def _verify_only_simple_type_patterns(symbol_table: SymbolTable) -> List[Error]:
return errors


def _verify_patterns_anchored_at_start_and_end(
    symbol_table: SymbolTable,
) -> List[Error]:
    """
    Ensure that every pattern verification uses a fully anchored pattern.

    A pattern passes if it parses, consists of exactly one top-level
    alternative, begins with the start anchor (``^``) and finishes with the end
    anchor (``$``).

    Certain targets of the transpilation (*e.g.*, XSD engines, or regular
    expressions compiled to instructions of a virtual machine) can only handle
    patterns anchored at both ends. We therefore reject non-anchored patterns
    right here instead of letting them slip downstream, where the problem would
    surface much later.

    :param symbol_table: to be checked for its pattern verification functions
    :return: list of errors, one for each offending pattern; empty if all fine
    """
    errors = []  # type: List[Error]

    for func in symbol_table.verification_functions:
        # Only pattern verifications carry a regular expression to inspect.
        if not isinstance(func, PatternVerification):
            continue

        regex, parse_error = parse_retree.parse([func.pattern])
        if parse_error is not None:
            regex_line, pointer_line = parse_retree.render_pointer(
                parse_error.cursor
            )

            parse_failure = (
                "Failed to parse the pattern of "
                "the pattern verification function:\n"
                f"{parse_error.message}\n"
                f"{regex_line}\n"
                f"{pointer_line}"
            )
            errors.append(Error(func.parsed.node, parse_failure))
            continue

        assert regex is not None

        alternatives = regex.union.uniates
        if len(alternatives) == 0 or len(alternatives[0].concatenants) == 0:
            empty_pattern = (
                "The pattern is empty. We expect only non-empty patterns "
                "all anchored at the start (``^``) and at the end (``$``) "
                "for inter-operability with different regex engines, *e.g.*, "
                "XSD engines. The pattern in question inferred for "
                f"the verification function {func.name!r} "
                f"was: {func.pattern}"
            )
            errors.append(Error(func.parsed.node, empty_pattern))
            continue

        # The anchors must be the outermost terms of the sole alternative;
        # with more than one top-level alternative, an anchor would apply
        # only to a part of the pattern.
        terms = alternatives[0].concatenants
        head = terms[0].value
        tail = terms[-1].value

        starts_anchored = (
            isinstance(head, parse_retree.Symbol)
            and head.kind is parse_retree.SymbolKind.START
        )
        ends_anchored = (
            isinstance(tail, parse_retree.Symbol)
            and tail.kind is parse_retree.SymbolKind.END
        )

        if len(alternatives) == 1 and starts_anchored and ends_anchored:
            continue

        not_anchored = (
            "(mristin, 2024-05-31): We expect all the patterns to be "
            "anchored at the start (``^``) and at the end (``$``) for "
            "inter-operability with different regex engines, *e.g.*, XSD "
            "engines. Please consider re-writing your pattern with "
            "a prefix ``^.*``, if you want to match an arbitrary prefix, "
            "and a suffix ``.*$``, if you want to match an arbitrary "
            "suffix. The pattern in question inferred for "
            f"the verification function {func.name!r} "
            f"was: {func.pattern}"
        )
        errors.append(Error(func.parsed.node, not_anchored))

    return errors


def _assert_interfaces_defined_correctly(
symbol_table: SymbolTable, ontology: _hierarchy.Ontology
) -> None:
Expand Down Expand Up @@ -4497,6 +4579,8 @@ def _verify(symbol_table: SymbolTable, ontology: _hierarchy.Ontology) -> List[Er

errors.extend(_verify_only_simple_type_patterns(symbol_table=symbol_table))

errors.extend(_verify_patterns_anchored_at_start_and_end(symbol_table=symbol_table))

if len(errors) > 0:
return errors

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static class Verification
[CodeAnalysis.SuppressMessage("ReSharper", "StringLiteralTypo")]
private static Regex _constructMatchSomething()
{
var pattern = "^\\ud800\\udc00|something$";
var pattern = "^(\\ud800\\udc00|something)$";

return new Regex(pattern);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
@verification
def match_something(text: str) -> bool:
pattern = f"^\\U00010000|something$"
pattern = "^(\\U00010000|something)$"
return match(pattern, text) is not None


Expand Down
2 changes: 1 addition & 1 deletion tests/csharp/test_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def test_only_summary(self) -> None:
@verification
def verify_something(text: str) -> bool:
"""Verify something."""
return match(r'.*', text) is not None
return match(r'^.*$', text) is not None
__version__ = "dummy"
__xml_namespace__ = "https://dummy.com"
Expand Down
46 changes: 23 additions & 23 deletions tests/infer_for_schema/test_patterns_on_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_single_pattern(self) -> None:
@verification
def matches_something(text: str) -> bool:
prefix = "something"
return match(f"{prefix}-[a-zA-Z]+", text) is not None
return match(f"^{prefix}-[a-zA-Z]+$", text) is not None
@invariant(
Expand Down Expand Up @@ -100,7 +100,7 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -112,11 +112,11 @@ def test_two_patterns(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@verification
def matches_acme(text: str) -> bool:
return match(".*acme.*", text) is not None
return match("^.*acme.*$", text) is not None
@invariant(
Expand Down Expand Up @@ -162,9 +162,9 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+'),
pattern='^something-[a-zA-Z]+$'),
PatternConstraint(
pattern='.*acme.*')]},
pattern='^.*acme.*$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -176,7 +176,7 @@ def test_conditioned_on_property(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@invariant(
lambda self:
Expand Down Expand Up @@ -224,7 +224,7 @@ def __init__(self, some_property: Optional[str] = None) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -236,11 +236,11 @@ def test_no_inheritance_by_default(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@verification
def matches_acme(text: str) -> bool:
return match(".*acme.*", text) is not None
return match("^.*acme.*$", text) is not None
@invariant(
lambda self: matches_something(self.some_property),
Expand Down Expand Up @@ -298,7 +298,7 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='.*acme.*')]},
pattern='^.*acme.*$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -313,7 +313,7 @@ def test_no_inheritance_involved(self) -> None:
@verification
def matches_something(text: str) -> bool:
prefix = "something"
return match(f"{prefix}-[a-zA-Z]+", text) is not None
return match(f"^{prefix}-[a-zA-Z]+$", text) is not None
@invariant(
Expand Down Expand Up @@ -370,7 +370,7 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -383,7 +383,7 @@ def test_inheritance_from_parent_with_no_patterns_of_own(self) -> None:
@verification
def matches_something(text: str) -> bool:
prefix = "something"
return match(f"{prefix}-[a-zA-Z]+", text) is not None
return match(f"^{prefix}-[a-zA-Z]+$", text) is not None
@invariant(
Expand Down Expand Up @@ -440,7 +440,7 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -452,11 +452,11 @@ def test_merge_with_parent(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@verification
def matches_acme(text: str) -> bool:
return match(".*acme.*", text) is not None
return match("^.*acme.*$", text) is not None
@invariant(
lambda self: matches_something(self.some_property),
Expand Down Expand Up @@ -521,9 +521,9 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='.*acme.*'),
pattern='^.*acme.*$'),
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -535,7 +535,7 @@ def test_merge_with_parent_and_grand_parent(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@invariant(
lambda self: matches_something(self.some_property),
Expand Down Expand Up @@ -599,7 +599,7 @@ def __init__(self, some_property: str) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -611,7 +611,7 @@ def test_merge_with_parent_over_constrained_primitive(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@invariant(
lambda self: matches_something(self),
Expand Down Expand Up @@ -674,7 +674,7 @@ def __init__(self, some_property: SomeConstrainedPrimitive) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand Down
20 changes: 10 additions & 10 deletions tests/infer_for_schema/test_patterns_on_self.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_single_pattern(self) -> None:
@verification
def matches_something(text: str) -> bool:
prefix = "something"
return match(f"{prefix}-[a-zA-Z]+", text) is not None
return match(f"^{prefix}-[a-zA-Z]+$", text) is not None
@invariant(
Expand Down Expand Up @@ -106,7 +106,7 @@ def __init__(self, some_property: Some_constrained_primitive) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+')]},
pattern='^something-[a-zA-Z]+$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -118,11 +118,11 @@ def test_two_patterns(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@verification
def matches_acme(text: str) -> bool:
return match(".*acme.*", text) is not None
return match("^.*acme.*$", text) is not None
@invariant(
Expand Down Expand Up @@ -171,9 +171,9 @@ def __init__(self, some_property: Some_constrained_primitive) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+'),
pattern='^something-[a-zA-Z]+$'),
PatternConstraint(
pattern='.*acme.*')]},
pattern='^.*acme.*$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand All @@ -185,11 +185,11 @@ def test_inheritance_between_constrained_primitives_by_default(self) -> None:
"""\
@verification
def matches_something(text: str) -> bool:
return match("something-[a-zA-Z]+", text) is not None
return match("^something-[a-zA-Z]+$", text) is not None
@verification
def matches_acme(text: str) -> bool:
return match(".*acme.*", text) is not None
return match("^.*acme.*$", text) is not None
@invariant(
lambda self: matches_something(self),
Expand Down Expand Up @@ -244,9 +244,9 @@ def __init__(self, some_property: Some_constrained_primitive) -> None:
patterns_by_property={
'some_property': [
PatternConstraint(
pattern='something-[a-zA-Z]+'),
pattern='^something-[a-zA-Z]+$'),
PatternConstraint(
pattern='.*acme.*')]},
pattern='^.*acme.*$')]},
set_of_primitives_by_property={},
set_of_enumeration_literals_by_property={})"""
),
Expand Down

0 comments on commit b6d0a60

Please sign in to comment.