Skip to content

Commit

Permalink
Ensure no overlapping ranges in patterns (#501)
Browse files Browse the repository at this point in the history
The overlapping ranges in regular expressions can hurt performance, so
we explicitly check that no ranges overlap in the pattern verification
functions.
  • Loading branch information
mristin authored Jun 20, 2024
1 parent e374451 commit 1a50e13
Showing 1 changed file with 41 additions and 3 deletions.
44 changes: 41 additions & 3 deletions aas_core_codegen/parse/retree/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import io
import math
import re
from typing import Tuple, Optional, List, Sequence, Union
from typing import Tuple, Optional, List, Sequence, Union, MutableMapping

from icontract import invariant, require, ensure, snapshot

from aas_core_codegen.common import pairwise
from aas_core_codegen.parse.retree._types import (
Char,
Range,
Expand Down Expand Up @@ -113,6 +114,10 @@ def __init__(self, values: Sequence[Union[str, FormattedValue]]) -> None:
if isinstance(self.pointed_value(), str):
self._minor_cursor = 0

def copy(self) -> "Cursor":
"""Make a deep copy of the cursor."""
return Cursor(values=self.values)

@property
def major_cursor(self) -> int:
"""Return the current cursor in the :attr:`~values`."""
Expand Down Expand Up @@ -617,6 +622,8 @@ def _parse_ranges_and_closing(
if cursor.try_literal("-"):
ranges.append(Range(start=Char("-"), end=None))

cursor_by_range = dict() # type: MutableMapping[Range, Cursor]

while True:
if cursor.done():
return None, Error(
Expand All @@ -625,10 +632,14 @@ def _parse_ranges_and_closing(
cursor,
)

cursor_at_start = cursor.copy()

# NOTE (mristin, 2022-06-08):
# A suffix dash is also allowed and should be considered a single character.
if cursor.try_literal("-]"):
ranges.append(Range(start=Char("-"), end=None))
the_range = Range(start=Char("-"), end=None)
cursor_by_range[the_range] = cursor_at_start
ranges.append(the_range)
break
elif cursor.try_literal("]"):
break
Expand Down Expand Up @@ -656,7 +667,34 @@ def _parse_ranges_and_closing(
"Invalid character range, start is smaller than end", cursor
)

ranges.append(Range(start=start, end=end))
the_range = Range(start=start, end=end)
cursor_by_range[the_range] = cursor_at_start
ranges.append(the_range)

for this_range, next_range in pairwise(
sorted(ranges, key=lambda rng: ord(rng.start.character))
):
this_end = this_range.end if this_range.end is not None else this_range.start

next_start = next_range.start

if ord(this_end.character) >= ord(next_start.character):
this_range_str = (
f"{this_range.start.character!r}"
if this_range.end is None
else f"{this_range.start.character!r}-{this_range.end.character!r}"
)

next_range_str = (
f"{next_range.start.character!r}"
if next_range.end is None
else f"{next_range.start.character!r}-{next_range.end.character!r}"
)

return None, Error(
f"The range {this_range_str} and the range {next_range_str} overlap",
cursor_by_range[this_range],
)

return ranges, None

Expand Down

0 comments on commit 1a50e13

Please sign in to comment.