Skip to content

Commit

Permalink
(FIX) harden against substring collision
Browse files Browse the repository at this point in the history
  • Loading branch information
rciric committed Aug 29, 2023
1 parent 39d7cf8 commit bb64404
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
29 changes: 21 additions & 8 deletions src/gramform/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from abc import abstractmethod, abstractstaticmethod
from collections import defaultdict
from copy import deepcopy
from functools import partial
from hashlib import sha256
from typing import (
Any,
Expand Down Expand Up @@ -133,6 +134,10 @@ def substitute(
if end is None:
end = start + len(content)
if loc_type == 'index':
if (start > 0 and self.index[start] == self.index[start - 1]) or (
end < len(self) - 1 and self.index[end - 1] == self.index[end]
):
return self
start = self.index[start]
end = self.index[end]
if start == end:
Expand Down Expand Up @@ -798,23 +803,31 @@ def parse(
def verify_level(
    self,
    tree: SyntacticTree,
    correct: bool = False,
) -> SyntacticTree:
    """
    Check that a non-transform node of the parse tree is a leaf.

    Parameters
    ----------
    tree
        Node to check. It passes verification when ``tree.children``
        is empty.
    correct
        When ``False`` (the default), a node that still has children is
        reported as an error. When ``True``, such a node is instead
        collapsed in place: its content is replaced by its recursively
        materialised form and its children are discarded.

    Returns
    -------
    SyntacticTree
        The same ``tree`` object, possibly modified in place when
        ``correct`` is ``True``.

    Raises
    ------
    UnparsedTreeError
        If the node has children and ``correct`` is ``False``.
    """
    # Leaves need no further checking.
    if len(tree.children) == 0:
        return tree

    if not correct:
        raise UnparsedTreeError(
            f'Unparsed non-transform node {tree} '
            f'(full version: {tree.materialise(recursive=True)}) '
            f'has children: {[v for v in tree.children.values()]}. '
            'All nodes must be either transforms or terminal (leaves).'
        )

    # Collapse the node into a leaf holding its fully materialised content.
    # NOTE(review): the nested IndexedNestedString(...) wrapping is kept
    # exactly as committed -- confirm the double construction is intended.
    tree.content = IndexedNestedString(
        IndexedNestedString(tree.materialise(recursive=True)),
    )
    tree.children = {}
    return tree

def verify_parse(
    self,
    tree: SyntacticTree,
    correct: bool = False,
) -> None:
    """
    Run ``verify_level`` over a parse tree.

    Parameters
    ----------
    tree
        Root of the tree to verify.
    correct
        Forwarded to ``verify_level``: when ``True``, nodes that still
        have children are collapsed into leaves instead of raising.

    Notes
    -----
    The walk is delegated to ``Grammar.recur_depth_first`` with
    ``skip_transform_roots=True``; presumably transform nodes themselves
    are exempt from the leaf check -- confirm against that helper.
    """
    # Bind ``correct`` up front so the traversal callback receives only
    # the node, exactly once per visited node.
    Grammar.recur_depth_first(
        tree=tree,
        f=partial(self.verify_level, correct=correct),
        skip_transform_roots=True,
    )

Expand Down Expand Up @@ -927,7 +940,7 @@ def transform(
self,
tree: SyntacticTree,
) -> TransformTree:
self.verify_parse(tree)
self.verify_parse(tree, correct=True)
tree = self.transform_impl(tree)
return Grammar.annotate_leaf_count(tree)

Expand Down
1 change: 1 addition & 0 deletions src/gramform/tagops.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __call__(self, *pparams, **params) -> Callable:

def return_selected(
arg: Any,
/,
**datatypes,
) -> Mapping[str, Any]:
keys = set(datatypes.keys())
Expand Down
16 changes: 16 additions & 0 deletions tests/test_tagops.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,24 @@ def tags():
}


def test_substr_compile():
    """Compile expressions whose operands are substrings of one another."""
    grammar = DataTagGrammar()

    # 'b' is a substring of 'ab': swapping the operands of '&' must
    # select the same keys.
    forward = grammar.compile('ab&b')
    backward = grammar.compile('b&ab')
    forward_keys = set(forward(tags(), **dataset()).keys())
    backward_keys = set(backward(tags(), **dataset()).keys())
    assert forward_keys == backward_keys

    # A long conjunction of mutually overlapping names selects only 'b'.
    chained = grammar.compile('ab&bcd&b&bcde&abcde&bc&abc&abcd')
    chained_keys = set(chained(tags(), **dataset()).keys())
    assert chained_keys == {'b'}

    # Compile-only check: the result is not evaluated or asserted on.
    grammar.compile('a|b&ab|abc|de&abcde')


def test_tags():
grammar = DataTagGrammar()

f = grammar.compile('~a&bcd')
assert(set(f(tags(), **dataset()).keys()) == {'b', 'c', 'd'})

Expand Down

0 comments on commit bb64404

Please sign in to comment.