Skip to content

Commit bb64404

Browse files
committed
(FIX) harden against substring collision
1 parent 39d7cf8 commit bb64404

File tree

3 files changed

+38
-8
lines changed

3 files changed

+38
-8
lines changed

src/gramform/grammar.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from abc import abstractmethod, abstractstaticmethod
1515
from collections import defaultdict
1616
from copy import deepcopy
17+
from functools import partial
1718
from hashlib import sha256
1819
from typing import (
1920
Any,
@@ -133,6 +134,10 @@ def substitute(
133134
if end is None:
134135
end = start + len(content)
135136
if loc_type == 'index':
137+
if (start > 0 and self.index[start] == self.index[start - 1]) or (
138+
end < len(self) - 1 and self.index[end - 1] == self.index[end]
139+
):
140+
return self
136141
start = self.index[start]
137142
end = self.index[end]
138143
if start == end:
@@ -798,23 +803,31 @@ def parse(
798803
def verify_level(
799804
self,
800805
tree: SyntacticTree,
806+
correct: bool = False,
801807
) -> SyntacticTree:
802808
if len(tree.children) != 0:
803-
raise UnparsedTreeError(
804-
f'Unparsed non-transform node {tree} '
805-
f'(full version: {tree.materialise(recursive=True)}) '
806-
f'has children: {[v for v in tree.children.values()]}. '
807-
'All nodes must be either transforms or terminal (leaves).'
808-
)
809+
if correct:
810+
tree.content = IndexedNestedString(
811+
IndexedNestedString(tree.materialise(recursive=True)),
812+
)
813+
tree.children = {}
814+
else:
815+
raise UnparsedTreeError(
816+
f'Unparsed non-transform node {tree} '
817+
f'(full version: {tree.materialise(recursive=True)}) '
818+
f'has children: {[v for v in tree.children.values()]}. '
819+
'All nodes must be either transforms or terminal (leaves).'
820+
)
809821
return tree
810822

811823
def verify_parse(
812824
self,
813825
tree: SyntacticTree,
826+
correct: bool = False,
814827
) -> None:
815828
Grammar.recur_depth_first(
816829
tree=tree,
817-
f=self.verify_level,
830+
f=partial(self.verify_level, correct=correct),
818831
skip_transform_roots=True,
819832
)
820833

@@ -927,7 +940,7 @@ def transform(
927940
self,
928941
tree: SyntacticTree,
929942
) -> TransformTree:
930-
self.verify_parse(tree)
943+
self.verify_parse(tree, correct=True)
931944
tree = self.transform_impl(tree)
932945
return Grammar.annotate_leaf_count(tree)
933946

src/gramform/tagops.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ def __call__(self, *pparams, **params) -> Callable:
7878

7979
def return_selected(
8080
arg: Any,
81+
/,
8182
**datatypes,
8283
) -> Mapping[str, Any]:
8384
keys = set(datatypes.keys())

tests/test_tagops.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,8 +38,24 @@ def tags():
3838
}
3939

4040

41+
def test_substr_compile():
42+
grammar = DataTagGrammar()
43+
f = grammar.compile('ab&b')
44+
g = grammar.compile('b&ab')
45+
assert(
46+
set(f(tags(), **dataset()).keys()) ==
47+
set(g(tags(), **dataset()).keys())
48+
)
49+
50+
f = grammar.compile('ab&bcd&b&bcde&abcde&bc&abc&abcd')
51+
assert(set(f(tags(), **dataset()).keys()) == {'b'})
52+
53+
f = grammar.compile('a|b&ab|abc|de&abcde')
54+
55+
4156
def test_tags():
4257
grammar = DataTagGrammar()
58+
4359
f = grammar.compile('~a&bcd')
4460
assert(set(f(tags(), **dataset()).keys()) == {'b', 'c', 'd'})
4561

0 commit comments

Comments
 (0)