Skip to content

Commit a10a5f6

Browse files
committed
add unicode test case, more test cases, fuzzer
1 parent d071327 commit a10a5f6

File tree

3 files changed

+182
-1
lines changed

3 files changed

+182
-1
lines changed

test/data/unicode_example.csv

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Jamés,Likes,Coffee
2+
Анна,Likes,Tea

test/fuzz.py

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import random
2+
from kgl.graph import KnowledgeGraph
3+
import os
4+
import lark
5+
from nltk.corpus import stopwords
6+
from nltk import download as nltk_download
7+
8+
# ---------------------------------------------------------------------------
# Import-time setup. Everything below runs as soon as this module is loaded.
# ---------------------------------------------------------------------------

# The stopword corpus must be present before stopwords.fileids() and
# stopwords.words() are used further down.
print("Downloading stopwords...")
nltk_download("stopwords")

print("Running tests...")

test_dir = os.path.dirname(os.path.abspath(__file__))

# Graph that execute_query() evaluates every fuzzed query against.
kg = KnowledgeGraph().load_from_csv(os.path.join(test_dir, "data", "example.csv"))

# Hand-written queries that mutate() perturbs inside test_fuzzer().
seeds = [
    "{ coffee -> is }",
    "{ coffee -> is -> coffee }",
    "{ tea -> type-of }",
    "{ James -> favourite-songs } + { Taylor -> favourite-songs }",
    "{ coffee } INTERSECTION { tea }",
    "{ coffee } - { tea }",
    "{ coffee -> is } - { tea -> is }",
]

seed_templates = {
    # query structure, number of words to generate
    "single_query": ("{ %s }", 1),
    "single_query_with_two_word_clause": ("{ %s %s -> %s }", 3),
    "set_union": ("{ %s } + { %s }", 2),
    "set_intersection": ("{ %s } INTERSECTION { %s }", 2),
    "set_difference": ("{ %s } - { %s }", 2),
}

supported_languages = stopwords.fileids()

# Replacement pools keyed by name: one stopword list per corpus file id, plus
# the synthetic pools added below.
character_ranges = {
    file_id: list(stopwords.words(file_id)) for file_id in supported_languages
}
# Every code point from U+0000 through U+10FFFF inclusive. range() excludes
# its stop value, hence the "+ 1"; without it U+10FFFF is never generated.
# NOTE(review): this pool includes the surrogate range U+D800-U+DFFF, which
# cannot be encoded as UTF-8 (e.g. when printed) - confirm that is intended.
character_ranges["unicode"] = [chr(i) for i in range(0x0000, 0x10FFFF + 1)]
character_ranges["numbers"] = [str(random.randint(1, 10_000_000)) for _ in range(1000)]
character_ranges["long_numbers"] = [
    str(random.randint(10_000_000_000_000, 10_000_000_000_000_000)) for _ in range(1000)
]

# Only "unicode" is added to the pools that
# get_random_word_from_random_language() picks from; "numbers" and
# "long_numbers" are reached solely via the character_ranges loop in
# test_fuzzer().
supported_languages.append("unicode")

# Probability that change() selects a position for mutation.
CHANGE_RATE = 0.1
# Number of queries generated per seed (and per template) in each fuzzing pass.
ITERATIONS_PER_SEED = 100
51+
52+
53+
def change():
    """Decide whether a single position should be mutated.

    Returns:
        ``True`` with probability ``CHANGE_RATE``, ``False`` otherwise.
    """
    outcomes = [["do not change"], ["change"]]
    odds = [1 - CHANGE_RATE, CHANGE_RATE]

    # A single weighted draw; each outcome is a one-element list holding its label.
    drawn = random.choices(population=outcomes, weights=odds, k=1)
    label = drawn[0][0]
    return label == "change"
62+
63+
64+
def mutate(
    seed, characters_to_skip=("{", "}", "-", ">", "<"), character_range="unicode"
):
    """Return a copy of ``seed`` with some characters randomly replaced.

    Each position is independently selected for replacement by ``change()``.
    A selected position is overwritten with a random entry from
    ``character_ranges[character_range]`` unless the character currently at
    that position is listed in ``characters_to_skip``.

    Args:
        seed: Query string to mutate.
        characters_to_skip: Characters that must be left untouched. Pass an
            empty collection to allow every position to be mutated.
        character_range: Key into ``character_ranges`` naming the pool that
            replacements are drawn from.

    Returns:
        The mutated query as a new string. Pool entries may be longer than a
        single character, so the result can be longer than ``seed``.
    """
    mutated = list(seed)

    for index, character in enumerate(mutated):
        # Test the character itself against the skip list. Comparing the
        # integer index (as before) never matched, so protected characters
        # such as "{" and "}" were mutated anyway.
        if change() and character not in characters_to_skip:
            mutated[index] = random.choice(character_ranges[character_range])

    return "".join(mutated)
74+
75+
76+
def get_random_word_from_random_language():
    """Pick a random pool from ``supported_languages``, then a random entry from it."""
    pool_name = random.choice(supported_languages)
    candidates = character_ranges[pool_name]
    return random.choice(candidates)
78+
79+
80+
def generate_query_from_scratch(template, num_words_to_generate):
    """Fill a ``%s``-style query template with randomly chosen words.

    Args:
        template: Format string containing ``%s`` placeholders.
        num_words_to_generate: How many random words to produce; ``%``
            formatting requires this to match the number of placeholders.

    Returns:
        The template with each placeholder replaced by a random word.
    """
    words = []
    for _ in range(num_words_to_generate):
        words.append(get_random_word_from_random_language())

    return template % tuple(words)
84+
85+
86+
def execute_query(query):
    """Evaluate ``query`` against the shared graph and flag unexpected failures.

    Args:
        query: Query string passed to ``kg.evaluate``.

    Returns:
        ``True`` if evaluation raised an exception other than the expected
        rejections. ``False`` if the query evaluated without error or was
        rejected with ``lark.exceptions.UnexpectedCharacters`` or
        ``ValueError``.
    """
    try:
        kg.evaluate(query)
    except (lark.exceptions.UnexpectedCharacters, ValueError):
        # In this case, the program has successfully detected an invalid input.
        return False
    except Exception:
        # In this case, an unknown error has been raised.
        return True

    # The query evaluated cleanly. Return an explicit bool so the function
    # never falls through to an implicit None.
    return False
95+
96+
97+
def test_fuzzer():
    """Fuzz ``execute_query`` with mutated seeds and template-generated queries.

    The query list is built in four passes: seeds mutated with the default
    skip list, seeds mutated with an empty skip list, seeds mutated with an
    empty skip list once per pool in ``character_ranges``, and queries
    generated from ``seed_templates``.

    When the module is imported (``__name__ != "__main__"``), the first
    failing query is printed and the run stops via ``assert False``. When
    run as a script, all failures are collected and a summary is printed.
    """

    def mutated_seeds(*mutate_args):
        # One mutation per (seed, iteration) pair, seeds in the outer loop.
        return [
            mutate(seed, *mutate_args)
            for seed in seeds
            for _ in range(ITERATIONS_PER_SEED)
        ]

    queries = []

    # Pass 1: default skip list and default pool.
    queries += mutated_seeds()
    # Pass 2: empty skip list, default pool.
    queries += mutated_seeds([])

    # Pass 3: empty skip list, every available pool in turn.
    for pool_name in character_ranges:
        queries += mutated_seeds([], pool_name)

    # Pass 4: queries built from templates filled with random words.
    for template, word_count in seed_templates.values():
        for _ in range(ITERATIONS_PER_SEED):
            queries.append(generate_query_from_scratch(template, word_count))

    total = len(queries)
    failures = []

    for query in queries:
        if not execute_query(query):
            continue

        failures.append(query)
        if __name__ != "__main__":
            print(query)
            assert False

    failure_count = len(failures)
    success_rate = (total - failure_count) / total * 100

    print(
        f"Ran {total} tests with {failure_count} failures ({success_rate}% success rate)"
    )
138+
139+
140+
if __name__ == "__main__":
141+
test_fuzzer()

test/test.py

+39-1
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,41 @@ def kg():
1212
kg = KnowledgeGraph().load_from_csv(os.path.join(test_dir, "data", "example.csv"))
1313
return kg
1414

15+
@pytest.fixture
def unicode_kg():
    """Provide a ``KnowledgeGraph`` loaded from ``data/unicode_example.csv``."""
    from kgl import KnowledgeGraph

    csv_path = os.path.join(test_dir, "data", "unicode_example.csv")
    return KnowledgeGraph().load_from_csv(csv_path)
21+
1522

1623
def test_evaluate(kg):
    """Check the first element returned by ``evaluate`` for three query forms."""
    expected_by_query = {
        "{ James }": [{"Likes": ["Coffee"]}],
        "{ James -> Likes }": [["Coffee"]],
        "{ James <-> Coffee }": [["James", ("Coffee", "Likes")]],
    }

    # Dicts preserve insertion order, so queries run in the order listed.
    for query, expected in expected_by_query.items():
        assert kg.evaluate(query)[0] == expected
2027

28+
def test_unicode_query(unicode_kg):
    """Check that queries containing non-ASCII node names evaluate as expected."""
    expected_by_query = {
        "{ Jamés }": [{"Likes": ["Coffee"]}],
        "{ Анна -> Likes }": [["Tea"]],
        "{ Анна <-> Tea }": [["Анна", ("Tea", "Likes")]],
    }

    # Dicts preserve insertion order, so queries run in the order listed.
    for query, expected in expected_by_query.items():
        assert unicode_kg.evaluate(query)[0] == expected
32+
33+
2134
def test_returns_query_time(kg):
    """The second element returned by ``evaluate`` must be a positive number."""
    # evaluate() is unpacked as a pair; only the second element is checked here.
    _result, elapsed = kg.evaluate("{ James }")

    assert elapsed > 0
2538

39+
2640
def test_evaluate_operations(kg):
    """Check the results of queries suffixed with ``#`` and ``?``."""
    count_result = kg.evaluate("{ James -> Likes }#")[0]
    assert count_result == 1

    for query in ("{ James -> Likes }?", "{ James <-> Coffee }?"):
        # Equality (not identity) comparison is kept deliberately so the
        # check accepts exactly what the original assertion accepted.
        assert kg.evaluate(query)[0] == True
3044

45+
3146
def test_add_node_with_query(kg):
    """Evaluate a comma-separated triple query and check the first result element."""
    first_result = kg.evaluate("{evermore, is, amazing}")[0]

    assert first_result == {"is": ["amazing"]}
3348

49+
3450
def test_adding_valid_triple_with_list_value(kg):
3551
kg.add_node(("James", "Likes", ["Terraria", "Cats"]))
3652
result = kg.evaluate("{ James -> Likes }")[0]
@@ -120,3 +136,25 @@ def test_read_from_json(kg):
120136
)
121137
assert kg.evaluate("{ James }")[0] == [{"Likes": ["Coffee"]}]
122138
assert kg.evaluate("{ Anna }")[0] == [{"Likes": ["Tea"]}]
139+
140+
141+
def test_max_query_call_invocation_error(kg):
    """A very long chained query must raise ``QueryDepthExceededError``."""
    from kgl import QueryDepthExceededError

    # length of this will be 150 calls, over default max of 50
    # NOTE(review): the clause is repeated with no separator between copies;
    # confirm the 150 figure against the parser.
    repeated_clause = "coffee -> is -> coffee" * 50
    query = "{" + repeated_clause + "}"

    with pytest.raises(QueryDepthExceededError):
        kg.evaluate(query)
149+
150+
151+
def test_incomplete_queries(kg):
    """Every malformed query must be rejected with ``ValueError``."""
    malformed_queries = [
        "{ James",
        "{ James -> Likes",
        "{",
        "}",
        "{{ James -> Likes }",
        "{ James -> Likes }}",
        "{ James -> Likes }{",
        "{ James -> Likes } + ",
    ]

    for query in malformed_queries:
        # Each query needs its own raises-context: the block exits at the
        # first exception, so queries sharing one context after the first
        # would never be executed.
        with pytest.raises(ValueError):
            kg.evaluate(query)

0 commit comments

Comments
 (0)