-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_generic.py
97 lines (86 loc) · 3.3 KB
/
test_generic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os, sys, time
#import re #, json
import yaml
import wordplay
#print(wordplay.config.test)
# Regression check: re-parse every test URL and compare the outcome, clue by
# clue, against the ground-truth YAML file already saved for that URL.
site = wordplay.config['test']
author = 'author'

# True  -> test the generic parser (use_generic=True, use_custom=False).
# False -> test the custom parser instead (use_custom=True, use_generic=False).
USE_GENERIC_PARSER = True

# Fields compared (spaces ignored) once a saved and a parsed problem have been
# paired up through their `num` and `ad` attributes.
COMPARED_FIELDS = ('clue', 'pattern', 'answer', 'wordplay')

test_cases = [
    dict(  # fts-style
        url='https://example.com/2024_05_06_financial-times-17727-by-bobcat',
    ),
    dict(  # p-style
        url='https://example.com/2018_05_28_financial-times-15869-by-crux',
    ),
]


def _fields_match(saved, parsed):
    """Return True when all COMPARED_FIELDS agree after removing spaces.

    Both arguments must expose ``as_dict()``.
    NOTE(review): values are assumed to be strings (``.replace`` is called on
    them) -- confirm against ``wordplay.Problem.as_dict``.
    """
    saved_dict = saved.as_dict()
    parsed_dict = parsed.as_dict()
    # The two fallback values differ on purpose: a field that is absent from
    # both dicts is reported as a mismatch rather than silently accepted.
    return all(
        saved_dict.get(key, 'MISSING').replace(' ', '')
        == parsed_dict.get(key, 'NOTFOUND').replace(' ', '')
        for key in COMPARED_FIELDS
    )


for case in test_cases:  # [1:2]
    url = case['url']
    problem_arr = wordplay.create_yaml_from_url(
        site, url, author=author,
        overwrite=False, force_parse=True,
        use_custom=not USE_GENERIC_PARSER, use_generic=USE_GENERIC_PARSER,
    )

    # Load the known-good problems stored next to the site's other data.
    fname_stub = wordplay.url_to_fname_stub(url)
    fname_base = f"{site['site_base']}/{author}/{fname_stub}"
    with open(f"{fname_base}.yaml", 'r') as infile:
        data_loaded = yaml.safe_load(infile)

    problem_arr_saved = []
    for clue in data_loaded['clues']:
        p = wordplay.Problem()
        p.from_dict(clue)
        problem_arr_saved.append(p)

    differences_reported = False
    for c_save in problem_arr_saved:  # Look through all the known-good examples
        found = False
        for c_parse in problem_arr:  # Check against all newly found problems
            if c_save.num != c_parse.num or c_save.ad != c_parse.ad:
                continue
            found = True
            if not _fields_match(c_save, c_parse):
                print("\nMis-match between saved, and the following:")
                # Print the pair actually being compared (the previous names
                # `c_saved` / `c_parser` were never bound at this point).
                print(c_save)
                print(c_parse)
                differences_reported = True
            # Mark as consumed so it is neither matched again nor listed
            # below as an unmatched parsed problem.
            c_parse.num = -1
        if not found:
            print("\nThe following saved problem did not match any found problem:")
            print(c_save)
            differences_reported = True

    for c_parse in problem_arr:  # Print out non-matched newly found problems
        if c_parse.num >= 0:
            print("\nThe following matched no saved problem:")
            print(c_parse)
            print()
            differences_reported = True

    if not differences_reported:
        print("* No problems detected * ")
        print()