Skip to content

Commit b835e68

Browse files
committed
removed ' from horses, added weapon, added .gitignore
1 parent 395c579 commit b835e68

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+12207
-11485
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.DS_Store

digital_manuscript.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,24 @@ class BnF():
99

1010
def __init__(self, entry_list = [], apply_corrections = True):
1111
""" Initialize entire manuscript. If a list of IDs is given, narrow it down to them. """
12-
complete_manuscript = generate_complete_manuscript(complete=False, apply_corrections=apply_corrections)
12+
complete_manuscript = generate_complete_manuscript(apply_corrections=apply_corrections)
13+
# complete_manuscript2 = generate_complete_manuscript2(apply_corrections=apply_corrections)
1314
if entry_list:
14-
self.entries = [page for page in complete_manuscript if page.identity in entry_list]
15+
self.entries = {i:e for i, e in complete_manuscript.items() if e.identity in entry_list}
1516
else:
1617
self.entries = complete_manuscript
1718

18-
def entry(self, identity: str):
19+
def entry(self, identity: str = ''):
1920
""" Return entry with the given identity. """
20-
for entry in self.entries:
21-
if entry.identity == identity:
22-
return entry
21+
return self.entries.get(identity)
2322

2423
search_type = Optional[Union[str, bool]]
2524
def search(self, animal: search_type = None, body_part: search_type = None, currency: search_type = None,
2625
definition: search_type = None, environment: search_type = None, material: bool = None,
2726
medical: search_type = None, measurement: search_type = None, music: search_type = None,
2827
plant: search_type = None, place: search_type = None, personal_name: search_type = None,
2928
profession: search_type = None, sensory: search_type = None, tool: search_type = None,
30-
time: search_type = None) -> List[str]:
29+
time: search_type = None, weapon: search_type = None) -> List[str]:
3130
"""
3231
Search through each entry and return the identities that satisfy the criterion. Arguments are each of the element
3332
types for a manuscript entry, which can be a string, bool, or None. If the argument is a string, then results must have
@@ -38,24 +37,28 @@ def search(self, animal: search_type = None, body_part: search_type = None, curr
3837
args = {'animal': animal, 'body_part': body_part, 'currency': currency, 'definition': definition,
3938
'environment': environment, 'material': material, 'medical': medical, 'measurement': measurement,
4039
'music': music, 'plant': plant, 'place': place, 'personal_name': personal_name, 'profession': profession,
41-
'sensory': sensory, 'tool': tool, 'time': time}
40+
'sensory': sensory, 'tool': tool, 'time': time, 'weapon': weapon}
4241

4342
versions = ['tc', 'tcn', 'tl']
4443
results = self.entries # initialize results
4544
search_bools = [k for k, v in args.items() if isinstance(v, bool)] # select bool element categories
4645
search_strings = [k for k, v in args.items() if isinstance(v, str)] # select string element categories
4746

4847
for s in search_bools: # filter by each bool
49-
results = [r for r in results if any(r.get_prop(s, v) for v in versions)]
48+
results = {i: r for i, r in results.items() if any(r.get_prop(s, v) for v in versions)}
5049
for s in search_strings: # filter by each string
51-
results = [r for r in results if any(args[s] in r.get_prop(s, v) for v in versions)]
50+
results = {i: r for i, r in results.items() if any(args[s] in r.get_prop(s, v) for v in versions)}  # .items() required: iterating a dict directly yields keys only
5251

53-
return([r.identity for r in results]) # return identities
52+
return([i for i, r in results.items()]) # return identities
53+
54+
def search_margins(self, version: str, term: str, placement: str) -> List[str]:
55+
return [i for i, entry in self.entries.items()
56+
if any(margin[0] == placement and term in margin[1] for margin in entry.margins[version])]
5457

5558
def tablefy(self):
5659
# id, head, no. words, category, amount of each tag, margins
5760
# include figure margins?
58-
df = pd.DataFrame(columns=['entry'], data=self.entries)
61+
df = pd.DataFrame(columns=['entry'], data=self.entries.values())
5962
df['identity'] = df.entry.apply(lambda x: x.identity)
6063
df['title'] = df.entry.apply(lambda x: x.title['tl'])
6164
df['length'] = df.entry.apply(lambda x: x.length['tl'])
@@ -64,3 +67,6 @@ def tablefy(self):
6467
df['del_tags'] = df.entry.apply(lambda x: len(x.del_tags))
6568
df = df.drop(columns=['entry'])
6669
return df
70+
71+
manuscript = BnF()
72+
# print(manuscript.search_margins('tl', 'gold', 'left-middle'))

jsonify.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
properties = ['animal', 'body_part', 'currency', 'definition', 'environment', 'material',
1616
'medical', 'measurement', 'music', 'plant', 'place', 'personal_name',
17-
'profession', 'sensory', 'tool', 'time']
17+
'profession', 'sensory', 'tool', 'time', 'weapon']
1818

1919
def read_csvs() -> Dict[str, pd.DataFrame]:
2020
"""
@@ -39,8 +39,8 @@ def read_manuscript(manuscript: BnF, df_dict: Dict[str, pd.DataFrame]):
3939
Output: df_dict -- a dict where each key is an element of properties and the value is the thesaurus
4040
DataFrame for that property.
4141
"""
42-
for i in tqdm(range(len(manuscript.entries))):
43-
entry = manuscript.entries[i]
42+
for identity in tqdm(manuscript.entries.keys()):
43+
entry = manuscript.entries[identity]
4444
for prop in properties:
4545
df = df_dict[prop]
4646
prop_list = entry.get_prop(prop, 'tl')
@@ -129,7 +129,6 @@ def jsonify():
129129
if not os.path.exists('thesaurus'):
130130
print('Thesaurus not found. Generating now.')
131131
os.system('python thesaurus.py')
132-
print('Finished Generating Thesaurus')
133132

134133
manuscript = BnF(apply_corrections=False)
135134
df_dict = read_csvs()

0 commit comments

Comments
 (0)