Skip to content

Commit ffc2410

Browse files
authored
Merge pull request #29 from cu-mkp/issue28
Update paths in update.py and manuscript_helpers.py to account for new m-k-manuscript-data location
2 parents 3a6eabe + e1622b6 commit ffc2410

File tree

2 files changed

+19
-18
lines changed

2 files changed

+19
-18
lines changed

manuscript_helpers.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,10 @@
1010
'music', 'plant', 'place', 'personal_name',
1111
'profession', 'sensory', 'tool', 'time', 'weapon']
1212

13-
cwd = os.getcwd()
14-
m_path = cwd if 'manuscript-object' not in cwd else f'{cwd}/../'
15-
m_k_data_to_thesaurus = f'{m_path}/manuscript-object/thesaurus'
13+
manuscript_data_path = os.path.dirname(os.getcwd()) + "/m-k-manuscript-data"
14+
thesaurus_path = os.getcwd() + "/thesaurus"
15+
assert(os.path.exists(manuscript_data_path)), ("Could not find manuscript data directory: " + manuscript_data_path)
16+
print("Using manuscript data directory:", manuscript_data_path)
1617

1718
def use_thesaurus(entries: Dict[str, Recipe]) -> List[Recipe]:
1819
"""
@@ -27,7 +28,7 @@ def use_thesaurus(entries: Dict[str, Recipe]) -> List[Recipe]:
2728
Output:
2829
entries: List[Recipe] -- same as above, but with the thesaurus corrections applied.
2930
"""
30-
if not os.path.exists(m_k_data_to_thesaurus):
31+
if not os.path.exists(thesaurus_path):
3132
print('Thesaurus not found. Generating now.')
3233
os.system(f'python {cwd}/manuscript-object/thesaurus.py')
3334
print('Finished Generating Thesaurus')
@@ -36,7 +37,7 @@ def use_thesaurus(entries: Dict[str, Recipe]) -> List[Recipe]:
3637

3738
for prop in properties:
3839
dct = {} # {verbatim_term: prefLabel_en}
39-
df = pd.read_csv(f'{m_k_data_to_thesaurus}/{prop}.csv')
40+
df = pd.read_csv(f'{thesaurus_path}/{prop}.csv')
4041

4142
# manual_df = manual_corrections[manual_corrections['property'] == prop]
4243
# manual_dict = {} # verbatim_term, prefLabel_en pairs
@@ -108,7 +109,7 @@ def generate_complete_manuscript(apply_corrections=True) -> Dict[str, Recipe]:
108109
TODO: Instead of going version by version, consider going folio by folio.
109110
"""
110111
for version in versions:
111-
dir_path = f'{m_path}/ms-xml/{version}/'
112+
dir_path = f'{manuscript_data_path}/ms-xml/{version}/'
112113
entry_dict = OrderedDict()
113114

114115
for r, d, f in os.walk(dir_path):

update.py

+12-12
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
1-
# Last Updated | 2020-08-21
1+
# Last Updated | 2020-08-24
22
# Python Modules
33
import os
44
import sys
55
import re
66
from typing import List
77

8-
sys.path.insert(1, './manuscript-object/')
9-
108
# Third Party Modules
119
import pandas as pd
1210
from datetime import datetime
@@ -25,7 +23,9 @@
2523
'profession': 'pro', 'sensory': 'sn', 'tool': 'tl', 'time': 'tmp', 'weapon': 'wp',
2624
'german': 'de', 'greek': 'el', 'italian': 'it', 'latin': 'la', 'occitan': 'oc', 'poitevin': 'po',}
2725

28-
m_path = f'{os.getcwd()}'
26+
manuscript_data_path = os.path.dirname(os.getcwd()) + "/m-k-manuscript-data"
27+
assert(os.path.exists(manuscript_data_path)), ("Could not find manuscript data directory: " + manuscript_data_path)
28+
print("Using manuscript data directory:", manuscript_data_path)
2929

3030
def update_metadata(manuscript: BnF) -> None:
3131
"""
@@ -54,7 +54,7 @@ def update_metadata(manuscript: BnF) -> None:
5454
# remove entry column, since it only displays memory address
5555
df.drop(columns=['entry'], inplace=True)
5656

57-
df.to_csv(f'{m_path}/metadata/entry_metadata.csv', index=False)
57+
df.to_csv(f'{manuscript_data_path}/metadata/entry_metadata.csv', index=False)
5858

5959
def update_entries(manuscript: BnF) -> None:
6060
"""
@@ -67,13 +67,13 @@ def update_entries(manuscript: BnF) -> None:
6767
None
6868
"""
6969

70-
for path in [f'{m_path}/entries', f'{m_path}/entries/txt', f'{m_path}/entries/xml']:
70+
for path in [f'{manuscript_data_path}/entries', f'{manuscript_data_path}/entries/txt', f'{manuscript_data_path}/entries/xml']:
7171
if not os.path.exists(path):
7272
os.mkdir(path)
7373

7474
for version in versions:
75-
txt_path = f'{m_path}/entries/txt/{version}'
76-
xml_path = f'{m_path}/entries/xml/{version}'
75+
txt_path = f'{manuscript_data_path}/entries/txt/{version}'
76+
xml_path = f'{manuscript_data_path}/entries/xml/{version}'
7777

7878
# If the entries/txt or xml directory does not exist, create it. Otherwise, clear the directory.
7979
for path in [txt_path, xml_path]:
@@ -121,7 +121,7 @@ def update_all_folios(manuscript: BnF) -> None:
121121
text = f'{text}\n\n{new_text}' if text else new_text
122122

123123
# write file
124-
f = open(f'{m_path}/allFolios/{folder}/all_{version}.{folder}', 'w')
124+
f = open(f'{manuscript_data_path}/allFolios/{folder}/all_{version}.{folder}', 'w')
125125
f.write(text)
126126
f.close()
127127

@@ -136,11 +136,11 @@ def update_ms(manuscript: BnF) -> None:
136136
None
137137
"""
138138
for version in versions:
139-
for r, d, f in os.walk(f'{m_path}/ms-xml/{version}'):
139+
for r, d, f in os.walk(f'{manuscript_data_path}/ms-xml/{version}'):
140140
for filename in f: # iterate through /ms-xml/{version} folder
141141
# read xml file
142142
text = ''
143-
filepath = f'{m_path}/ms-xml/{version}/{filename}'
143+
filepath = f'{manuscript_data_path}/ms-xml/{version}/{filename}'
144144
with open(filepath, encoding="utf-8", errors="surrogateescape") as f:
145145
text = f.read()
146146

@@ -188,7 +188,7 @@ def update():
188188
print('Updating allFolios')
189189
update_all_folios(manuscript)
190190

191-
update_time()
191+
# update_time()
192192

193193
if __name__ == "__main__":
194194
update()

0 commit comments

Comments
 (0)