10
10
'music' , 'plant' , 'place' , 'personal_name' ,
11
11
'profession' , 'sensory' , 'tool' , 'time' , 'weapon' ]
12
12
13
- cwd = os .getcwd ()
14
- m_path = cwd if 'manuscript-object' not in cwd else f'{ cwd } /../'
15
- m_k_data_to_thesaurus = f'{ m_path } /manuscript-object/thesaurus'
13
+ manuscript_data_path = os .path .dirname (os .getcwd ()) + "/m-k-manuscript-data"
14
+ thesaurus_path = os .getcwd () + "/thesaurus"
15
+ assert (os .path .exists (manuscript_data_path )), ("Could not find manuscript data directory: " + manuscript_data_path )
16
+ print ("Using manuscript data directory:" , manuscript_data_path )
16
17
17
18
def use_thesaurus (entries : Dict [str , Recipe ]) -> List [Recipe ]:
18
19
"""
@@ -27,7 +28,7 @@ def use_thesaurus(entries: Dict[str, Recipe]) -> List[Recipe]:
27
28
Output:
28
29
entries: List[Recipe] -- same as above, but with the thesaurus corrections applied.
29
30
"""
30
- if not os .path .exists (m_k_data_to_thesaurus ):
31
+ if not os .path .exists (thesaurus_path ):
31
32
print ('Thesaurus not found. Generating now.' )
32
33
os .system (f'python { cwd } /manuscript-object/thesaurus.py' )
33
34
print ('Finished Generating Thesaurus' )
@@ -36,7 +37,7 @@ def use_thesaurus(entries: Dict[str, Recipe]) -> List[Recipe]:
36
37
37
38
for prop in properties :
38
39
dct = {} # {verbatim_term: prefLabel_en}
39
- df = pd .read_csv (f'{ m_k_data_to_thesaurus } /{ prop } .csv' )
40
+ df = pd .read_csv (f'{ thesaurus_path } /{ prop } .csv' )
40
41
41
42
# manual_df = manual_corrections[manual_corrections['property'] == prop]
42
43
# manual_dict = {} # verbatim_term, prefLabel_en pairs
@@ -108,13 +109,14 @@ def generate_complete_manuscript(apply_corrections=True) -> Dict[str, Recipe]:
108
109
TODO: Instead of going version by version, consider going folio by folio.
109
110
"""
110
111
for version in versions :
111
- dir_path = f'{ m_path } /ms-xml/{ version } /'
112
+ dir_path = f'{ manuscript_data_path } /ms-xml/{ version } /'
112
113
entry_dict = OrderedDict ()
113
114
114
115
for r , d , f in os .walk (dir_path ):
115
116
for filename in f : # iterate through /ms-xml/{version} folder
116
117
# split folio by entry
117
118
info = process_file (f'{ dir_path } { filename } ' )
119
+ print (f"Loading folio { filename } ..." )
118
120
for identity , text in info .items (): # add each entry to dictionary
119
121
entry_dict [identity ] = text
120
122
@@ -137,6 +139,7 @@ def generate_complete_manuscript(apply_corrections=True) -> Dict[str, Recipe]:
137
139
old .versions ['tl' ] + '\n \n ' + tl )
138
140
else :
139
141
entries [entry_id ] = Recipe (entry_id , folio , tc , tcn , tl )
142
+ print (f"Generating Recipe object for { entry_id } ..." )
140
143
141
144
# if specified, manually rewrite entry properties based on thesaurus.
142
145
if apply_corrections :
0 commit comments