-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.sh
executable file
·181 lines (134 loc) · 4.54 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/bin/bash
#bensmafx
#16.8.16
#
# Executes the linking workflow.
#
# Each stage is implemented by a helper script living in a sub-directory of
# the directory this script is started from. The switches below select which
# stages run: 1 enables a stage, 0 skips it.
PREPROCESS_SWISSBIB=0
LINK_DBPEDIA=1
LINK_VIAF_NAME=0
LINK_VIAF_GND=0
POSTPROCESS_DATA=0

# Remember the start directory; every later "cd" is built from it.
CURRENT_WORKING_DIRECTORY=$(pwd)

# Load the path variables (presumably this file defines LINKED_LOGGING and
# LINKED_TMP_DATA_FOLDER -- verify against paths/load_path_variables.sh).
source ./paths/load_path_variables.sh

# Both variables are used to build file paths below. If either is empty the
# paths would collapse to "/process.log", "/accepted.nt", ... so stop here
# instead of working on the file system root.
: "${LINKED_LOGGING:?is not set - check ./paths/load_path_variables.sh}"
: "${LINKED_TMP_DATA_FOLDER:?is not set - check ./paths/load_path_variables.sh}"

# Removal of the temporary files created by the last run is currently
# disabled:
# rm "$LINKED_TMP_DATA_FOLDER/"

# The start time of the process.
echo -n "Start process: " >> "$LINKED_LOGGING/process.log"
date >> "$LINKED_LOGGING/process.log"
# Optional stage: pre-process the Swissbib data.
# The flag is quoted so an empty value yields a clean test failure instead of
# a malformed "[" expression.
if [ "$PREPROCESS_SWISSBIB" -eq 1 ]; then
  # The helper script is started from inside its own directory; without that
  # directory there is nothing sensible to run, so fail with a clear message.
  if ! cd "$CURRENT_WORKING_DIRECTORY/swissbib"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/swissbib. Exiting." 1>&2
    exit 1
  fi
  # RUN
  ./preprocess_swissbib.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo Preprocessing Swissbib ok.
  else
    # Propagate the helper's own exit status to the caller.
    echo Error during preprocessing of Swissbib. Exiting. 1>&2
    exit "$STATUS"
  fi
fi
# The linking helper scripts are started from inside the "linking" directory.
# This cd is unconditional, so the following stages start from there even
# when DBpedia linking is switched off.
if ! cd "$CURRENT_WORKING_DIRECTORY/linking"; then
  echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/linking. Exiting." 1>&2
  exit 1
fi
if [ "$LINK_DBPEDIA" -eq 1 ]; then
  # RUN
  # Step 1: generate the configuration files for the DBpedia interlinking.
  ./generate_configs4dbpedia.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Generating configuration files for interlinking with DBpedia ok."
  else
    echo "Error during generation of configuration file for interlinking with DBpedia. Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 2: run the interlinking itself.
  ./do_parallel_linking.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Parallel interlinking with DBpedia ok."
  else
    echo "Error during parallel interlinking with DBpedia. Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 3: keep the accepted links under a DBpedia-specific name. The same
  # "accepted.nt" name is reused by the VIAF stages, so a failed rename must
  # not go unnoticed.
  if ! mv "$LINKED_TMP_DATA_FOLDER/accepted.nt" "$LINKED_TMP_DATA_FOLDER/dbpedia_link_file.nt"; then
    echo "Error: could not store the DBpedia link file. Exiting." 1>&2
    exit 1
  fi

  # Get DBpedia enrichment
  if ! cd "$CURRENT_WORKING_DIRECTORY/dbpedia"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/dbpedia. Exiting." 1>&2
    exit 1
  fi
  ./postprocess_dbpedia.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Enriching with DBpedia ok."
  else
    echo "Error during enrichment with DBpedia. Exiting." 1>&2
    exit "$STATUS"
  fi
fi # END LINK DBPEDIA
# Optional stage: interlink with VIAF using the "FN-LN-BD" configuration.
if [ "$LINK_VIAF_NAME" -eq 1 ]; then
  # Link with Viaf normal
  # A previous stage may have left us in another directory, so go back to
  # the directory holding the linking helper scripts.
  if ! cd "$CURRENT_WORKING_DIRECTORY/linking"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/linking. Exiting." 1>&2
    exit 1
  fi

  # Step 1: generate the configuration files.
  ./generate_configs4viaf.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Generating configuration files for interlinking with VIAF (FN-LN-BD) ok."
  else
    # Label corrected from "FN-LN-DB" to match the other messages of this stage.
    echo "Error during generation of configuration files for interlinking with VIAF (FN-LN-BD). Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 2: run the interlinking itself.
  ./do_parallel_linking.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Parallel inter-linking with VIAF (FN-LN-BD) ok."
  else
    echo "Error during parallel inter-linking with VIAF (FN-LN-BD). Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 3: keep the accepted links under a stage-specific name; the
  # "accepted.nt" name is reused by the other linking stages.
  if ! mv "$LINKED_TMP_DATA_FOLDER/accepted.nt" "$LINKED_TMP_DATA_FOLDER/viaf_normal_link_file.nt"; then
    echo "Error: could not store the VIAF (FN-LN-BD) link file. Exiting." 1>&2
    exit 1
  fi
fi # END LINK VIAF NAME
# Optional stage: interlink with VIAF using the GND ids.
if [ "$LINK_VIAF_GND" -eq 1 ]; then
  # Link with Viaf gnd ids
  # The DBpedia stage ends inside the "dbpedia" directory, so change to the
  # directory holding the linking helper scripts explicitly instead of
  # relying on whichever stage ran before.
  if ! cd "$CURRENT_WORKING_DIRECTORY/linking"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/linking. Exiting." 1>&2
    exit 1
  fi

  # Step 1: generate the configuration files. The messages now name this
  # step; previously they repeated the texts of the linking step below.
  ./generate_configs4viaf_gndids.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Generating configuration files for interlinking with VIAF (GND-ID) ok."
  else
    echo "Error during generation of configuration files for interlinking with VIAF (GND-ID). Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 2: run the interlinking itself.
  ./do_parallel_linking.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Parallel interlinking with VIAF (GND-ID) ok."
  else
    echo "Error during parallel interlinking with VIAF (GND-ID). Exiting." 1>&2
    exit "$STATUS"
  fi

  # Step 3: keep the accepted links under a stage-specific name; the
  # "accepted.nt" name is reused by the other linking stages.
  if ! mv "$LINKED_TMP_DATA_FOLDER/accepted.nt" "$LINKED_TMP_DATA_FOLDER/viaf_gnd_link_file.nt"; then
    echo "Error: could not store the VIAF (GND-ID) link file. Exiting." 1>&2
    exit 1
  fi
fi # END LINK VIAF GND
# Optional stage: merge the VIAF link files, fetch the VIAF enrichment and
# write the final output.
# NOTE(review): the opening "if" of this stage was missing, which left the
# closing "fi # END POSTPROCESS" unmatched (a syntax error) and the
# POSTPROCESS_DATA switch unused. The guard is placed before the merge step;
# confirm that this is where the stage is meant to begin.
if [ "$POSTPROCESS_DATA" -eq 1 ]; then
  # Merge link files
  VIAF_NAME_LINK_FILE="$LINKED_TMP_DATA_FOLDER/viaf_normal_link_file.nt"
  VIAF_GND_LINK_FILE="$LINKED_TMP_DATA_FOLDER/viaf_gnd_link_file.nt"

  # Start from an empty file and append whichever link files exist; either
  # VIAF linking stage may have been switched off.
  : > "$LINKED_TMP_DATA_FOLDER/accepted.nt"
  for LINK_FILE in "$VIAF_NAME_LINK_FILE" "$VIAF_GND_LINK_FILE"; do
    if [ -f "$LINK_FILE" ]; then
      cat "$LINK_FILE" >> "$LINKED_TMP_DATA_FOLDER/accepted.nt"
    fi
  done

  # Sort, then drop duplicate entries. Stop on failure so the inputs are not
  # deleted below while no merged result exists.
  if ! reshaperdf sort "$LINKED_TMP_DATA_FOLDER/accepted.nt" "$LINKED_TMP_DATA_FOLDER/accepted_sorted.nt"; then
    echo "Error during sorting of the merged VIAF link file. Exiting." 1>&2
    exit 1
  fi
  if ! reshaperdf removeduplicates "$LINKED_TMP_DATA_FOLDER/accepted_sorted.nt" "$LINKED_TMP_DATA_FOLDER/accepted_wo_dup.nt"; then
    echo "Error during removal of duplicates from the merged VIAF link file. Exiting." 1>&2
    exit 1
  fi

  # Remove the intermediate files; -f because a link file may not exist.
  rm -f "$VIAF_NAME_LINK_FILE" "$VIAF_GND_LINK_FILE" "$LINKED_TMP_DATA_FOLDER/accepted_sorted.nt"
  if ! mv "$LINKED_TMP_DATA_FOLDER/accepted_wo_dup.nt" "$LINKED_TMP_DATA_FOLDER/viaf_link_file.nt"; then
    echo "Error: could not store the merged VIAF link file. Exiting." 1>&2
    exit 1
  fi

  # Get Viaf enrichment
  if ! cd "$CURRENT_WORKING_DIRECTORY/viaf"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/viaf. Exiting." 1>&2
    exit 1
  fi
  # RUN
  ./postprocess_viaf.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Enriching with VIAF ok."
  else
    echo "Error during enrichment with VIAF. Exiting." 1>&2
    exit "$STATUS"
  fi

  # Write final output
  if ! cd "$CURRENT_WORKING_DIRECTORY/output"; then
    echo "Error: cannot change to directory $CURRENT_WORKING_DIRECTORY/output. Exiting." 1>&2
    exit 1
  fi
  ./collect_data.sh
  STATUS=$?
  if [ "$STATUS" -eq 0 ]; then
    echo "Merging original data, links and enrichment data ok."
  else
    echo "Error during merging original data, links and enrichment data. Exiting." 1>&2
    exit "$STATUS"
  fi
fi # END POSTPROCESS
# Append the end time of the process to the process log, then report
# completion on stdout and leave with a success status.
printf '%s' "End process: " >> "$LINKED_LOGGING/process.log"
date >> "$LINKED_LOGGING/process.log"
echo "Done"
exit 0