-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocess_sources.sh
executable file
·91 lines (80 loc) · 2.62 KB
/
process_sources.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/sh
# Phrases Processing Script
# Daniel M. Zimmerman, September 2024
# This script recursively traverses the supplied directory, processing all
# files within it using the phrases scripts. It assumes that the files all
# have unique basenames, even if they're in different directories.
# This script will only work properly if it is sitting in a directory
# containing a "phrases" directory with the phrases scripts.
# Work out where this script lives so the "phrases" directory next to it can
# be found regardless of the caller's current directory.
if command -v realpath >/dev/null 2>&1; then
  SCRIPT=$(realpath "$0")
else
  # realpath is not installed on every system; without it SCRIPT would end up
  # empty and PHRASESPATH would silently become "./phrases". Fall back to
  # resolving the containing directory with cd/pwd -P (CDPATH is cleared so
  # cd cannot print anything into the substitution).
  SCRIPT="$(CDPATH= cd "$(dirname "$0")" && pwd -P)/$(basename "$0")"
fi
SCRIPTPATH=$(dirname "$SCRIPT")
PHRASESPATH="${SCRIPTPATH}/phrases"
# Make sure the phrases scripts are where we expect them (this only confirms
# that they are present; their contents are not inspected)
if [ -d "${PHRASESPATH}" ] \
  && [ -f "${PHRASESPATH}/nouns.py" ] \
  && [ -f "${PHRASESPATH}/verbs.py" ]; then
  : # everything needed is in place, carry on
else
  echo "This script must be run from a directory containing the phrases script directory."
  exit 1
fi
# Require exactly two arguments: the input directory and the output directory
case "$#" in
  2) ;;
  *)
    echo "Usage: $0 <input-directory> <output-directory>"
    exit 1
    ;;
esac
# The first argument has to name an existing directory to scan
[ -d "$1" ] || {
  echo "Error: '$1' is not a directory."
  exit 1
}
# Create the output directory if it doesn't exist. If the path exists but is
# something other than a directory, refuse to continue.
if [ -e "$2" ] && [ ! -d "$2" ]; then
  echo "Error: '$2' is not a directory."
  exit 1
fi
if [ ! -e "$2" ]; then
  echo "Creating output directory '$2'"
  # The path is quoted so that a name containing spaces or glob characters
  # creates exactly the requested directory, and "--" keeps a leading "-"
  # from being read as an option. The result of mkdir is tested directly.
  if ! mkdir -p -- "$2"; then
    echo "Could not create directory '$2', exiting."
    exit 1
  fi
fi
echo "Generating histograms for all dependent documents that don't have them."
echo "Scanning directory $1."
# Visit every regular file under the input directory (following symlinks) and
# make sure a nouns and a verbs histogram exist for it in the output
# directory. Histograms that already exist are left alone; a failed run
# removes its partial output file and stops the scan.
#
# NOTE: shells such as bash and dash run this loop in a subshell because it is
# the last stage of a pipeline, so "exit 1" below ends the loop rather than
# the script directly. The non-zero status still becomes the script's exit
# status as long as this pipeline remains the script's final command.
find "$1" -follow -type f | while IFS= read -r file; do
  base=$(basename "$file")
  # We don't care about the file extension, we're just going to assume we
  # can process it
  noext=${base%.*}
  # Don't process a README or a BibTeX file; also skip names that have
  # nothing before the last dot (e.g. ".gitignore"), since there would be no
  # stem to name the output files after
  if [ "${noext}" = "README" ] || [ "${file##*.}" = "bib" ] || [ "${noext}" = "" ]; then
    echo "Skipping '${file}'."
    continue
  fi
  echo "Processing '${file}'..."
  nounsfile="$2/${noext}_nouns.csv"
  verbsfile="$2/${noext}_verbs.csv"
  if [ -f "${nounsfile}" ]; then
    echo " ${noext} nouns histogram already exists"
  else
    echo " Creating nouns histogram..."
    # The script path is quoted so an install location containing spaces
    # still works.
    if ! "${PHRASESPATH}/nouns.py" "${file}" > "${nounsfile}"; then
      # Don't leave a partial histogram behind; it would be mistaken for a
      # finished one on the next run.
      rm -f -- "${nounsfile}"
      echo "Processing of '${noext}' failed, exiting."
      exit 1
    fi
  fi
  if [ -f "${verbsfile}" ]; then
    echo " ${noext} verbs histogram already exists"
  else
    echo " Creating verbs histogram..."
    if ! "${PHRASESPATH}/verbs.py" "${file}" > "${verbsfile}"; then
      # Quoted so that output paths containing spaces are really removed.
      rm -f -- "${verbsfile}"
      echo "Processing of '${noext}' failed, exiting."
      exit 1
    fi
  fi
  echo "Processing of '${noext}' complete."
done