-
Notifications
You must be signed in to change notification settings - Fork 66
/
sync-ebooks
executable file
·270 lines (227 loc) · 7.24 KB
/
sync-ebooks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#! /usr/bin/env bash
# Exit as soon as a command fails (commands tested by if/while or joined with && / || are exempt).
set -e
# Make a pipeline fail when any command in it fails, not only when the last one does.
set -o pipefail
# Prints the help text to stdout and then terminates the script.
# The script name shown in the text is $0 with any leading directories removed.
# Takes no arguments and never returns to the caller.
usage(){
	cat <<EOF
DESCRIPTION
Syncs books from standardebooks.org GitHub org to specified folder.
USAGE
${0##*/} [-h,--help] [-v,-vv,--verbosity INTEGER] [-u,--update-only] [-b,--bare] [--token TOKEN] DIRECTORY
With -h or --help, display this help text and exit.
With -v or --verbosity 1, display general progress updates.
With -vv or --verbosity 2, display general progress updates and verbose git output.
With --update-only, only sync existing repositories, do not download new repositories.
With -b or --bare, clone bare repositories (for a server) instead of working directories.
With --token TOKEN, specify a GitHub access token to use for request. Useful for when you hit the rate limit.
DIRECTORY should be where the repositories should go.
NOTE: This script requires GNU versions of grep and sed. If you are on a Mac, you will need to
install GNU versions (via Homebrew, MacPorts, etc.) and make sure they are first in your path,
or modify the script to use the GNU versions if they're named differently.
EXAMPLE
${0##*/} /standardebooks.org/ebooks
EOF
	exit
}
# functions used by the script
# Reports a fatal error and stops the script.
# Arguments: $1 - the message to show after the highlighted "Error:" label
# Outputs:   the label and message on stderr
# Exits with status 1; never returns to the caller.
die(){
	local message="${1}"

	printf "\033[0;7;31mError:\033[0m %s\n" "${message}" >&2
	exit 1
}
# Makes sure an external command is available before the script relies on it.
# Arguments: $1 - name of the command to look up
#            $2 - optional hint appended to the error message (e.g. how to install it)
# Returns 0 when the command is found; otherwise hands a message to die.
require(){
	# kept local so the hint text does not leak into the script's global variables
	local suggestion=""

	if command -v "$1" > /dev/null 2>&1; then
		return 0
	fi

	if [ -n "$2" ]; then
		suggestion=" $2"
	fi

	die "$1 is required but not installed.${suggestion}"
}
# Validates the value that was supplied for an option.
# Arguments: $1 - glob pattern describing an INVALID value
#            $2 - the value to check
#            $3 - message handed to die when the value is rejected
# A value is rejected when it is empty or when it matches the pattern.
check_arg() {
	case "$2" in
		'')
			# no value was given at all
			die "$3"
			;;
		$1)
			# $1 is left unquoted on purpose so that it is applied as a pattern
			die "$3"
			;;
	esac
}
# end functions
# Terminate on CTRL-C
# Handler for SIGINT: stops the script immediately.
# It exits with 130 (128 + signal number 2), the conventional status for a run that was
# interrupted; a bare "exit" would reuse the status of whatever command happened to run
# last, which can make an interrupted run look successful.
ctrl_c() {
	exit 130
}
trap ctrl_c INT
# git does all of the fetching and cloning further down
require "git"

# called without any arguments: show the help text (usage terminates the script)
(( $# > 0 )) || usage

# option defaults
target=""
bare=""
githubToken=""
updateOnly="false"
verbosity=0

# consume the leading options one at a time; the first word that is not a known
# option ends option parsing and is left in place as the positional argument
while (( $# > 0 )); do
	case "$1" in
		-h | --help)
			usage
			;;
		-v)
			verbosity=1
			shift
			;;
		-vv)
			verbosity=2
			shift
			;;
		--verbosity)
			# the pattern matches anything containing a non-digit, i.e. an invalid value
			check_arg '*[!0-9]*' "$2" "Verbosity is not a positive integer."
			verbosity="$2"
			shift 2
			;;
		-u | --update-only)
			updateOnly="true"
			shift
			;;
		-b | --bare)
			# stored as the literal option that is later handed to git clone
			bare="--mirror"
			shift
			;;
		--token)
			# only letters, digits and underscores are accepted in a token
			check_arg '*[!0-9a-zA-Z_]*' "$2" "Token is empty or contains illegal characters."
			githubToken="$2"
			shift 2
			;;
		*)
			break
			;;
	esac
done

# exactly one non-empty positional argument must remain: the target directory
if [[ $# -ne 1 || -z "$1" ]]; then
	usage
fi
target="$1"

# everything below works relative to the target directory
[ -d "${target}" ] || die "${target} is not a directory."
cd "${target}" || die "Couldn’t cd into ${target}"
# update any existing repositories
# Every subdirectory of the current directory that git recognises as a repository is
# fetched; other directories are skipped silently. Progress is printed when verbosity
# is at least 1, and git's own verbose output is shown when it is at least 2.
if [ "${verbosity}" -gt 0 ]; then
	printf "Updating local repositories ... \n"
fi

for item in ./*/; do
	# when there are no subdirectories the glob stays unexpanded; nothing to update then
	[ -e "${item}" ] || break

	# if it's not a repository directory, skip it; this is checked before anything is
	# printed so that a skipped directory does not leave an unfinished "Updating" line
	git -C "${item}" rev-parse > /dev/null 2>&1 || continue

	if [ "${verbosity}" -gt 0 ]; then
		printf "Updating %s ... " "${item}"
	fi

	# this works whether the repository is bare or a working directory;
	# because of set -e a failing fetch stops the whole script
	if [ "${verbosity}" -lt 2 ]; then
		git -C "${item}" fetch -q
	else
		git -C "${item}" fetch -v
	fi

	if [ "${verbosity}" -gt 0 ]; then
		printf "Done.\n"
	fi
done

# with --update-only there is nothing left to do once the existing repositories are fetched
if [ "${updateOnly}" = "true" ]; then
	exit
fi
# clone the remaining repositories
# This part collects the clone URL of every repository in the standardebooks GitHub
# organisation into repoUrls (one URL per line), with the non-ebook repositories removed.

# the repository list is downloaded with curl, so fail early with a clear message without it
require "curl"

if [ "${verbosity}" -gt 0 ]; then
	printf "Cloning remote repositories ... \n"
	printf "Fetching repository urls ..."
fi

# -s silences the progress meter, -i puts the response headers in front of the body;
# the headers are needed for the rate limit and for the link to the next page
curlOptions=(-si)
if [ -n "${githubToken}" ]; then
	curlOptions+=(-H "Authorization: token ${githubToken}")
fi

# get all of the repository names from the GitHub API, one "page" at a time
url="https://api.github.com/orgs/standardebooks/repos?per_page=100"
repoUrls=""
while true; do
	# get a "page" worth of repository URLs
	response=$(curl "${curlOptions[@]}" "${url}") ||
		die "Curl request failed."

	# Header names are matched case-insensitively (HTTP/2 responses use lower case) and a
	# trailing carriage return is tolerated (header lines end in CRLF). The response is
	# fed in with a here-string instead of a pipe so that grep -q stopping at the first
	# match cannot make the pipeline fail under pipefail.
	if grep -qiE "^x-ratelimit-remaining: 0[[:space:]]*$" <<< "${response}"; then
		limitReset=$(grep -oiP "^x-ratelimit-reset: \K[0-9]+(?=\s*$)" <<< "${response}") || limitReset=""
		resetTime=""
		if [ -n "${limitReset}" ]; then
			# the header holds the reset moment as seconds since the epoch
			resetTime=$(date -d "@${limitReset}" 2> /dev/null) || resetTime=""
		fi

		{
			printf "You have reached your daily allowance for unauthenticated GitHub API requests.\n"
			printf "Either wait until %s or use an OAuth token.\n" "${resetTime:-the limit resets}"
			printf "You can create a new token at https://github.com/settings/tokens/new and pass it to this script with the --token option.\n"
			printf "The token does not need any permissions.\n"
		} 1>&2
		exit 1
	fi

	# parse the response to get the current page's URLs: records are split on commas and
	# fields on double quotes, so in a "clone_url" record the fourth field is the URL
	currentRepoUrls=$(awk 'BEGIN { FS="\""; RS="," }; { if ($2 == "clone_url") {print $4} }' <<< "${response}")

	# add them to the full list in repoUrls
	repoUrls=$(printf "%s\n%s" "${repoUrls}" "${currentRepoUrls}")

	# set the variable to get the next "page"; no "next" link means this was the last page
	url=$(grep -oP "<\Khttps://api.github.com/[^>]*(?=>; rel=\"next\",)" <<< "${response}") || break

	if [ "${verbosity}" -gt 0 ]; then
		printf "."
	fi
done

if [ "${verbosity}" -gt 0 ]; then
	printf " Done.\n"
fi

# skip the non-ebook repositories by removing their names from the list; grep -v fails
# when nothing is left over (for instance when the API returned an error instead of a
# repository list), which is reported instead of letting set -e end the script silently
repoUrls=$(printf "%s" "${repoUrls}" | grep -v -e "/tools.git\$" -e "/web.git\$" -e "/manual.git\$" -e "/standard-blackletter.git\$" -e "/sublime-text-se-plugin.git\$" | awk 'NF') ||
	die "No ebook repositories were found in the GitHub API response."
# process the list, reading one repository at a time
# Every URL in repoUrls that has no local directory yet is cloned (as a mirror when
# --bare was given) and, where the ebook's metadata names it differently, the new
# directory is renamed after the ebook's identifier. A clone that fails is reported and
# counted, and the remaining repositories are still processed; the script exits with
# status 1 at the end if any clone failed. The list is fed in with a here-string rather
# than a pipe so the loop runs in the current shell and the failure count survives it.
failedClones=0
while IFS= read -r repoUrl; do
	# make sure it's not an empty string
	[ -n "${repoUrl}" ] || continue

	# strip everything prior to the last segment of the name
	repoName="${repoUrl##*/}"
	if [ "${bare}" = "" ]; then
		repoName="${repoName%.git}"
	fi

	# if the repo already exists, skip it (handled in the update above)
	if [ -d "${repoName}" ]; then
		continue
	fi

	# if the repository name has been truncated due to GitHub's name length limits,
	# but a local clone with the full name exists, don't attempt to clone it again
	baseName="${repoName%.git}"
	if [ "${#baseName}" -ge 100 ]; then
		# an unmatched glob is left as the literal pattern, which the -d test rejects
		dirs=( "${baseName}"*/ )
		if [[ -d ${dirs[0]} ]]; then
			continue
		fi
	fi

	if [ "${verbosity}" -gt 0 ]; then
		printf "Cloning %s ... \n" "${repoUrl}"
	fi

	# clone the repository, creating either a bare or working directory based on the option
	cloneOptions=()
	if [ "${verbosity}" -lt 2 ]; then
		cloneOptions+=(-q)
	else
		cloneOptions+=(-v)
	fi
	if [ -n "${bare}" ]; then
		cloneOptions+=("${bare}")
	fi

	# the status is captured by hand: under set -e a bare failing command would end the
	# script right here and the failure would never be reported
	cloneSucceeded="true"
	git clone "${cloneOptions[@]}" "${repoUrl}" || cloneSucceeded="false"

	# if a directory with the repository name doesn't exist, the clone did not complete successfully
	if [ "${cloneSucceeded}" = "false" ] || ! [ -d "${repoName}" ]; then
		printf "Failed to clone %s.\n" "${repoName}" 1>&2
		failedClones=$((failedClones + 1))
		continue
	fi
	if [ "${verbosity}" -gt 0 ]; then
		printf "Done.\n"
	fi

	# if the repository doesn't have a metadata file, skip to the next repository
	metadata=$(git -C "${repoName}" show HEAD:src/epub/content.opf 2> /dev/null) || continue

	# get the last segment of the dc:identifier from the metadata: drop the surrounding
	# tags and the URL prefix, then turn the remaining slashes into underscores;
	# metadata without such an identifier leaves the directory name as it is
	identifier=$(grep -oE "<dc:identifier id=\"uid\">url:https://standardebooks.org/ebooks/[^<]+</dc:identifier>" <<< "${metadata}" |
		sed -E -e "s/<[^>]+>//g" -e "s|url:https://standardebooks.org/ebooks/||g" -e "s|/|_|g") || continue
	[ -n "${identifier}" ] || continue

	# bare clones keep the .git suffix in their directory name
	if [ "${bare}" = "" ]; then
		properName="${identifier}"
	else
		properName="${identifier}.git"
	fi

	# if for some reason the repository name isn't the same as the identifier (they are identical
	# 99% of the time), rename the directory to be the identifier name; not sure why this is done, either
	if [ "${repoName}" != "${properName}" ]; then
		if [ -d "${properName}" ]; then
			if [ "${verbosity}" -gt 0 ]; then
				printf "Not moving %s to %s: directory exists\n" "${repoName}" "${properName}"
			fi
		else
			if [ "${verbosity}" -gt 0 ]; then
				printf "Moving %s to %s\n" "${repoName}" "${properName}"
			fi
			mv "${repoName}" "${properName}"
		fi
	fi
done <<< "${repoUrls}"

# let the caller know that at least one repository could not be cloned
if [ "${failedClones}" -gt 0 ]; then
	exit 1
fi