-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdump_build.sh
49 lines (39 loc) · 1.02 KB
/
dump_build.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# Script for updating data dump from apps.wikitree.com
#
# Usage: dump_build.sh TIMESTAMP
#
# Inputs : data/dumps/TIMESTAMP/dump_{people_users,people_marriages,categories}.csv.gz
# Outputs: data/version/TIMESTAMP/   (deleted and rebuilt from scratch on every run)
#          results/graphs/family/TIMESTAMP/   (copy of the family graph files)
#
# All paths are relative, so run this from the directory that contains
# data/, results/ and the *.py pipeline scripts.

set -u          # referencing an unset variable is an error
set -e          # abort on the first failing command
set -o pipefail # a pipeline fails if any stage fails

# `set -u` only catches a *missing* argument. An empty one would turn
# VERSION_DIR into "data/version//" and the rm -rf below would wipe every
# built version, so reject it explicitly.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s TIMESTAMP\n' "${0##*/}" >&2
  exit 1
fi

# A "." or ".." path component could make the rm -rf below escape
# data/version/, so refuse those as well.
case "/$1/" in
  */./* | */../*)
    printf 'Error: TIMESTAMP must not contain "." or ".." path components: %s\n' "$1" >&2
    exit 1
    ;;
esac

set -x          # trace every command from here on

readonly TIMESTAMP=$1
readonly DUMP_DIR="data/dumps/${TIMESTAMP}"
# Trailing slash kept as in the original value; the doubled "/" it produces
# in the derived paths below is harmless.
readonly VERSION_DIR="data/version/${TIMESTAMP}/"

# Check the input exists BEFORE deleting the previous build, so a mistyped
# timestamp cannot destroy an existing version directory.
if [[ ! -d "${DUMP_DIR}" ]]; then
  printf 'Error: dump directory not found: %s\n' "${DUMP_DIR}" >&2
  exit 1
fi

# Start from a clean version directory. ${VAR:?} aborts if the variable is
# ever empty, as a last line of defence for the recursive delete.
rm -rf -- "${VERSION_DIR:?}"
mkdir -p -- "${VERSION_DIR}"

echo
echo "(1) Unzip"
for x in people_users people_marriages categories; do
  gunzip -c "${DUMP_DIR}/dump_${x}.csv.gz" > "${VERSION_DIR}/dump_${x}.csv"
done

echo
echo "(2) Convert to parquet"
# Runtime note from the original script: 2m
time python3 csv_to_parquet.py --version="${TIMESTAMP}"

echo
echo "(3) Compute relationships"
# Runtime note from the original script: 6m
time python3 pq_compute_relatives.py --version="${TIMESTAMP}"

echo
echo "(4) Building Graph"
# Runtime note from the original script: 2m
time python3 graph_make_family.py --version="${TIMESTAMP}"
mkdir -p "results/graphs/family/${TIMESTAMP}"
# Save copy of network in a more persistent place.
# The glob is deliberately left outside the quotes so it expands to every
# all.* file; if nothing matches, cp fails and `set -e` stops the script.
cp "${VERSION_DIR}/graphs/family/all."* "results/graphs/family/${TIMESTAMP}/"
# Runtime note from the original script: 10m
time python3 graph_core.py "${VERSION_DIR}/graphs/family/all.adj.nx"

echo
echo "(5) Convert to SQLite DB"
# Runtime note from the original script: 30m
time python3 csv_to_sqlite.py --version="${TIMESTAMP}"

echo
echo "(6) TODO: Compute Stats?"

echo
echo "Done"