forked from OHDSI/ETL-Synthea
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbulk-load
executable file
·94 lines (63 loc) · 4.5 KB
/
bulk-load
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/bin/bash
# ./bulk-load database hostname synthea_schema cdm_schema user password /path/to/synthea/data /path/to/vocab/data
# ./bulk-load synthea10 synthea.us-east-1.rds.amazonaws.com native cdm_synthea10 postgres lollipop S://Git/Github/synthea/output/csv S://Vocabulary_20181119
DB=$1
HOSTNAME=$2
NATIVE=$3
CDM=$4
USERNAME=$5
PASSWORD=$6
DATA_DIRECTORY=$7
VOCAB_DIRECTORY=$8
echo "creating synthea tables"
psql -c "CREATE SCHEMA $NATIVE" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$NATIVE" psql -f "ETL/SQL/synthea_ddl.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "creating cdm tables"
psql -c "CREATE SCHEMA $CDM" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/drop_cdm_tables.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/cdm_v5.3_ddl.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "loading data from synthea csv files to relational database"
for TABLE in allergies careplans conditions encounters immunizations medications observations patients procedures
do
PGOPTIONS="--search_path=$NATIVE" psql -c "\copy $TABLE from '$DATA_DIRECTORY/$TABLE.csv' CSV HEADER" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
done
echo "loading data from vocab csv files to relational database"
for TABLE in CONCEPT CONCEPT_ANCESTOR CONCEPT_RELATIONSHIP CONCEPT_CLASS CONCEPT_SYNONYM DOMAIN DRUG_STRENGTH RELATIONSHIP VOCABULARY
do
PGOPTIONS="--search_path=$CDM" psql -c "\copy $TABLE from '$VOCAB_DIRECTORY/$TABLE.csv' CSV DELIMITER E'\t' QUOTE E'\b' HEADER" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
done
echo "creating vocab maps..."
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/create_source_to_source_vocab_map.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/create_source_to_standard_vocab_map.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "creating visit logic tables..."
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/AllVisitTable.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/AAVITable.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/final_visit_ids.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Performing ETL"
# echo "Loading location"
# PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_location.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading person"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_person.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
# echo "Loading death"
# PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_death.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading observation_period"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_observation_period.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading visit_occurrence"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_visit_occurrence.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading observation"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_observation.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading condition_occurrence"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_condition_occurrence.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading procedure_occurrence"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_procedure_occurrence.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading measurement"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_measurement.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading drug exposure"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_drug_exposure.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading condition_era"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_condition_era.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
echo "Loading drug_era"
PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/insert_drug_era.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
# PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/cdm_v5.3_indexes_ddl.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
# PGOPTIONS="--search_path=$CDM" psql -f "ETL/SQL/cdm_v5.3_constraints_ddl.sql" "postgresql://$USERNAME:$PASSWORD@$HOSTNAME/$DB"
# TODO - Either create a staging schema for native Synthea tables or drop the Synthea tables after ETL complete