-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
160 lines (114 loc) · 3.87 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Run every recipe with bash: at least one recipe relies on bash-only brace
# expansion. Make variable names are case-sensitive, so the variable that
# selects the recipe shell must be spelled SHELL.
SHELL = bash

PYTHON = python

# PostgreSQL connection settings. They default to empty; pass values on the
# command line, e.g. `make PG_DATABASE=mydb init`. PG_PASSWORD is not
# assigned in this file, so it can also be supplied through the environment.
PG_HOST =
PG_PORT =
PG_USER =
PG_DATABASE =

# The same settings are collected in two forms:
#   PSQLFLAGS   command-line arguments for psql
#   CONNECTION  key=value connection string
PSQLFLAGS = $(PG_DATABASE)
CONNECTION = dbname=$(PG_DATABASE)

# `ifdef` is false for a variable whose value is empty, so each optional
# setting only contributes when it was actually given a value.
ifdef PG_HOST
CONNECTION += host=$(PG_HOST)
PSQLFLAGS += -h $(PG_HOST)
endif

ifdef PG_PORT
CONNECTION += port=$(PG_PORT)
PSQLFLAGS += -p $(PG_PORT)
endif

ifdef PG_USER
CONNECTION += user=$(PG_USER)
PSQLFLAGS += -U $(PG_USER)
endif

# The password only goes into the connection string; no psql flag is added.
ifdef PG_PASSWORD
CONNECTION += password=$(PG_PASSWORD)
endif

PSQL = psql $(PSQLFLAGS)
# Day to process, written YYYY-MM-DD. Override with `make DATE=2017-08-01 ...`.
DATE = 2001-01-01

# Year and month fields of DATE. Splitting on `-` with built-in functions
# avoids forking a shell and sed every time one of these is expanded. Both
# stay recursively expanded so they always follow the current DATE.
YEAR = $(word 1,$(subst -, ,$(DATE)))
MONTH = $(word 2,$(subst -, ,$(DATE)))

# Python module generated from the GTFS-realtime .proto file.
PB2 = src/gtfs_realtime_pb2.py

# GTFS-realtime feed endpoints.
alerts = http://gtfsrt.prod.obanyc.com/alerts
positions = http://gtfsrt.prod.obanyc.com/vehiclePositions
tripupdates = http://gtfsrt.prod.obanyc.com/tripUpdates

# Scraper command line; -d receives the connection string.
GTFSRDB = $(PYTHON) src/gtfsrdb.py -d "$(CONNECTION)"

# Bucket that archives are uploaded to; defaults to the database name.
GOOGLE_BUCKET ?= $(PG_DATABASE)

# Root directory under which YEAR/MONTH/ data files are kept.
PREFIX = .

# MODE=upload enables the archive targets (gcloud, clean-date); any other
# value enables the download targets.
MODE ?= download

# ARCHIVE=mytransit downloads from the mytransit archive; any other value
# downloads from the Google Cloud Storage archive.
ARCHIVE ?= gcloud
# Targets that are commands rather than files. `download` and `create` are
# included so that a stray file with one of those names cannot make them
# look up to date. Pattern targets such as psql-% are not listed: .PHONY
# only accepts literal names, and make skips implicit-rule search for phony
# targets, which would stop a pattern rule from applying to them.
.PHONY: all init create install clean-date download \
        positions alerts tripupdates gcloud

# Default goal. It has no prerequisites and no recipe, so a bare `make`
# does nothing; name the target you want explicitly.
all:
# Scrape GTFS-rt data.
# Each target runs the scraper against one feed. BUSTIME_API_KEY is not
# defined in this file, so supply it on the command line or through the
# environment. The URL is double-quoted so that the shell passes `?` and any
# special characters in the key through literally instead of treating them
# as glob or control characters.
alerts:
	$(GTFSRDB) --alerts "$(alerts)?key=$(BUSTIME_API_KEY)"

positions:
	$(GTFSRDB) --vehicle-positions "$(positions)?key=$(BUSTIME_API_KEY)"

tripupdates:
	$(GTFSRDB) --trip-updates "$(tripupdates)?key=$(BUSTIME_API_KEY)"
ifeq ($(MODE),upload)
# Archive real-time data
# Upload the day's compressed export to the bucket. The object name is built
# from YEAR/MONTH/<file name> rather than from the local path, so it matches
# the layout ARCHIVE_URL downloads from even when PREFIX is not `.`. With
# the default PREFIX the destination is the same as before, because make
# strips a leading `./` from prerequisite names.
gcloud: $(PREFIX)/$(YEAR)/$(MONTH)/$(DATE)-bus-positions.csv.xz
	gsutil cp -rna public-read $< gs://$(GOOGLE_BUCKET)/$(YEAR)/$(MONTH)/$(<F)
# Export one day of rt_vehicle_positions as xz-compressed CSV.
# The recipe is a pipeline, whose exit status is normally that of its last
# command (xz) only. `pipefail` makes a psql failure fail the rule too, and
# the partial output is removed so a broken export is never mistaken for a
# finished archive. pipefail is a bash option, hence the target-specific
# SHELL. The directory is an order-only prerequisite: it must exist, but its
# timestamp never forces a re-export.
$(PREFIX)/$(YEAR)/$(MONTH)/$(DATE)-bus-positions.csv.xz: SHELL = bash
$(PREFIX)/$(YEAR)/$(MONTH)/$(DATE)-bus-positions.csv.xz: | $(PREFIX)/$(YEAR)/$(MONTH)
	set -o pipefail; \
	$(PSQL) -c "COPY (\
	SELECT * FROM rt_vehicle_positions WHERE timestamp::date = '$(DATE)'::date \
	) TO STDOUT WITH (NULL '\N', FORMAT CSV, HEADER true)" | \
	xz -z - > $@ || { rm -f $@; exit 1; }
# Delete one day's rows from rt_vehicle_positions and remove that day's
# local export, both the compressed and the uncompressed file. The two file
# names are written out in full instead of using brace expansion, so the
# removal works whichever shell runs the recipe.
clean-date:
	$(PSQL) -c "DELETE FROM rt_vehicle_positions where timestamp::date = '$(DATE)'::date"
	rm -f $(PREFIX)/$(YEAR)/$(MONTH)/$(DATE)-bus-positions.csv.xz \
		$(PREFIX)/$(YEAR)/$(MONTH)/$(DATE)-bus-positions.csv
else
# Download past data
# ARCHIVE selects the source archive. Each branch defines:
#   ARCHIVE_COLS  column list of that archive's CSV files
#   ARCHIVE_URL   URL of one day's file; $* is the stem of the pattern rule
#                 in whose recipe the variable is expanded
#   download      entry point, which depends on the matching psql-<stem>
ifneq ($(ARCHIVE),mytransit)
# Google Cloud Storage archive: files are named by YYYY-MM-DD.
ARCHIVE_COLS = timestamp,trip_id, \
    route_id,trip_start_time,trip_start_date, \
    vehicle_id,vehicle_label,vehicle_license_plate, \
    latitude,longitude,bearing,speed,stop_id, \
    stop_status,occupancy_status,congestion_level, \
    progress,block_assigned,dist_along_route,dist_from_stop
ARCHIVE_URL = https://storage.googleapis.com/mta-bus-archive/$(YEAR)/$(MONTH)/$*-bus-positions.csv.xz
download: psql-$(DATE)
else
# mytransit archive: files are named by YYYYMMDD, so the dashes are removed
# from DATE to form the stem.
ARCHIVE_COLS = timestamp,vehicle_id, \
    latitude,longitude,bearing,progress, \
    trip_start_date,trip_id,block_assigned, \
    stop_id,dist_along_route,dist_from_stop
ARCHIVE_URL = http://data.mytransit.nyc.s3.amazonaws.com/bus_time/$(YEAR)/$(YEAR)-$(MONTH)/bus_time_$*.csv.xz
download: psql-$(subst -,,$(DATE))
endif
# psql-<stem>: load PREFIX/YEAR/MONTH/<stem>-bus-positions.csv into
# rt_vehicle_positions. The CSV is fed to psql on standard input, and
# ARCHIVE_COLS names the target columns in the order the file provides them.
psql-%: $(PREFIX)/$(YEAR)/$(MONTH)/%-bus-positions.csv
	< $< $(PSQL) -c "COPY rt_vehicle_positions ($(ARCHIVE_COLS)) \
	FROM STDIN (FORMAT CSV, HEADER true, NULL '\N' ) "
# mysql-<stem>: load PREFIX/YEAR/MONTH/<stem>-bus-positions.csv into the
# MySQL table `positions` with LOAD DATA LOCAL INFILE, skipping the header
# line. IGNORE tells the statement to skip conflicting rows.
mysql-%: $(PREFIX)/$(YEAR)/$(MONTH)/%-bus-positions.csv
	mysql --local-infile --execute="LOAD DATA LOCAL INFILE '$<' \
	IGNORE INTO TABLE positions \
	FIELDS TERMINATED BY ',' \
	LINES TERMINATED BY '\r\n' \
	IGNORE 1 LINES"
# Decompress an .xz archive next to itself.
# The output is written to a temporary name and only moved into place once
# xz has succeeded; a failed or interrupted decompression therefore never
# leaves a truncated .csv that looks newer than its source.
%.csv: %.csv.xz
	xz -cd $< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
# Fetch one day's compressed CSV from the selected archive. $* inside
# ARCHIVE_URL is the stem matched by `%` in this rule.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as the .csv.xz; any partial file is removed so a failed
# download is retried on the next run. The URL is quoted so the shell does
# not interpret any character in it.
$(PREFIX)/$(YEAR)/$(MONTH)/%-bus-positions.csv.xz: | $(PREFIX)/$(YEAR)/$(MONTH)
	curl --fail -L -o $@ "$(ARCHIVE_URL)" || { rm -f $@; exit 1; }
endif
# Per-month data directory, used as an order-only prerequisite by the rules
# that write files into it. PREFIX itself is order-only here as well: it has
# to exist, but its timestamp is never compared.
$(PREFIX)/$(YEAR)/$(MONTH): | $(PREFIX)
	mkdir -p $@
# System packages installed through yum by the `install` target.
# Build tools and Python:
YUM_REQUIRES = git gcc python python-devel
# PostgreSQL 9.5 client, server and headers:
YUM_REQUIRES += postgresql95.x86_64 postgresql95-libs.x86_64
YUM_REQUIRES += postgresql95-server.x86_64 postgresql95-contrib.x86_64
YUM_REQUIRES += postgresql95-devel.x86_64
# Development headers for openssl and libffi:
YUM_REQUIRES += openssl-devel libffi-devel
# Run sql/schema.sql against the configured database.
init: sql/schema.sql
	$(PSQL) --file=$<
# Initialise and start the postgresql95 service, then create the PG_USER
# role as a superuser and the PG_DATABASE database. The leading `-` on
# createdb tells make to carry on if that command fails.
create:
	service postgresql95 initdb
	service postgresql95 start
	createuser --superuser $(PG_USER)
	-createdb $(PG_DATABASE)
# Install system packages and Python requirements.
#   1. If yum is on the PATH, install YUM_REQUIRES with it. The leading `-`
#      makes make ignore a failure of this line, including yum being absent.
#   2. If `$(PYTHON) -m pip` does not run, fetch get-pip.py and run it with
#      sudo.
#   3. Install or upgrade everything listed in requirements.txt.
install: requirements.txt
	-which yum && sudo yum install -y $(YUM_REQUIRES)
	$(PYTHON) -m pip > /dev/null || curl https://bootstrap.pypa.io/get-pip.py | sudo $(PYTHON)
	$(PYTHON) -m pip install -U -r $<
# Generate the Python protobuf module from its .proto definition. This is a
# static pattern rule limited to $(PB2): src/<stem>_realtime_pb2.py is built
# from src/<stem>-realtime.proto. protoc searches the directory of the .proto
# file for imports and writes its output into the directory of the target.
$(PB2): src/%_realtime_pb2.py: src/%-realtime.proto
	protoc --proto_path=$(<D) --python_out=$(@D) $<