Skip to content

Commit

Permalink
Merge pull request #134 from transitmatters/bus_staging
Browse files Browse the repository at this point in the history
INTRODUCING BUS MODE!
  • Loading branch information
austinjpaul committed Jan 25, 2022
2 parents 2ded2aa + 5fe30f3 commit 0a12281
Show file tree
Hide file tree
Showing 58 changed files with 13,481 additions and 7,513 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ build
.env.production.local
.eslintcache

**/data

npm-debug.log*
yarn-debug.log*
yarn-error.log*

.idea
.vscode
*~
*.pyc
16,202 changes: 9,474 additions & 6,728 deletions package-lock.json

Large diffs are not rendered by default.

13 changes: 6 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,17 @@
"chart.js": "^2.9.4",
"classnames": "^2.2.6",
"concurrently": "^5.2.0",
"flatpickr": "4.5.7",
"flatpickr": "^4.6.9",
"lodash.merge": "^4.6.2",
"react": "^16.13.1",
"react": "^17.0.0",
"react-chartjs-2": "^2.11.1",
"react-dom": "^16.13.1",
"react-ga": "^3.1.2",
"react-dom": "^17.0.0",
"react-router-dom": "^5.1.2",
"react-scripts": "^3.4.1",
"react-select": "^3.1.0"
"react-scripts": "^4.0.3",
"react-select": "^4.0.0"
},
"devDependencies": {
"eslint": "^6.8.0"
"eslint": "^7.11.0"
},
"homepage": ".",
"scripts": {
Expand Down
7 changes: 6 additions & 1 deletion public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@transitmatters" />
<meta name="twitter:title" content="TransitMatters Data Dashboard" />
<meta name="twitter:description" content="Explore and visualize MBTA rapid transit performance data with the new TransitMatters Data Dashboard." />
<meta name="twitter:description"
content="Explore MBTA subway and bus performance data with the TransitMatters Data Dashboard." />
<meta name="twitter:image" content="https://dashboard.transitmatters.org/twitter-card.jpg" />
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
<title>TransitMatters Data Dashboard</title>
<script>
window.goatcounter = { no_onload: true };
</script>
<script data-goatcounter="https://transitmatters-dd.goatcounter.com/count" src="//gc.zgo.at/count.js"></script>
</head>

<body>
Expand Down
2 changes: 1 addition & 1 deletion server/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ chalice = "*"
pytz = "*"
boto3 = "*"
flake8 = "*"
pandas = "*"
pandas = ">=1.3"
numpy = "*"
importlib-resources = "*"

Expand Down
478 changes: 281 additions & 197 deletions server/Pipfile.lock

Large diffs are not rendered by default.

48 changes: 25 additions & 23 deletions server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,6 @@ def parse_user_date(user_date):
return date(year=year, month=month, day=day)


def parse_query_stop_args(query_params, expected_stop_param_names):
stops_dict = {}
for stop_param in expected_stop_param_names:
query_value = query_params.get(stop_param)
if query_value:
stops_dict[stop_param] = query_value
return stops_dict


def mutlidict_to_dict(mutlidict):
res_dict = {}
for key in mutlidict.keys():
Expand Down Expand Up @@ -75,23 +66,23 @@ def healthcheck():
@app.route("/headways/{user_date}", cors=cors_config)
def headways_route(user_date):
date = parse_user_date(user_date)
stop = app.current_request.query_params["stop"]
return data_funcs.headways(date, [stop])
stops = app.current_request.query_params.getlist("stop")
return data_funcs.headways(date, stops)


@app.route("/dwells/{user_date}", cors=cors_config)
def dwells_route(user_date):
date = parse_user_date(user_date)
stop = app.current_request.query_params["stop"]
return data_funcs.dwells(date, [stop])
stops = app.current_request.query_params.getlist("stop")
return data_funcs.dwells(date, stops)


@app.route("/traveltimes/{user_date}", cors=cors_config)
def traveltime_route(user_date):
date = parse_user_date(user_date)
from_stop = app.current_request.query_params["from_stop"]
to_stop = app.current_request.query_params["to_stop"]
return data_funcs.travel_times(date, [from_stop], [to_stop])
from_stops = app.current_request.query_params.getlist("from_stop")
to_stops = app.current_request.query_params.getlist("to_stop")
return data_funcs.travel_times(date, from_stops, to_stops)


@app.route("/alerts/{user_date}", cors=cors_config)
Expand All @@ -104,30 +95,41 @@ def alerts_route(user_date):
def traveltime_aggregate_route():
sdate = parse_user_date(app.current_request.query_params["start_date"])
edate = parse_user_date(app.current_request.query_params["end_date"])
from_stop = app.current_request.query_params["from_stop"]
to_stop = app.current_request.query_params["to_stop"]
from_stops = app.current_request.query_params.getlist("from_stop")
to_stops = app.current_request.query_params.getlist("to_stop")

response = aggregation.travel_times_over_time(sdate, edate, from_stops, to_stops)
return json.dumps(response, indent=4, sort_keys=True, default=str)


@app.route("/aggregate/traveltimes2", cors=cors_config)
def traveltime_aggregate_route_2():
sdate = parse_user_date(app.current_request.query_params["start_date"])
edate = parse_user_date(app.current_request.query_params["end_date"])
from_stop = app.current_request.query_params.getlist("from_stop")
to_stop = app.current_request.query_params.getlist("to_stop")

response = aggregation.travel_times_over_time(sdate, edate, from_stop, to_stop)
response = aggregation.travel_times_all(sdate, edate, from_stop, to_stop)
return json.dumps(response, indent=4, sort_keys=True, default=str)


@app.route("/aggregate/headways", cors=cors_config)
def headways_aggregate_route():
sdate = parse_user_date(app.current_request.query_params["start_date"])
edate = parse_user_date(app.current_request.query_params["end_date"])
stop = app.current_request.query_params["stop"]
stops = app.current_request.query_params.getlist("stop")

response = aggregation.headways_over_time(sdate, edate, stop)
response = aggregation.headways_over_time(sdate, edate, stops)
return json.dumps(response, indent=4, sort_keys=True, default=str)


@app.route("/aggregate/dwells", cors=cors_config)
def dwells_aggregate_route():
sdate = parse_user_date(app.current_request.query_params["start_date"])
edate = parse_user_date(app.current_request.query_params["end_date"])
stop = app.current_request.query_params["stop"]
stops = app.current_request.query_params.getlist("stop")

response = aggregation.dwells_over_time(sdate, edate, stop)
response = aggregation.dwells_over_time(sdate, edate, stops)
return json.dumps(response, indent=4, sort_keys=True, default=str)


Expand Down
144 changes: 144 additions & 0 deletions server/bus/bus2train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import argparse
import pathlib
import pandas as pd
from datetime import datetime


def load_data(input_csv, routes):
    """
    Load raw MBTA bus timepoint data and tidy it for event processing.

    - Filter only points with actual trip data
    - Trim leading 0s from route_id
    - Select only route_ids in `routes`
    - Set scheduled/actual times to be on service_date, not 1900-01-01
    - Map direction_id (Outbound -> 0, Inbound -> 1)

    Sample of the expected (current-format) input:

    "service_date", "route_id", "direction", "half_trip_id", "stop_id", "time_point_id", "time_point_order", "point_type", "standard_type", "scheduled", "actual", "scheduled_headway", "headway"
    2020-01-15, "01", "Inbound", 46374001, 67, "maput", 2, "Midpoint", "Schedule", 1900-01-01 05:08:00, 1900-01-01 05:09:07, -5, NA,NA
    2020-01-15, "01", "Inbound", 46374001, 110, "hhgat", 1, "Startpoint", "Schedule", 1900-01-01 05:05:00, 1900-01-01 05:04:34, 26, NA,NA
    2020-01-15, "01", "Inbound", 46374045, 110, "hhgat", 1, "Startpoint", "Headway", 1900-01-01 05:20:00, 1900-01-01 05:20:45, NA, 900,971

    :param input_csv: path or file-like object readable by pandas.read_csv.
    :param routes: iterable of route_id strings to keep; falsy keeps all routes.
    :return: tidied DataFrame, one row per (trip, timepoint).
    """
    df = pd.read_csv(input_csv)
    # Normalize column names across the two raw export formats. Prior-year
    # exports use CamelCase headers; current exports are already snake_case
    # but name direction/stop columns differently.
    df.rename(columns={
        # This set of transformations covers prior-year bus data.
        'ServiceDate': 'service_date',
        'Route': 'route_id',
        'Direction': 'direction_id',
        'HalfTripId': 'half_trip_id',
        'Stop': 'stop_id',
        'stop_name': 'time_point_id',
        'stop_sequence': 'time_point_order',
        'Timepoint': 'time_point_id',
        'TimepointOrder': 'time_point_order',
        'PointType': 'point_type',
        'StandardType': 'standard_type',
        'Scheduled': 'scheduled',
        'Actual': 'actual',
        'ScheduledHeadway': 'scheduled_headway',
        'Headway': 'headway',
        'direction': 'direction_id'
    }, inplace=True)

    # We need to keep both "Headway" AND "Schedule": both can have timepoint data.
    # .copy() so the column assignments below operate on a real frame, not a
    # view (avoids SettingWithCopyWarning / silent no-op writes).
    df = df.loc[df.actual.notnull()].copy()

    df.route_id = df.route_id.str.lstrip("0")
    if routes:
        df = df.loc[df.route_id.isin(routes)].copy()

    # Convert dates
    df.scheduled = pd.to_datetime(df.scheduled)
    df.service_date = pd.to_datetime(df.service_date)
    df.actual = pd.to_datetime(df.actual)

    # Raw scheduled/actual times are anchored at 1900-01-01; rebase them onto
    # the trip's service_date so they become real timestamps.
    OFFSET = datetime(1900, 1, 1, 0, 0, 0)
    df.scheduled = df.service_date + (df.scheduled - OFFSET)
    df.actual = df.service_date + (df.actual - OFFSET)
    df.service_date = df.service_date.dt.date

    df.direction_id = df.direction_id.map({"Outbound": 0, "Inbound": 1})

    return df


def process_events(df):
    """
    Rearrange the tidied input data into the rapidtransit event format.

    - Rename columns (trip_id, stop_sequence, event_time)
    - Remove extra columns
    - Add empty vehicle columns
    - Calculate event_type column with ARR and DEP entries

    :param df: tidied DataFrame as produced by load_data().
    :return: DataFrame with exactly the rapidtransit event columns, one row
             per ARR/DEP event.
    """
    CSV_HEADER = ["service_date", "route_id", "trip_id", "direction_id", "stop_id",
                  "stop_sequence", "vehicle_id", "vehicle_label", "event_type", "event_time"]

    df = df.rename(columns={"half_trip_id": "trip_id",
                            "time_point_order": "stop_sequence",
                            "actual": "event_time"})
    # BUG FIX: the original called df.drop(...) without assigning the result,
    # which is a no-op (harmless only because the CSV_HEADER selection below
    # discards the same columns). Assign it so the trim actually happens.
    df = df.drop(columns=["time_point_id", "standard_type", "scheduled", "scheduled_headway", "headway"])
    # The raw feed has no vehicle information; keep the columns for schema parity.
    df["vehicle_id"] = ""
    df["vehicle_label"] = ""

    # Startpoints only depart, endpoints only arrive, midpoints do both;
    # explode() turns each list into one event row.
    df["event_type"] = df.point_type.map({"Startpoint": ["DEP"],
                                          "Midpoint": ["ARR", "DEP"],
                                          "Endpoint": ["ARR"]})
    df = df.explode("event_type")
    df = df[CSV_HEADER]  # reorder (and drop the remaining point_type column)

    return df


def to_disk(df, outdir, nozip=False):
    """
    Write one events file per (service_date, stop_id, direction_id, route_id)
    group, under outdir/Events/daily-bus-data/<route>-<dir>-<stop>/Year=/Month=/Day=/.

    :param df: event DataFrame as produced by process_events().
    :param outdir: root output directory.
    :param nozip: debug flag; when True the CSV is written uncompressed
                  (the filename keeps its .gz suffix regardless).
    """
    group_cols = ["service_date", "stop_id", "direction_id", "route_id"]
    for (service_date, stop_id, direction_id, route_id), events in df.groupby(group_cols):
        day_dir = pathlib.Path(
            outdir,
            "Events",
            "daily-bus-data",
            f"{route_id}-{direction_id}-{stop_id}",
            f"Year={service_date.year}",
            f"Month={service_date.month}",
            f"Day={service_date.day}",
        )
        day_dir.mkdir(parents=True, exist_ok=True)
        # mtime=0 in the gzip header keeps output byte-deterministic, so
        # regenerated old routes don't look changed to rsync/s3.
        compression = None if nozip else {"method": "gzip", "mtime": 0}
        events.to_csv(day_dir / "events.csv.gz", index=False, compression=compression)


def main():
    """Command-line entry point: raw bus CSV in, per-stop event files out."""
    parser = argparse.ArgumentParser()

    parser.add_argument("input", metavar="INPUT_CSV")
    parser.add_argument("output", metavar="OUTPUT_DIR")
    parser.add_argument("--routes", "-r", nargs="*", type=str,
                        help="One note here: we should always be additive with our route set "
                             "in case 2 lines share the same stop id: we need both in the result file.")
    parser.add_argument("--nozip", "-nz", action="store_true", help="debug feature to skip gzipping")

    args = parser.parse_args()

    # Make sure the output root exists before any per-group files are written.
    pathlib.Path(args.output).mkdir(exist_ok=True)

    events = process_events(load_data(args.input, args.routes))
    to_disk(events, args.output, nozip=args.nozip)


if __name__ == "__main__":
    main()
10 changes: 10 additions & 0 deletions server/bus/check_latest_manifests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/sh

# Regenerate the per-route stop manifest from a new input CSV and diff each
# one against the checked-in manifest, so new/removed stops surface in review.
#
# Usage: ./check_latest_manifests.sh <new-input-csv>

newfile="$1"

# Create the output directory once, up front (it is the same for every route).
mkdir -p data/output/manifests/

for i in 1 15 22 23 28 32 39 57 66 71 73 77 111; do
    pipenv run python manifest.py "$newfile" "data/output/manifests/$i.json" --checkpoints data/input/MBTA_GTFS/checkpoints.txt -r "$i"
    echo "Comparing old and new manifests for route $i"
    pipenv run python compare_manifest.py "../../src/bus_constants/$i.json" "data/output/manifests/$i.json"
done
55 changes: 55 additions & 0 deletions server/bus/compare_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import json
import sys

# Accumulated station -> {direction -> [stop ids]} state, built up across
# successive runone() calls so each later file is diffed against everything
# seen so far.
station_stops = {}


def runone(path, first=False):
    """
    Diff one route-manifest JSON file against the accumulated station state.

    Prints stations/stops that vanished relative to the accumulated state,
    then merges this file's stations/stops into it, printing anything new.

    :param path: path to a manifest JSON file: {route: {"stations": [...]}}.
    :param first: True for the baseline file — seed the state silently.
    :return: True if nothing new was found (removals do not affect the flag).
    """
    unchanged = True
    # BUG FIX: use a context manager instead of json.load(open(path)) so the
    # file handle is closed deterministically.
    with open(path) as f:
        current = json.load(f)

    # Print any removed stops first
    if not first:
        my_stations = list(current.values())[0]['stations']
        stat_map = dict(map(lambda x: (x['station'], x), my_stations))
        for s in station_stops:
            if s not in stat_map:
                print(" - Station %s removed in file %s. (Stops: %s)" % (s, path, station_stops[s]))
                continue
            for d in station_stops[s]:
                for stop in station_stops[s][d]:
                    if stop not in stat_map[s]['stops'][d]:
                        print(" - Stop %s removed from %s in file %s" % (stop, s, path))

    # Merge this file's stations/stops into the accumulated state.
    for i in list(current.values())[0]['stations']:
        s = i['station']

        if s not in station_stops:
            station_stops[s] = {}
            if not first:
                print(" + Found new station %s" % s)
                unchanged = False
        for direction in i['stops']:
            if direction not in station_stops[s]:
                station_stops[s][direction] = []
            for stop in i['stops'][direction]:
                if stop not in station_stops[s][direction]:
                    station_stops[s][direction].append(stop)
                    if not first:
                        print(" + Found additional stop %s at station %s in %s" % (stop, s, path))
                        unchanged = False
    return unchanged


def run(paths):
    """
    Seed the station state from the baseline manifest (paths[0]), then diff
    the remaining manifests against it in reverse order, printing a summary.

    :param paths: manifest file paths; the first is the baseline.
    """
    runone(paths[0], first=True)
    changed = False
    for path in reversed(paths[1:]):
        if not runone(path):
            changed = True
    if changed:
        print("Changed?")
    else:
        print("No new stations/stops on route.")


if __name__ == "__main__":
run(sys.argv[1:])
13 changes: 13 additions & 0 deletions server/bus/gen_bus_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash -x

# Regenerate bus event files from every yearly input CSV.
# Usage: ./gen_bus_data.sh [route ...]   (defaults to route 1)

routes="$@"
if [ -z "$routes" ]; then
    routes="1"
fi

for y in 2018 2019 2020 2021; do
    for f in $(find "data/input/$y/" -name '*.csv'); do
        echo "Generating stop data from $f"
        # $routes is deliberately unquoted: it must expand to one -r argument
        # per requested route.
        pipenv run python bus2train.py "$f" data/output -r $routes
    done
done
10 changes: 10 additions & 0 deletions server/bus/gen_manifests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Build one manifest per route for every monthly input CSV.
# Output: data/output/manifests/<route>/<route>_<month>.json

for route in 1 111 15 22 23 28 32 39 57 66 71 73 77 114 116 117; do
    mkdir -p "data/output/manifests/$route"

    # Quote the -name pattern so the shell doesn't glob *.csv against the cwd.
    for f in $(find data/input/ -name '*.csv'); do
        month=$(echo "$f" | cut -d/ -f4 | cut -d. -f1)
        # BUG FIX: the original wrote "$route_$month.json", which expands the
        # undefined variable $route_ and yielded files named "<month>.json"
        # with no route prefix; ${route}_${month} is the intended name.
        pipenv run python manifest.py "$f" "data/output/manifests/$route/${route}_${month}.json" --checkpoints "data/input/MBTA_GTFS/checkpoints.txt" -r "$route"
    done
done
Loading

0 comments on commit 0a12281

Please sign in to comment.