# %% [markdown] noqa: D212, D400, D415
"""
# An End-to-end Example
An end-to-end example running through urban centre detection, population
retrieval, GTFS manipulation and validation, OSM clipping, analysing the
transport network using `r5py`, and calculating a performance metric.
## Preamble
Load the script-wide imports and the configuration information.
"""
# %%
import os
# import datetime
import geopandas as gpd
# import pandas as pd
import gtfs_kit as gk
import toml
from pyprojroot import here
from shapely.geometry import box
from transport_performance.gtfs.gtfs_utils import bbox_filter_gtfs
from transport_performance.gtfs.validation import GtfsInstance
from transport_performance.osm.osm_utils import filter_osm
from transport_performance.population.rasterpop import RasterPop
# from folium.map import Icon
# from r5py import (
#     TransportNetwork,
#     TravelTimeMatrixComputer,
#     TransportMode,
# )
from transport_performance.urban_centres.raster_uc import UrbanCentre
from transport_performance.utils.raster import (
    merge_raster_files,
    # sum_resample_file,
)
# import folium
# %%
# config filepath, and loading
CONFIG_FILE = here("notebooks/e2e/config/e2e_london.toml")
config = toml.load(CONFIG_FILE)
# split out into separate configs to minimise line length
uc_config = config["urban_centre"]
pop_config = config["population"]
gtfs_config = config["gtfs"]
osm_config = config["osm"]
# analyse_net_config = config["analyse_network"]
# metrics_config = config["metrics"]
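# %%
# optional sanity check (not part of the original workflow): the keys below
# are the ones the cells in this script read from each config table; flag any
# that are missing from the loaded TOML before the longer-running steps start
expected_keys = {
    "urban_centre": [
        "override", "input_dir", "merged_path", "bbox", "centre",
        "buffer_size", "write_outputs", "output_map_path",
    ],
    "population": [
        "override", "merged_resampled_path", "threshold", "write_outputs",
        "output_map_path",
    ],
    "gtfs": [
        "override", "input_path", "filtered_path", "cleaned_path", "units",
        "write_outputs", "stops_map_path", "hull_map_path",
        "used_stops_map_path",
    ],
    "osm": ["override", "input_path", "filtered_path", "tag_filter"],
}
for section, keys in expected_keys.items():
    missing = sorted(set(keys) - set(config[section]))
    if missing:
        print(f"[{section}] config is missing keys: {missing}")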
# %% [markdown] noqa: D212, D400, D415
"""
## Urban Centre Detection
Merge the 1 km gridded data together, then detect the urban centre.
### Data Sources
Using [GHS-POP 1 km gridded](https://ghsl.jrc.ec.europa.eu/download.php?ds=pop)
population estimates, in a **Mollweide CRS**. The following tiles are expected
in `config["urban_centre"]["input_dir"]` (these cover the British Isles and
France). Use the 2020 epoch, or update the `subset_regex` pattern in the cell
below to match your files:
- R3-C18
- R3-C19
- R4-C18
- R4-C19
"""
# %%
# merge the urban centre input raster files to form one larger area
# use subset regex to ensure expected year, CRS and resolution are used
if uc_config["override"]:
    merge_raster_files(
        here(uc_config["input_dir"]),
        os.path.dirname(here(uc_config["merged_path"])),
        os.path.basename(uc_config["merged_path"]),
        subset_regex="GHS_POP_E2020_GLOBE_R2023A_54009_1000_",
    )
# %%
# put bbox into a geopandas dataframe for `get_urban_centre` input
bbox_gdf = gpd.GeoDataFrame(
    geometry=[box(*uc_config["bbox"])], crs="ESRI:54009"
)
# detect urban centre
uc = UrbanCentre(here(uc_config["merged_path"]))
uc_gdf = uc.get_urban_centre(
    bbox_gdf,
    centre=tuple(uc_config["centre"]),
    buffer_size=uc_config["buffer_size"],
    centre_crs="epsg:4326",
)
# set the index to the label column to make filtering easier
uc_gdf.set_index("label", inplace=True)
# %%
# visualise outputs
m = uc_gdf[::-1].reset_index().explore("label", cmap="viridis")
# write to file
if uc_config["write_outputs"]:
    if not os.path.exists(
        os.path.dirname(here(uc_config["output_map_path"]))
    ):
        os.makedirs(os.path.dirname(here(uc_config["output_map_path"])))
    m.save(here(uc_config["output_map_path"]))
m
# %% [markdown] noqa: D212, D400, D415
"""
## Population
Merge the 100 m gridded data sources together, then resample onto a 200 m grid
by summing the constituent cells. Then retrieve population data within the
buffered urban centre boundary detected in the step above.
### Data Sources
Using [GHS-POP 100 m gridded](https://ghsl.jrc.ec.europa.eu/download.php?ds=pop)
population estimates, in a **Mollweide CRS**. The following tiles are expected
in `config["population"]["input_dir"]` (these cover the British Isles and
France). Use the 2020 epoch, or update the `subset_regex` pattern in the cell
below to match your files:
- R3-C18
- R3-C19
- R4-C18
- R4-C19
"""
# %%
# merge the population input raster files to form one larger area
# use regex to ensure 2020 data, CRS, and resolution are as expected in name
# if pop_config["override"]:
#     merge_raster_files(
#         here(pop_config["input_dir"]),
#         os.path.dirname(here(pop_config["merged_path"])),
#         os.path.basename(pop_config["merged_path"]),
#         subset_regex="GHS_POP_E2020_GLOBE_R2023A_54009_100_",
#     )
# %%
# resample 100m grids to 200m grids (default resample factor used)
# Can take a couple of minutes...
# if pop_config["override"]:
#     sum_resample_file(
#         here(pop_config["merged_path"]),
#         here(pop_config["merged_resampled_path"]),
#     )
# %%
# extract geometries from urban centre detection
aoi_bounds = uc_gdf.loc["buffer"].geometry
urban_centre_bounds = uc_gdf.loc["vectorized_uc"].geometry
# get population data
rp = RasterPop(here(pop_config["merged_resampled_path"]))
pop_gdf, centroid_gdf = rp.get_pop(
    aoi_bounds,
    threshold=pop_config["threshold"],
    urban_centre_bounds=urban_centre_bounds,
)
centroid_gdf.to_file(
    here("data/processed/population/london_centroid_gdf.json"),
    drop_id=True,
    driver="GeoJSON",
)
# %%
# write interactive visual to file
if pop_config["write_outputs"]:
    rp.plot(which="folium", save=here(pop_config["output_map_path"]))
# view static visual in interactive window
rp.plot(which="cartopy")
# %% [markdown] noqa: D212, D400, D415
"""
## GTFS
Clip the GTFS data to the buffered urban centre area, then clean and validate
the GTFS data.
### Data Sources
In this example a whole-of-Wales GTFS data source is used, provided by the
[Department for Transport's BODS](https://data.bus-data.dft.gov.uk/). The
`itm_wales_gtfs.zip` file is expected at the path set by
`config['gtfs']['input_path']`.
"""
# %%
# clip the GTFS to the extent of the urban centre buffered area
if gtfs_config["override"]:
    # get the extent of the urban centre bbox (includes buffer)
    gtfs_bbox = list(uc_gdf.loc["bbox"].geometry.bounds)
    # clip to region of interest, setting crs to match the bbox
    bbox_filter_gtfs(
        in_pth=here(gtfs_config["input_path"]),
        out_pth=here(gtfs_config["filtered_path"]),
        bbox=gtfs_bbox,
        units=gtfs_config["units"],
        crs=uc_gdf.crs.to_string(),
    )
# %%
# read in filtered gtfs feed
gtfs = GtfsInstance(
    gtfs_pth=here(gtfs_config["filtered_path"]), units=gtfs_config["units"]
)
# show valid dates
available_dates = gtfs.feed.get_dates()
s = available_dates[0]
f = available_dates[-1]
print(f"{len(available_dates)} dates available between {s} & {f}.")
# %%
# check validity, printing warnings and errors
gtfs.is_valid()
print("Errors:")
gtfs.print_alerts()
print("Warnings:")
gtfs.print_alerts(alert_type="warning")
# %%
# clean the gtfs, then re-check the validity and reprint errors/warnings
# note: this will remove 'Repeated pair (route_short_name, route_long_name)'
gtfs.clean_feed()
gtfs.is_valid()
print("Errors:")
gtfs.print_alerts()
print("Warnings:")
gtfs.print_alerts(alert_type="warning")
# %%
# get the route modes - frequency and proportion of modalities
gtfs.get_route_modes()
# %%
# summarise the trips by day of the week
gtfs.summarise_trips()
# %%
# summarise the routes by day of the week
gtfs.summarise_routes()
# %%
# write visuals (stops and hull) and cleaned feed to file
if gtfs_config["write_outputs"]:
    gtfs.viz_stops(
        here(gtfs_config["stops_map_path"]), create_out_parent=True
    )
    gtfs.viz_stops(
        here(gtfs_config["hull_map_path"]),
        geoms="hull",
        create_out_parent=True,
    )
    # filter single day in gtfs - to be implemented in module
    gtfs.feed = gk.miscellany.restrict_to_dates(gtfs.feed, ["20231024"])
    gtfs.feed.write(here(gtfs_config["cleaned_path"]))
# %%
# display a map of only the stops used in `stop_times.txt`, excluding parents
unique_stops = gtfs.feed.stop_times.stop_id.unique()
m = gk.stops.map_stops(gtfs.feed, stop_ids=unique_stops)
if gtfs_config["write_outputs"]:
    m.save(here(gtfs_config["used_stops_map_path"]))
m
# %% [markdown] noqa: D212, D400, D415
"""
## OpenStreetMap
Clip the OSM data to the buffered urban centre area.
### Data Sources
In this example a whole-of-Wales OSM data source is used, provided by
[Geofabrik](https://download.geofabrik.de/europe/great-britain.html). The
`wales-latest.osm.pbf` file is expected at the path set by
`config['osm']['input_path']`.
"""
# %%
# clip osm file to bbox of urban centre + buffer detected above
if osm_config["override"]:
    # get the extent of the urban centre bbox (includes buffer)
    # need to convert to EPSG:4326 here since this is required by osmosis
    osm_bbox = list(uc_gdf.to_crs("EPSG:4326").loc["bbox"].geometry.bounds)
    filter_osm(
        pbf_pth=here(osm_config["input_path"]),
        out_pth=here(osm_config["filtered_path"]),
        bbox=osm_bbox,
        tag_filter=osm_config["tag_filter"],
    )
# %%
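# %% [markdown] noqa: D212, D400, D415
"""
## Analyse Network (Sketch)
The `analyse_network` and `metrics` steps are not run in this example; their
config sections and the `r5py` imports are commented out above. The cell
below is a minimal, untested sketch of what the `r5py` travel time matrix
step could look like, assuming the clipped OSM file and cleaned GTFS feed
written above, and an illustrative departure time on the date kept by
`restrict_to_dates`. It is left commented out so the script still runs
without `r5py` installed.
"""
# %%
# import datetime

# from r5py import (
#     TransportNetwork,
#     TravelTimeMatrixComputer,
#     TransportMode,
# )

# # build a routable network from the clipped OSM and cleaned GTFS files
# transport_network = TransportNetwork(
#     str(here(osm_config["filtered_path"])),
#     [str(here(gtfs_config["cleaned_path"]))],
# )
# # travel times between population cell centroids by public transport + walk
# # (assumes `centroid_gdf` carries the `id` column r5py expects)
# ttm_computer = TravelTimeMatrixComputer(
#     transport_network,
#     origins=centroid_gdf,
#     destinations=centroid_gdf,
#     departure=datetime.datetime(2023, 10, 24, 8, 0),
#     transport_modes=[TransportMode.TRANSIT, TransportMode.WALK],
# )
# travel_times = ttm_computer.compute_travel_times()
# travel_times.head()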