Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

May open data prep work #1116

Merged
merged 3 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions _shared_utils/shared_utils/gtfs_utils_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,14 @@ def schedule_daily_feed_to_gtfs_dataset_name(
# Get GTFS schedule datasets from Airtable
dim_gtfs_datasets = schedule_rt_utils.filter_dim_gtfs_datasets(
keep_cols=["key", "name", "type", "regional_feed_type"], custom_filtering={"type": ["schedule"]}, get_df=False
) >> rename(name="gtfs_dataset_name")
)

# Merge on gtfs_dataset_key to get organization name
fact_feeds = (
tbls.mart_gtfs.fct_daily_schedule_feeds()
>> filter(_.date == selected_date)
>> inner_join(_, dim_gtfs_datasets, on="gtfs_dataset_key")
)
>> inner_join(_, dim_gtfs_datasets, on=["gtfs_dataset_key", "gtfs_dataset_name"])
) >> rename(name="gtfs_dataset_name")

if get_df:
fact_feeds = (
Expand Down
1 change: 1 addition & 0 deletions _shared_utils/shared_utils/rt_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"apr2024d": "2024-04-19",
"apr2024e": "2024-04-20",
"apr2024f": "2024-04-21",
"may2024": "2024-05-26",
}

y2023_dates = [
Expand Down
27 changes: 20 additions & 7 deletions gtfs_funnel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,20 @@ download_gtfs_data:
python download_vehicle_positions.py
python concatenate_vehicle_positions.py


preprocess:
preprocess_schedule_vp_dependency:
python stop_times_with_direction.py
python crosswalk_gtfs_dataset_key_to_organization.py

preprocess_vp:
python vp_keep_usable.py
python vp_direction.py
python cleanup.py
python vp_condenser.py

preprocess_schedule_only:
make route_typologies_data
python operator_scheduled_stats.py



route_typologies_data:
python route_typologies.py
python schedule_stats_by_route_direction.py
Expand All @@ -35,9 +37,20 @@ monthly_scheduled_data:
python concatenate_monthly_scheduled_service.py


funnel_gtfs_data:
make download_gtfs_data && make preprocess && make timeseries_preprocessing
make monthly_scheduled_data
funnel_gtfs_single_day:
make download_gtfs_data
make preprocess_schedule_vp_dependency
make preprocess_vp
make preprocess_schedule_only
make timeseries_preprocessing


all:
make funnel_gtfs_single_day
# update update_vars.py
cd ../open_data/ && make create_gtfs_schedule_geospatial_open_data -f Makefile
# update update_vars.py
cd ../high_quality_transit_areas/ && make hqta_data -f Makefile
# update segment_speed_utils.project_vars.py
cd rt_segment_speeds/scripts/ && make all_speeds_pipeline -f Makefile && cd ../../
# update update_vars.py
Expand Down
Loading