Skip to content

Commit

Permalink
Merge pull request #1262 from cal-itp/oct-open-data
Browse files Browse the repository at this point in the history
Oct open data part 1
  • Loading branch information
tiffanychu90 authored Oct 18, 2024
2 parents bcad991 + 7db28c2 commit e9c5f5a
Show file tree
Hide file tree
Showing 24 changed files with 669 additions and 18 deletions.
10 changes: 9 additions & 1 deletion _shared_utils/shared_utils/rt_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@
"jul2024": "2024-07-17",
"aug2024": "2024-08-14",
"sep2024": "2024-09-18",
"oct2024a": "2024-10-14",
"oct2024b": "2024-10-15",
"oct2024": "2024-10-16",
"oct2024c": "2024-10-17",
"oct2024d": "2024-10-18",
"oct2024e": "2024-10-19",
"oct2024f": "2024-10-20",
}

y2023_dates = [
Expand All @@ -73,7 +80,7 @@
y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]


valid_weeks = ["apr2023", "oct2023", "apr2024"]
valid_weeks = ["apr2023", "oct2023", "apr2024", "oct2024"]


def get_week(month: Literal[[*valid_weeks]], exclude_wed: bool) -> list:
Expand All @@ -86,6 +93,7 @@ def get_week(month: Literal[[*valid_weeks]], exclude_wed: bool) -> list:
apr2023_week = get_week(month="apr2023", exclude_wed=False)
oct2023_week = get_week(month="oct2023", exclude_wed=False)
apr2024_week = get_week(month="apr2024", exclude_wed=False)
oct2024_week = get_week(month="oct2024", exclude_wed=False)

MONTH_DICT = {
1: "January",
Expand Down
32 changes: 32 additions & 0 deletions gtfs_funnel/logs/download_data.log
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,35 @@
2024-09-19 08:17:15.855 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-09-19 08:19:06.258 | INFO | __main__:download_one_day:56 - execution time: 0:01:52.036660
2024-09-19 09:28:35.882 | INFO | __main__:download_one_year:35 - execution time: 0:00:45.388883
2024-10-17 19:48:08.455 | INFO | __main__:download_one_day:45 - Analysis date: 2024-10-14
2024-10-17 19:48:10.847 | INFO | __main__:download_one_day:52 - # operators to run: 220
2024-10-17 19:48:10.847 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
2024-10-17 19:48:42.107 | INFO | __main__:download_one_day:86 - execution time: 0:00:33.631682
2024-10-17 19:48:42.297 | INFO | __main__:download_one_day:45 - Analysis date: 2024-10-15
2024-10-17 19:48:44.148 | INFO | __main__:download_one_day:52 - # operators to run: 220
2024-10-17 19:48:44.150 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
2024-10-17 19:49:14.779 | INFO | __main__:download_one_day:86 - execution time: 0:00:32.481154
2024-10-17 19:49:33.224 | INFO | __main__:download_one_day:22 - Analysis date: 2024-10-14
2024-10-17 19:49:34.939 | INFO | __main__:download_one_day:29 - # operators to run: 220
2024-10-17 19:49:34.940 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
2024-10-17 19:49:45.003 | INFO | __main__:download_one_day:64 - execution time: 0:00:11.778543
2024-10-17 19:49:45.047 | INFO | __main__:download_one_day:22 - Analysis date: 2024-10-15
2024-10-17 19:49:46.476 | INFO | __main__:download_one_day:29 - # operators to run: 220
2024-10-17 19:49:46.477 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
2024-10-17 19:49:56.983 | INFO | __main__:download_one_day:64 - execution time: 0:00:11.935309
2024-10-17 19:50:15.683 | INFO | __main__:download_one_day:22 - Analysis date: 2024-10-14
2024-10-17 19:50:17.694 | INFO | __main__:download_one_day:29 - # operators to run: 220
2024-10-17 19:50:17.695 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
2024-10-17 19:52:27.566 | INFO | __main__:download_one_day:63 - execution time: 0:02:11.882708
2024-10-17 19:52:27.631 | INFO | __main__:download_one_day:22 - Analysis date: 2024-10-15
2024-10-17 19:52:29.725 | INFO | __main__:download_one_day:29 - # operators to run: 220
2024-10-17 19:52:29.726 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
2024-10-17 19:54:42.349 | INFO | __main__:download_one_day:63 - execution time: 0:02:14.717313
2024-10-17 19:54:59.527 | INFO | __main__:download_one_day:21 - Analysis date: 2024-10-14
2024-10-17 19:55:01.219 | INFO | __main__:download_one_day:29 - # operators to run: 183
2024-10-17 19:55:01.219 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-10-17 19:56:58.579 | INFO | __main__:download_one_day:56 - execution time: 0:01:59.050949
2024-10-17 19:56:59.929 | INFO | __main__:download_one_day:21 - Analysis date: 2024-10-15
2024-10-17 19:57:01.448 | INFO | __main__:download_one_day:29 - # operators to run: 189
2024-10-17 19:57:01.449 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-10-17 19:59:04.659 | INFO | __main__:download_one_day:56 - execution time: 0:02:04.728848
22 changes: 22 additions & 0 deletions gtfs_funnel/logs/download_vp_v2.log
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,25 @@
2024-09-19 08:33:43.251 | INFO | __main__:<module>:112 - export concatenated vp: 0:04:05.069147
2024-09-19 08:37:30.865 | INFO | __main__:<module>:134 - remove batched parquets
2024-09-19 08:37:30.865 | INFO | __main__:<module>:137 - execution time: 0:08:10.892310
2024-10-17 19:59:24.445 | INFO | __main__:<module>:148 - Analysis date: 2024-10-14
2024-10-17 20:01:27.918 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 0 to GCS: 0:02:03.438786
2024-10-17 20:02:40.507 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 1 to GCS: 0:01:12.588438
2024-10-17 20:06:47.856 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 2 to GCS: 0:04:07.347856
2024-10-17 20:08:36.666 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 3 to GCS: 0:01:48.808560
2024-10-17 20:08:36.666 | INFO | __main__:<module>:155 - execution time: 0:09:12.186603
2024-10-17 20:08:36.667 | INFO | __main__:<module>:148 - Analysis date: 2024-10-15
2024-10-17 20:10:56.539 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 0 to GCS: 0:02:19.871706
2024-10-17 20:12:13.012 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 1 to GCS: 0:01:16.472618
2024-10-17 20:16:18.595 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 2 to GCS: 0:04:05.582366
2024-10-17 20:18:25.253 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 3 to GCS: 0:02:06.656799
2024-10-17 20:18:25.253 | INFO | __main__:<module>:155 - execution time: 0:09:48.586216
2024-10-17 20:18:43.093 | INFO | __main__:<module>:97 - Analysis date: 2024-10-14
2024-10-17 20:18:50.762 | INFO | __main__:<module>:105 - concat and filter batched data: 0:00:07.668354
2024-10-17 20:22:34.818 | INFO | __main__:<module>:112 - export concatenated vp: 0:03:44.055977
2024-10-17 20:26:21.567 | INFO | __main__:<module>:134 - remove batched parquets
2024-10-17 20:26:21.568 | INFO | __main__:<module>:137 - execution time: 0:07:38.474288
2024-10-17 20:26:23.338 | INFO | __main__:<module>:97 - Analysis date: 2024-10-15
2024-10-17 20:26:29.465 | INFO | __main__:<module>:105 - concat and filter batched data: 0:00:05.953508
2024-10-17 20:30:09.125 | INFO | __main__:<module>:112 - export concatenated vp: 0:03:39.660077
2024-10-17 20:33:58.275 | INFO | __main__:<module>:134 - remove batched parquets
2024-10-17 20:33:58.275 | INFO | __main__:<module>:137 - execution time: 0:07:34.764458
22 changes: 22 additions & 0 deletions gtfs_funnel/logs/vp_preprocessing.log
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,25 @@
2024-09-19 09:03:13.200 | INFO | __main__:<module>:235 - vp with dwell time 2024-09-18: 0:05:59.311280
2024-09-19 09:08:43.742 | INFO | __main__:<module>:120 - 2024-09-18: condense vp for trip 0:05:09.575132
2024-09-19 09:20:16.936 | INFO | __main__:<module>:128 - 2024-09-18: prepare vp to use in nearest neighbor: 0:11:33.194871
2024-10-17 20:49:37.343 | INFO | __main__:<module>:169 - 2024-10-14: pare down vp: 0:02:16.208887
2024-10-17 20:52:10.568 | INFO | __main__:<module>:169 - 2024-10-15: pare down vp: 0:02:33.177584
2024-10-17 20:56:36.196 | INFO | __main__:attach_prior_vp_add_direction:90 - persist vp gddf: 0:04:08.937551
2024-10-17 21:00:39.569 | INFO | __main__:attach_prior_vp_add_direction:122 - np vectorize arrays for direction: 0:04:03.372933
2024-10-17 21:00:46.826 | INFO | __main__:<module>:194 - 2024-10-14: export vp direction: 0:08:19.567745
2024-10-17 21:02:15.636 | INFO | __main__:<module>:200 - 2024-10-14: export usable vp with direction: 0:01:28.809937
2024-10-17 21:02:15.637 | INFO | __main__:<module>:203 - 2024-10-14: vp_direction script execution time: 0:09:48.377682
2024-10-17 21:06:33.984 | INFO | __main__:attach_prior_vp_add_direction:90 - persist vp gddf: 0:04:18.346553
2024-10-17 21:10:43.386 | INFO | __main__:attach_prior_vp_add_direction:122 - np vectorize arrays for direction: 0:04:09.402165
2024-10-17 21:10:50.943 | INFO | __main__:<module>:194 - 2024-10-15: export vp direction: 0:08:35.305648
2024-10-17 21:12:18.037 | INFO | __main__:<module>:200 - 2024-10-15: export usable vp with direction: 0:01:27.093838
2024-10-17 21:12:18.040 | INFO | __main__:<module>:203 - 2024-10-15: vp_direction script execution time: 0:10:02.399486
2024-10-17 21:17:25.614 | INFO | __main__:<module>:213 - compute dwell df: 0:04:31.222087
2024-10-17 21:18:45.449 | INFO | __main__:<module>:235 - merge with original and export: 0:01:19.834575
2024-10-17 21:18:45.451 | INFO | __main__:<module>:236 - vp with dwell time 2024-10-14: 0:05:51.056662
2024-10-17 21:24:09.014 | INFO | __main__:<module>:213 - compute dwell df: 0:05:23.562191
2024-10-17 21:25:22.669 | INFO | __main__:<module>:235 - merge with original and export: 0:01:13.654913
2024-10-17 21:25:22.671 | INFO | __main__:<module>:236 - vp with dwell time 2024-10-15: 0:06:37.217104
2024-10-17 21:31:14.849 | INFO | __main__:<module>:120 - 2024-10-14: condense vp for trip 0:05:34.574524
2024-10-17 21:42:43.893 | INFO | __main__:<module>:128 - 2024-10-14: prepare vp to use in nearest neighbor: 0:11:29.044763
2024-10-17 21:47:57.273 | INFO | __main__:<module>:120 - 2024-10-15: condense vp for trip 0:05:13.379949
2024-10-17 21:59:50.020 | INFO | __main__:<module>:128 - 2024-10-15: prepare vp to use in nearest neighbor: 0:11:52.747009
3 changes: 2 additions & 1 deletion gtfs_funnel/update_vars.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from shared_utils import catalog_utils, rt_dates

oct2024_week = rt_dates.get_week("oct2024", exclude_wed=True)
apr2024_week = rt_dates.get_week("apr2024", exclude_wed=True)
oct2023_week = rt_dates.get_week("oct2023", exclude_wed=True)
apr2023_week = rt_dates.get_week("apr2023", exclude_wed=True)
Expand All @@ -11,7 +12,7 @@
)


analysis_date_list = [rt_dates.DATES["sep2024"]]
analysis_date_list = [rt_dates.DATES["oct2024"]]

GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

Expand Down
5 changes: 4 additions & 1 deletion open_data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ compile_open_data_portal:
python gcs_to_esri.py
#update metadata.yml (add new datasets here)
python supplement_meta.py # run if any changes are made to yml
#python arcgis_script_pro.py #(in ESRI!)
python update_data_dict.py # check if columns are missing in data_dictionary yml
python update_fields_fgdc.py # populate fields with data dictionary yml values, run if update_data_dict had changes to incorporate
# Download the zipped shapefiles and metadata.yml, then move them to the local ESRI directory.
#python arcgis_script_pro.py #(in ESRI!)
python metadata_update_pro.py # go back into ESRI and update xml
# Download the overwritten XML files in xml/run_in_esri/, then move them to the local ESRI directory.
#python arcgis_script_pro.py #(in ESRI!)
python cleanup.py # run after ESRI work done
Loading

0 comments on commit e9c5f5a

Please sign in to comment.