From 950aaa84493a882b25f1cbfdd6f12155c8358f4c Mon Sep 17 00:00:00 2001
From: tiffanychu90 <tiffany.chu@dot.ca.gov>
Date: Thu, 21 Mar 2024 19:53:36 +0000
Subject: [PATCH] rerun mar for hqta, too many muni brt

---
 .../A1_rail_ferry_brt_stops.py                | 22 +++++++++++---
 .../B1_create_hqta_segments.py                |  2 +-
 high_quality_transit_areas/Makefile           |  7 ++++-
 .../logs/hqta_processing.log                  |  8 +++++
 high_quality_transit_areas/muni_brt.ipynb     |  3 +-
 high_quality_transit_areas/operator_input.py  | 30 +++++++++++++++++++
 6 files changed, 64 insertions(+), 8 deletions(-)
 create mode 100644 high_quality_transit_areas/operator_input.py

diff --git a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
index 76b1920f3..9705610f9 100644
--- a/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
+++ b/high_quality_transit_areas/A1_rail_ferry_brt_stops.py
@@ -26,13 +26,18 @@
 ac_transit_route_id = ["1T"]
 metro_route_desc = ["METRO SILVER LINE", "METRO ORANGE LINE", 
                     "METRO J LINE", "METRO G LINE"]
+
 muni_route_id = [
     '1', '1X', '2',
     '8', '8AX', '8BX', '9', '9R',
     '12', '14', '14R', '15', '19', '22', '27', '28', 
     '30', '33', '36', '38', '38R', '45', '49', '55',
     '90', '91', '714','TBUS',              
-]  
+]
+
+muni_brt_include = pd.read_parquet(
+    f"{GCS_FILE_PATH}operator_input/muni_brt_stops.parquet"
+).stop_id.tolist()
 
 '''
 Eric double checked for bus shelters
@@ -215,14 +220,23 @@ def additional_brt_filtering_out_stops(
     df: geopandas.GeoDataFrame
         Input BRT stops data (combined across operators)
     """
+    metro_name = "LA Metro Bus Schedule"
+    muni_name = "Bay Area 511 Muni Schedule"
+    
+    muni = df[df.name == muni_name].query(
+        'stop_id in @muni_brt_include'
+    )
+    
     # For Metro, unable to filter out non-station stops using GTFS, manual list
-    metro = df[df.name == "LA Metro Bus Schedule"].query(
+    metro = df[df.name == metro_name].query(
         'stop_id not in @metro_j_exclude')
     
-    other_operators = df[df.name != "LA Metro Bus Schedule"]
+    muni_metro = pd.concat([muni, metro], axis=0)
+    
+    other_operators = df[~df.name.isin([metro_name, muni_name])]
 
     brt_df_stops = pd.concat(
-        [metro, other_operators], axis=0
+        [muni_metro, other_operators], axis=0
     ).sort_values(["feed_key", "name"]).reset_index(drop=True)
     
     return brt_df_stops
diff --git a/high_quality_transit_areas/B1_create_hqta_segments.py b/high_quality_transit_areas/B1_create_hqta_segments.py
index 23c5df0cd..b446f4133 100644
--- a/high_quality_transit_areas/B1_create_hqta_segments.py
+++ b/high_quality_transit_areas/B1_create_hqta_segments.py
@@ -112,7 +112,7 @@ def select_shapes_and_segment(
     gdf = gtfs_schedule_wrangling.longest_shape_by_route_direction(
         analysis_date
     ).query(
-        'shape_array_key not in @outside_ca_amtrak_shapes'
+        'shape_array_key not in @outside_amtrak_shapes'
     ).drop(
         columns = ["schedule_gtfs_dataset_key", 
                    "shape_array_key", "route_length"]
diff --git a/high_quality_transit_areas/Makefile b/high_quality_transit_areas/Makefile
index 9a6cfcc51..6130489cc 100644
--- a/high_quality_transit_areas/Makefile
+++ b/high_quality_transit_areas/Makefile
@@ -7,4 +7,9 @@ hqta_data:
 	python C3_create_bus_hqta_types.py
 	python D1_assemble_hqta_points.py
 	python D2_assemble_hqta_polygons.py 
-    
\ No newline at end of file
+
+# Only needed if operator input changes
+# For now, Muni sent over a date-versioned list of stops
+# they want included as BRT
+add_operator_input:
+	python operator_input.py
diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log
index daa56734f..fcbefd361 100644
--- a/high_quality_transit_areas/logs/hqta_processing.log
+++ b/high_quality_transit_areas/logs/hqta_processing.log
@@ -79,3 +79,11 @@
 2024-03-14 11:56:55.334 | INFO     | __main__:<module>:163 - C3_create_bus_hqta_types 2024-03-13 execution time: 0:00:27.390021
 2024-03-14 12:12:21.763 | INFO     | __main__:<module>:295 - D1_assemble_hqta_points 2024-03-13 execution time: 0:00:26.480160
 2024-03-14 12:13:12.687 | INFO     | __main__:<module>:167 - D2_assemble_hqta_polygons 2024-03-13 execution time: 0:00:29.033860
+2024-03-21 11:54:40.930 | INFO     | __main__:<module>:354 - A1_rail_ferry_brt_stops 2024-03-13 execution time: 0:00:51.987419
+2024-03-21 12:01:28.365 | INFO     | __main__:<module>:249 - B1_create_hqta_segments execution time: 0:03:02.428114
+2024-03-21 12:02:23.099 | INFO     | __main__:<module>:256 - B2_sjoin_stops_to_segments 2024-03-13 execution time: 0:00:35.845848
+2024-03-21 12:02:46.911 | INFO     | __main__:<module>:142 - C1_prep_pairwise_intersections 2024-03-13 execution time: 0:00:05.864652
+2024-03-21 12:03:24.770 | INFO     | __main__:<module>:125 - C2_find_intersections 2024-03-13 execution time: 0:00:21.158652
+2024-03-21 12:04:01.449 | INFO     | __main__:<module>:163 - C3_create_bus_hqta_types 2024-03-13 execution time: 0:00:19.553787
+2024-03-21 12:04:42.807 | INFO     | __main__:<module>:295 - D1_assemble_hqta_points 2024-03-13 execution time: 0:00:22.988739
+2024-03-21 12:05:20.102 | INFO     | __main__:<module>:167 - D2_assemble_hqta_polygons 2024-03-13 execution time: 0:00:19.166756
diff --git a/high_quality_transit_areas/muni_brt.ipynb b/high_quality_transit_areas/muni_brt.ipynb
index 4f5b8e103..d1918cc60 100644
--- a/high_quality_transit_areas/muni_brt.ipynb
+++ b/high_quality_transit_areas/muni_brt.ipynb
@@ -53,8 +53,7 @@
     "import pandas as pd\n",
     "\n",
     "from segment_speed_utils import helpers\n",
-    "from update_vars import analysis_date\n",
-    "from utilities import GCS_FILE_PATH\n",
+    "from update_vars import analysis_date, GCS_FILE_PATH\n",
     "\n",
     "analysis_date"
    ]
diff --git a/high_quality_transit_areas/operator_input.py b/high_quality_transit_areas/operator_input.py
new file mode 100644
index 000000000..e1a0b3fba
--- /dev/null
+++ b/high_quality_transit_areas/operator_input.py
@@ -0,0 +1,30 @@
+"""
+Script to save any custom operator request
+to include in our workflow.
+For now, this is Muni's list of bus stops to include
+as BRT.
+Initially, we tagged all of the routes that serve
+these stops, but that flagged over 1_000 stops
+as major_stop_brt.
+Here, it's just the 136 stops.
+"""
+import pandas as pd
+
+from update_vars import GCS_FILE_PATH
+
+if __name__ == "__main__":
+    FILE = "SFMTA_muni_high_quality_transit_stops_2024-02-01.csv"
+
+    muni_stops = (
+        pd.read_csv(f"{GCS_FILE_PATH}operator_input/{FILE}", 
+                    dtype={"bs_id": "str"})
+        .drop(columns=["latitude", "longitude"])
+        .rename(columns={"bs_id": "stop_id"})
+    )
+    
+    muni_stops.to_parquet(
+        f"{GCS_FILE_PATH}operator_input/"
+        f"muni_brt_stops.parquet"
+    )
+    
+    print(f"saved muni stops")
\ No newline at end of file