Piggybacking some updates I made to improve my viz_cache_csvs_to_clipped_geospatial util script into the repo here.
TylerSchrag-NOAA committed Jan 25, 2024
1 parent abbc50f commit 5318848
Showing 1 changed file with 33 additions and 20 deletions.
@@ -138,30 +138,43 @@ def convert_folder_csvs_to_geospatial(folder_path, output_format='gpkg', clip_to
# this up, see the Configuring AWS CLI section of the Hydrovis viz Guide at https://docs.google.com/document/d/1UIbAQycG-mWw5XwDPDunkQED5O96YtsbrOA4MMZ9zmA/edit?usp=sharing

########## Specify your Args Here #############
sso_profile = None # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
bucket_name = 'hydrovis-ti-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
start_date = date(2022, 9, 17)
end_date = date(2022, 9, 20)
reference_times = ["1200"]
include_files_with = ["ref", "prvi"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
skip_files_with = [] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
sso_profile = "prod" # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
bucket_name = 'hydrovis-prod-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
include_files_with = ["ana_inundation"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
skip_files_with = ["counties", "hucs", "building", "_hi.csv", "_prvi", "_public", "_src_skill"] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
clip_to_states = [] # Provide a list of state abbreviations to clip the output to specific states, e.g. ["AL", "GA", "MS"]
output_format = "gpkg" # Set to gpkg or shp - Can add any OGR formats, with some tweaks to the file_format logic in the functions above. BEWARE - large FIM files can be too large for shapefiles, and results may be truncated.
output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests" # Directory where you want output files saved.
output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests\AEP_2_1" # Directory where you want output files saved.
overwrite = False # When set to False (default), the script will automatically skip files that have already been downloaded and/or converted.
delete_csv = True # This will delete the CSV files after conversion.
###############################################
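# --- Illustrative sketch (editor's note, not part of this commit): how the sso_profile
# configured above is typically turned into an S3 client inside a helper like
# download_files_from_s3 (whose body is outside this hunk), assuming boto3.
import boto3
session = boto3.Session(profile_name=sso_profile) if sso_profile else boto3.Session()
s3_client = session.client("s3")  # with sso_profile=None, boto3 falls back to the default credential chain (e.g. an EC2 instance role)
# --- end illustrative sketch ---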

# Loop through days/hours specified
for day in daterange(start_date, end_date):
    ref_date = day.strftime("%Y%m%d")
    for reference_time in reference_times:
        folder_name = f"viz_cache/{ref_date}/{reference_time}/"
        destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
        # Download files from S3
        print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
        download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
        # Convert to geospatial (clip to states as well, if desired)
        convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
events = [
    {"start_date": date(2023, 12, 21), "end_date": date(2023, 12, 21), "reference_times": ["0900", "1000"]},
    {"start_date": date(2023, 12, 16), "end_date": date(2023, 12, 17), "reference_times": ["1200"]},
    {"start_date": date(2023, 12, 17), "end_date": date(2023, 12, 17), "reference_times": ["1300", "1400"]},
    {"start_date": date(2023, 12, 5), "end_date": date(2023, 12, 5), "reference_times": ["1400", "1500"]},
    {"start_date": date(2023, 12, 2), "end_date": date(2023, 12, 2), "reference_times": ["1300", "1400"]},
    {"start_date": date(2023, 10, 28), "end_date": date(2023, 10, 28), "reference_times": ["0900", "1000"]},
    {"start_date": date(2023, 10, 24), "end_date": date(2023, 10, 24), "reference_times": ["1600", "1700"]},
    {"start_date": date(2023, 10, 4), "end_date": date(2023, 10, 4), "reference_times": ["2100", "2200"]},
    {"start_date": date(2023, 10, 3), "end_date": date(2023, 10, 3), "reference_times": ["2200", "2300"]}
]

###############################################
for event in events:
    start_date = event['start_date']
    end_date = event['end_date']
    reference_times = event['reference_times']
    # Loop through days/hours specified
    for day in daterange(start_date, end_date):
        ref_date = day.strftime("%Y%m%d")
        for reference_time in reference_times:
            folder_name = f"viz_cache/{ref_date}/{reference_time}/"
            destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
            # Download files from S3
            print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
            download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=overwrite, output_format=output_format)
            # Convert to geospatial (clip to states as well, if desired)
            convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
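# --- Illustrative sketch (editor's note, not part of this commit): clip_to_states is
# applied inside convert_folder_csvs_to_geospatial (defined above this hunk). A common
# geopandas approach, assuming a state-boundary layer at a hypothetical path with a
# "STUSPS" abbreviation field:
#     states = gpd.read_file(r"C:\path\to\state_boundaries.gpkg")
#     mask = states[states["STUSPS"].isin(clip_to_states)]
#     gdf = gpd.clip(gdf, mask)
# --- end illustrative sketch ---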

print(f"Finished in {round(time.time()-start,0)/60} minutes")
