Piggybacking some updates I made to improve my viz_cache_csvs_to_clipped_geospatial util script into the repo here.
TylerSchrag-NOAA committed Jan 25, 2024
1 parent abbc50f commit 5318848
Showing 1 changed file with 33 additions and 20 deletions.
@@ -138,30 +138,43 @@ def convert_folder_csvs_to_geospatial(folder_path, output_format='gpkg', clip_to
# this up, see the Configuring AWS CLI section of the Hydrovis viz Guide at https://docs.google.com/document/d/1UIbAQycG-mWw5XwDPDunkQED5O96YtsbrOA4MMZ9zmA/edit?usp=sharing

########## Specify your Args Here #############
sso_profile = None # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
bucket_name = 'hydrovis-ti-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
start_date = date(2022, 9, 17)
end_date = date(2022, 9, 20)
reference_times = ["1200"]
include_files_with = ["ref", "prvi"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
skip_files_with = [] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
sso_profile = "prod" # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
bucket_name = 'hydrovis-prod-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
include_files_with = ["ana_inundation"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
skip_files_with = ["counties", "hucs", "building", "_hi.csv", "_prvi", "_public", "_src_skill"] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
clip_to_states = [] # Provide a list of state abbreviations to clip the output to specific states, e.g. ["AL", "GA", "MS"]
output_format = "gpkg" # Set to gpkg or shp - Can add any OGR formats, with some tweaks to the file_format logic in the functions above. BEWARE - large FIM files can be too large for shapefiles, and results may be truncated.
output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests" # Directory where you want output files saved.
output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests\AEP_2_1" # Directory where you want output files saved.
overwrite = False # When set to False (default), the script will automatically skip files that have already been downloaded and/or converted.
delete_csv = True # This will delete the CSV files after conversion.
###############################################
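# --- Illustrative sketch (editor's note, not part of this commit): how the sso_profile
# configured above is typically turned into an S3 client inside a helper like
# download_files_from_s3 (whose body is outside this hunk), assuming boto3.
import boto3
session = boto3.Session(profile_name=sso_profile) if sso_profile else boto3.Session()
s3_client = session.client("s3")  # with sso_profile=None, boto3 falls back to the default credential chain (e.g. an EC2 instance role)
# --- end illustrative sketch ---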

# Loop through days/hours specified
for day in daterange(start_date, end_date):
    ref_date = day.strftime("%Y%m%d")
    for reference_time in reference_times:
        folder_name = f"viz_cache/{ref_date}/{reference_time}/"
        destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
        # Download files from S3
        print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
        download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
        # Convert to geospatial (clip to states as well, if desired)
        convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
events = [
    {"start_date": date(2023, 12, 21), "end_date": date(2023, 12, 21), "reference_times": ["0900", "1000"]},
    {"start_date": date(2023, 12, 16), "end_date": date(2023, 12, 17), "reference_times": ["1200"]},
    {"start_date": date(2023, 12, 17), "end_date": date(2023, 12, 17), "reference_times": ["1300", "1400"]},
    {"start_date": date(2023, 12, 5), "end_date": date(2023, 12, 5), "reference_times": ["1400", "1500"]},
    {"start_date": date(2023, 12, 2), "end_date": date(2023, 12, 2), "reference_times": ["1300", "1400"]},
    {"start_date": date(2023, 10, 28), "end_date": date(2023, 10, 28), "reference_times": ["0900", "1000"]},
    {"start_date": date(2023, 10, 24), "end_date": date(2023, 10, 24), "reference_times": ["1600", "1700"]},
    {"start_date": date(2023, 10, 4), "end_date": date(2023, 10, 4), "reference_times": ["2100", "2200"]},
    {"start_date": date(2023, 10, 3), "end_date": date(2023, 10, 3), "reference_times": ["2200", "2300"]}
]

###############################################
for event in events:
    start_date = event['start_date']
    end_date = event['end_date']
    reference_times = event['reference_times']
    # Loop through days/hours specified
    for day in daterange(start_date, end_date):
        ref_date = day.strftime("%Y%m%d")
        for reference_time in reference_times:
            folder_name = f"viz_cache/{ref_date}/{reference_time}/"
            destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
            # Download files from S3
            print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
            download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=overwrite, output_format=output_format)
            # Convert to geospatial (clip to states as well, if desired)
            convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
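# --- Illustrative sketch (editor's note, not part of this commit): clip_to_states is
# applied inside convert_folder_csvs_to_geospatial (defined above this hunk). A common
# geopandas approach, assuming a state-boundary layer at a hypothetical path with a
# "STUSPS" abbreviation field:
#     states = gpd.read_file(r"C:\path\to\state_boundaries.gpkg")
#     mask = states[states["STUSPS"].isin(clip_to_states)]
#     gdf = gpd.clip(gdf, mask)
# --- end illustrative sketch ---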

print(f"Finished in {round(time.time()-start,0)/60} minutes")
