# Vendor subdivide from Rasterio 1.4
# REMOVE when rasterio is upgraded!
# NOTE: the up-front size validation below is a local addition on top of the
# vendored logic; without it a non-positive size makes the loops spin forever.
def subdivide(window, height, width):
    """Divide a window into smaller windows.

    Windows have no overlap and will be at most the desired
    height and width. Smaller windows will be generated where
    the height and width do not evenly divide the window dimensions.

    Parameters
    ----------
    window : Window
        Source window to subdivide.
    height : int
        Subwindow height. Must be positive.
    width : int
        Subwindow width. Must be positive.

    Returns
    -------
    list of Windows
        Subwindows in row-major order (left to right, then top to bottom).
        Empty when the source window has zero height or zero width.

    Raises
    ------
    ValueError
        If ``height`` or ``width`` is not positive.
    """
    # A zero or negative step never advances the offsets, so the loops below
    # would never terminate (and would grow ``subwindows`` without bound).
    if height <= 0 or width <= 0:
        raise ValueError(
            f"Subwindow height and width must be positive, got height={height}, width={width}"
        )

    subwindows = []

    # Exclusive end offsets of the source window.
    irow = window.row_off + window.height
    icol = window.col_off + window.width

    row_off = window.row_off
    while row_off < irow:
        # Clip the last row of subwindows to the bottom edge of the source window.
        _height = irow - row_off if row_off + height > irow else height

        # Every row of subwindows starts again at the left edge.
        col_off = window.col_off
        while col_off < icol:
            # Clip the last subwindow of the row to the right edge.
            _width = icol - col_off if col_off + width > icol else width

            subwindows.append(riowindows.Window(col_off, row_off, _width, _height))
            col_off += width

        row_off += height

    return subwindows
def process(window): @@ -253,20 +306,17 @@ def process(window): if not catchment_open_success: raise HANDDatasetReadError("Failed to open Catchment dataset window") - from rasterio.features import shapes results = [] - for s, v in shapes(catchment_window, mask=None, transform=rasterio.windows.transform(window, catchment_dataset.transform)): + for s, v in shapes(catchment_window, mask=None, transform=riowindows.transform(window, catchment_dataset.transform)): if int(v): - results.append({'hydro_id': int(v), 'geom': s}) + results.append((int(v), shape(s))) return results # Use threading to parallelize the processing of the inundation windows geoms = [] for window in windows: - catchment_windows = process(window) - if catchment_windows: - geoms.extend(catchment_windows) + geoms.extend(process(window)) except Exception as e: raise e @@ -275,11 +325,8 @@ def process(window): catchment_dataset.close() print("Generating polygons") - from shapely.geometry import shape - geom = [shape(i['geom']) for i in geoms] - hydro_ids = [i['hydro_id'] for i in geoms] crs = 'EPSG:3338' if str(huc8).startswith('19') else 'EPSG:5070' - df_final = gpd.GeoDataFrame({'geom':geom, 'hydro_id': hydro_ids}, crs=crs, geometry="geom") + df_final = gpd.GeoDataFrame(geoms, columns=['hydro_id', 'geom'], crs=crs, geometry="geom") df_final = df_final.dissolve(by="hydro_id") df_final = df_final.to_crs(3857) df_final = df_final.set_crs('epsg:3857') From 0de7fca762db675594cb43b17539350806416cf6 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 27 Nov 2024 11:45:29 -0700 Subject: [PATCH 2/3] More memory-efficient handling of shapes. 
--- .../viz_hand_fim_processing/lambda_function.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py b/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py index 87c858c3..dfe5c193 100644 --- a/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py +++ b/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py @@ -406,7 +406,7 @@ def create_inundation_output(huc8, branch, stage_lookup, reference_time, input_v print("--> Setting up windows") # Get the list of windows according to the raster metadata so they can be looped through - windows = [window for ij, window in hand_dataset.block_windows()] + windows = subdivide(riowindows.Window(0, 0, width=hand_dataset.width, height=hand_dataset.height), 1024, 1024) # This function will be run for each raster window. def process(window): @@ -491,11 +491,10 @@ def process(window): return if np.max(inundation_window) != 0: - from rasterio.features import shapes results = [] for s, v in shapes(inundation_window, mask=None, transform=rasterio.windows.transform(window, hand_dataset.transform)): if int(v): - results.append({'hydro_id': int(v), 'geom': s}) + results.append((int(v), shape(s))) return results @@ -516,11 +515,8 @@ def process(window): catchment_dataset.close() print("Generating polygons") - from shapely.geometry import shape - geom = [shape(i['geom']) for i in geoms] - hydro_ids = [i['hydro_id'] for i in geoms] crs = 'EPSG:3338' if str(huc8).startswith('19') else 'EPSG:5070' - df_final = gpd.GeoDataFrame({'geom':geom, 'hydro_id': hydro_ids}, crs=crs, geometry="geom") + df_final = gpd.GeoDataFrame(geoms, columns=['hydro_id', 'geom'], crs=crs, geometry="geom") df_final = df_final.dissolve(by="hydro_id") df_final['geom'] = df_final['geom'].simplify(5) #Simplifying polygons to ~5m to clean up problematic geometries df_final = 
df_final.to_crs(3857) From ce9660aee846e1b5e5c96ec7ef096079aab87112 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 27 Nov 2024 11:49:31 -0700 Subject: [PATCH 3/3] Update call. --- .../image_based/viz_hand_fim_processing/lambda_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py b/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py index dfe5c193..76a9e1cb 100644 --- a/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py +++ b/Core/LAMBDA/viz_functions/image_based/viz_hand_fim_processing/lambda_function.py @@ -492,7 +492,7 @@ def process(window): if np.max(inundation_window) != 0: results = [] - for s, v in shapes(inundation_window, mask=None, transform=rasterio.windows.transform(window, hand_dataset.transform)): + for s, v in shapes(inundation_window, mask=None, transform=riowindows.transform(window, hand_dataset.transform)): if int(v): results.append((int(v), shape(s)))