From a8b8d74d5bdc87032cde9b8a659d3a30cb93d1b1 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Mon, 29 Jan 2024 16:21:10 -0800 Subject: [PATCH 01/14] Using longs in _ManagedRaster indices. RE:1431 --- src/natcap/invest/sdr/sdr_core.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index c6848bfcf1..2dbe647f13 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -259,14 +259,14 @@ cdef class _ManagedRaster: (xi & (self.block_xmod))] cdef void _load_block(self, int block_index) except *: - cdef int block_xi = block_index % self.block_nx - cdef int block_yi = block_index // self.block_nx + cdef long block_xi = block_index % self.block_nx + cdef long block_yi = block_index // self.block_nx # we need the offsets to subtract from global indexes for cached array - cdef int xoff = block_xi << self.block_xbits - cdef int yoff = block_yi << self.block_ybits + cdef long xoff = block_xi << self.block_xbits + cdef long yoff = block_yi << self.block_ybits - cdef int xi_copy, yi_copy + cdef long xi_copy, yi_copy cdef numpy.ndarray[double, ndim=2] block_array cdef double *double_buffer cdef clist[BlockBufferPair] removed_value_list @@ -275,8 +275,8 @@ cdef class _ManagedRaster: # initially the win size is the same as the block size unless # we're at the edge of a raster - cdef int win_xsize = self.block_xsize - cdef int win_ysize = self.block_ysize + cdef long win_xsize = self.block_xsize + cdef long win_ysize = self.block_ysize # load a new block if xoff+win_xsize > self.raster_x_size: From 2cc5d1ec1d81dea84c3ea2940fdb5b4ed98687c2 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 30 Jan 2024 12:27:06 -0800 Subject: [PATCH 02/14] Logging some more debugging info, changing a couple types. RE:#1431 --- src/natcap/invest/sdr/sdr_core.pyx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index 2dbe647f13..4bfbd8e80c 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -258,7 +258,7 @@ cdef class _ManagedRaster: ((yi & (self.block_ymod))<PyMem_Malloc( @@ -446,7 +452,7 @@ def calculate_sediment_deposition( cdef unsigned long flat_index cdef long seed_col = 0 cdef long seed_row = 0 - cdef long neighbor_row, neighbor_col + cdef long neighbor_row, neighbor_col, ds_neighbor_row, ds_neighbor_col cdef int flow_val, neighbor_flow_val, ds_neighbor_flow_val cdef int flow_weight, neighbor_flow_weight cdef float flow_sum, neighbor_flow_sum From b5c84b8d19fcea6fdb05b9d9c2c8b6cf73095f10 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 30 Jan 2024 16:59:09 -0800 Subject: [PATCH 03/14] Using improved align_and_resize_raster_stack. --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5b2583b0fb..f565c435f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,8 @@ numpy>=1.11.0,!=1.16.0 Rtree>=0.8.2,!=0.9.1 shapely>=2.0.0 scipy>=1.9.0 -pygeoprocessing>=2.4.2 # pip-only +#pygeoprocessing>=2.4.2 # pip-only +git+https://github.com/phargogh/pygeoprocessing.git@feature/366-eliminate-duplicate-rasterization-in-alignment#egg=pygeoprocessing taskgraph>=0.11.0 psutil>=5.6.6 chardet>=3.0.4 From e6f71ecc5b4ed6aa32952877148a907b950729d9 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 30 Jan 2024 17:05:50 -0800 Subject: [PATCH 04/14] Adding pip-only requirement. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f565c435f0..473d798f38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ Rtree>=0.8.2,!=0.9.1 shapely>=2.0.0 scipy>=1.9.0 #pygeoprocessing>=2.4.2 # pip-only -git+https://github.com/phargogh/pygeoprocessing.git@feature/366-eliminate-duplicate-rasterization-in-alignment#egg=pygeoprocessing +git+https://github.com/phargogh/pygeoprocessing.git@feature/366-eliminate-duplicate-rasterization-in-alignment#egg=pygeoprocessing # pip-only taskgraph>=0.11.0 psutil>=5.6.6 chardet>=3.0.4 From e6388ed527497a9eaee63b6d920b35eb62f0920a Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 1 Feb 2024 10:19:17 -0800 Subject: [PATCH 05/14] Using the watersheds mask in align_and_resize_raster_stack. RE:#1431 --- src/natcap/invest/sdr/sdr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index f23526b4a1..dedcf59f04 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -471,6 +471,7 @@ 'aligned_erodibility_path': 'aligned_erodibility.tif', 'aligned_erosivity_path': 'aligned_erosivity.tif', 'aligned_lulc_path': 'aligned_lulc.tif', + 'watersheds_mask_path': 'watersheds_mask.tif', 'mask_path': 'mask.tif', 'masked_dem_path': 'masked_dem.tif', 'masked_drainage_path': 'masked_drainage.tif', @@ -609,7 +610,11 @@ def execute(args): target_pixel_size = (min_pixel_size, -min_pixel_size) target_sr_wkt = dem_raster_info['projection_wkt'] - vector_mask_options = {'mask_vector_path': args['watersheds_path']} + vector_mask_options = { + 'mask_vector_path': args['watersheds_path'], + 'mask_raster_path': f_reg['watersheds_mask_path'], + } + aligned_list.append(f_reg['watersheds_mask_path']) align_task = task_graph.add_task( func=pygeoprocessing.align_and_resize_raster_stack, args=( From 41ab9ca33c16ad1991fd972278feb55f90ef1a1d Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 1 Feb 2024 10:19:43 -0800 Subject: [PATCH 06/14] Adding some additional debugging information. RE:#1431 --- src/natcap/invest/sdr/sdr_core.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index 4bfbd8e80c..d53ae84541 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -293,6 +293,8 @@ cdef class _ManagedRaster: numpy.float64) except ValueError as error: print("Attempted to use the following dimensions:") + print(f" self.block_nx={self.block_nx}, self.block_ny={self.block_ny}") + print(f" block_xi={block_xi}, block_yi={block_yi}") print(f" xoff={xoff}, yoff={yoff}, win_xsize={win_xsize}, win_ysize={win_ysize}") print(f" block_index={block_index}") raise error From 2d4bd54806e250cbdc594c07feee2fa9f073b685 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 1 Feb 2024 11:28:40 -0800 Subject: [PATCH 07/14] Adding some more debug logging. RE:#1431 --- src/natcap/invest/sdr/sdr_core.pyx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index d53ae84541..ecc63ba15b 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -245,14 +245,19 @@ cdef class _ManagedRaster: if dirty_itr == self.dirty_blocks.end(): self.dirty_blocks.insert(block_index) - cdef inline double get(self, long xi, long yi): + cdef inline double get(self, long xi, long yi) except *: """Return the value of the pixel at `xi,yi`.""" cdef int block_xi = xi >> self.block_xbits cdef int block_yi = yi >> self.block_ybits # this is the flat index for the block cdef int block_index = block_yi * self.block_nx + block_xi if not self.lru_cache.exist(block_index): - self._load_block(block_index) + try: + self._load_block(block_index) + except ValueError as error: + print(f".get() failed") + print(f" xi={xi}, yi={yi}, block_xi={block_xi}, block_yi={block_yi}, block_index={block_index}") + raise error return self.lru_cache.get( block_index)[ ((yi & (self.block_ymod))< Date: Thu, 1 Feb 2024 11:29:57 -0800 Subject: [PATCH 08/14] Fixing datatype of processing stack. We are overflowing the int32 space, so the flat_index of a pixel in the processing queue is overflowing int32 (the type of the stack). RE:#1431 --- src/natcap/invest/sdr/sdr_core.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index ecc63ba15b..1e096d6a5f 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -449,14 +449,14 @@ def calculate_sediment_deposition( flow_dir_info = pygeoprocessing.get_raster_info(mfd_flow_direction_path) n_cols, n_rows = flow_dir_info['raster_size'] cdef int mfd_nodata = 0 - cdef stack[int] processing_stack + cdef stack[long] processing_stack cdef float sdr_nodata = pygeoprocessing.get_raster_info( sdr_path)['nodata'][0] cdef float e_prime_nodata = pygeoprocessing.get_raster_info( e_prime_path)['nodata'][0] cdef long col_index, row_index, win_xsize, win_ysize, xoff, yoff cdef long global_col, global_row, j, k - cdef unsigned long flat_index + cdef long flat_index cdef long seed_col = 0 cdef long seed_row = 0 cdef long neighbor_row, neighbor_col, ds_neighbor_row, ds_neighbor_col From d6a36b881cf8f256c2399791d51c5c29a3a30eb6 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Thu, 1 Feb 2024 11:41:36 -0800 Subject: [PATCH 09/14] Correcting paths passed to alignment. RE:#1431 --- src/natcap/invest/sdr/sdr.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index dedcf59f04..0b5111282f 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -614,7 +614,6 @@ def execute(args): 'mask_vector_path': args['watersheds_path'], 'mask_raster_path': f_reg['watersheds_mask_path'], } - aligned_list.append(f_reg['watersheds_mask_path']) align_task = task_graph.add_task( func=pygeoprocessing.align_and_resize_raster_stack, args=( @@ -626,7 +625,7 @@ def execute(args): 'raster_align_index': 0, 'vector_mask_options': vector_mask_options, }, - target_path_list=aligned_list, + target_path_list=aligned_list + [f_reg['watersheds_mask_path']], task_name='align input rasters') mutual_mask_task = task_graph.add_task( From 33972733d8ff148779868d70297293840fd7e6d7 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 2 Feb 2024 10:40:18 -0800 Subject: [PATCH 10/14] Refactoring summary stats vector to minimize rasterizations. RE:#1431 --- src/natcap/invest/sdr/sdr.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index 0b5111282f..5ba73da3be 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -19,6 +19,7 @@ from .. import gettext from .. import spec_utils +from .. import urban_nature_access from .. import utils from .. import validation from ..model_metadata import MODEL_METADATA @@ -1495,18 +1496,26 @@ def _generate_report( target_layer = target_vector.GetLayer() target_layer.SyncToDisk() + # It's worth it to check if the geometries don't significantly overlap. + # On large rasters, this can save a TON of time rasterizing even a + # relatively simple vector. + geometries_might_overlap = urban_nature_access._geometries_overlap( + watershed_results_sdr_path) + fields_and_rasters = [ + ('usle_tot', usle_path), ('sed_export', sed_export_path), + ('sed_dep', sed_deposition_path), ('avoid_exp', avoided_export_path), + ('avoid_eros', avoided_erosion_path)] + + # Using the list option for raster path bands so that we can reduce + # rasterizations, which are costly on large datasets. + zonal_stats_results = pygeoprocessing.zonal_statistics( + [(raster_path, 1) for (_, raster_path) in fields_and_rasters], + watershed_results_sdr_path, + polygons_might_overlap=geometries_might_overlap) + field_summaries = { - 'usle_tot': pygeoprocessing.zonal_statistics( - (usle_path, 1), watershed_results_sdr_path), - 'sed_export': pygeoprocessing.zonal_statistics( - (sed_export_path, 1), watershed_results_sdr_path), - 'sed_dep': pygeoprocessing.zonal_statistics( - (sed_deposition_path, 1), watershed_results_sdr_path), - 'avoid_exp': pygeoprocessing.zonal_statistics( - (avoided_export_path, 1), watershed_results_sdr_path), - 'avoid_eros': pygeoprocessing.zonal_statistics( - (avoided_erosion_path, 1), watershed_results_sdr_path), - } + field: stats for ((field, _), stats) in + zip(fields_and_rasters, zonal_stats_results)} for field_name in field_summaries: field_def = ogr.FieldDefn(field_name, ogr.OFTReal) From e4721abc83f0ccb2c5876c37c26a9a39812bb946 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 6 Feb 2024 15:35:30 -0800 Subject: [PATCH 11/14] Removing my debugging code. RE:#1431 --- src/natcap/invest/sdr/sdr_core.pyx | 41 ++++++++++-------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/src/natcap/invest/sdr/sdr_core.pyx b/src/natcap/invest/sdr/sdr_core.pyx index 1e096d6a5f..45b71b25b1 100644 --- a/src/natcap/invest/sdr/sdr_core.pyx +++ b/src/natcap/invest/sdr/sdr_core.pyx @@ -245,33 +245,28 @@ cdef class _ManagedRaster: if dirty_itr == self.dirty_blocks.end(): self.dirty_blocks.insert(block_index) - cdef inline double get(self, long xi, long yi) except *: + cdef inline double get(self, long xi, long yi): """Return the value of the pixel at `xi,yi`.""" cdef int block_xi = xi >> self.block_xbits cdef int block_yi = yi >> self.block_ybits # this is the flat index for the block cdef int block_index = block_yi * self.block_nx + block_xi if not self.lru_cache.exist(block_index): - try: - self._load_block(block_index) - except ValueError as error: - print(f".get() failed") - print(f" xi={xi}, yi={yi}, block_xi={block_xi}, block_yi={block_yi}, block_index={block_index}") - raise error + self._load_block(block_index) return self.lru_cache.get( block_index)[ ((yi & (self.block_ymod))< self.raster_x_size: @@ -291,18 +286,10 @@ cdef class _ManagedRaster: raster = gdal.OpenEx(self.raster_path, gdal.OF_RASTER) raster_band = raster.GetRasterBand(self.band_id) - try: - block_array = raster_band.ReadAsArray( - xoff=xoff, yoff=yoff, win_xsize=win_xsize, - win_ysize=win_ysize).astype( - numpy.float64) - except ValueError as error: - print("Attempted to use the following dimensions:") - print(f" self.block_nx={self.block_nx}, self.block_ny={self.block_ny}") - print(f" block_xi={block_xi}, block_yi={block_yi}") - print(f" xoff={xoff}, yoff={yoff}, win_xsize={win_xsize}, win_ysize={win_ysize}") - print(f" block_index={block_index}") - raise error + block_array = raster_band.ReadAsArray( + xoff=xoff, yoff=yoff, win_xsize=win_xsize, + win_ysize=win_ysize).astype( + numpy.float64) raster_band = None raster = None double_buffer = PyMem_Malloc( From e50260b304fe7f505122b7e51c264dac5fcd946e Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 6 Feb 2024 15:39:49 -0800 Subject: [PATCH 12/14] Restoring pygeoprocessing requirement. RE:#1431 --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 473d798f38..5b2583b0fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,8 +17,7 @@ numpy>=1.11.0,!=1.16.0 Rtree>=0.8.2,!=0.9.1 shapely>=2.0.0 scipy>=1.9.0 -#pygeoprocessing>=2.4.2 # pip-only -git+https://github.com/phargogh/pygeoprocessing.git@feature/366-eliminate-duplicate-rasterization-in-alignment#egg=pygeoprocessing # pip-only +pygeoprocessing>=2.4.2 # pip-only taskgraph>=0.11.0 psutil>=5.6.6 chardet>=3.0.4 From 8535a02b74f4fd4db75bc880ae1872c414565853 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 6 Feb 2024 15:52:02 -0800 Subject: [PATCH 13/14] Noting changes in HISTORY. RE:1431 --- HISTORY.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 8a76e41d48..1db211a8e7 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -52,6 +52,12 @@ Unreleased Changes administrative units vector now has the correct values for these fields, consistent with the user's guide chapter. https://github.com/natcap/invest/issues/1512 +* SDR + * Fixed an issue encountered in the sediment deposition function where + rasters with more than 2^32 pixels would raise a cryptic error relating + to negative dimensions. https://github.com/natcap/invest/issues/1431 + * Optimized the creation of the summary vector by minimizing the number of + times the target vector needs to be rasterized. 3.14.1 (2023-12-18) ------------------- From bccab6cda3a81b87ce1f867736026ec8156ceeb6 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Tue, 6 Feb 2024 15:53:32 -0800 Subject: [PATCH 14/14] Removing the watersheds mask path so taskgraph doesn't fail on us. RE:#1431 --- src/natcap/invest/sdr/sdr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index 5ba73da3be..f1540982b7 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -472,7 +472,6 @@ 'aligned_erodibility_path': 'aligned_erodibility.tif', 'aligned_erosivity_path': 'aligned_erosivity.tif', 'aligned_lulc_path': 'aligned_lulc.tif', - 'watersheds_mask_path': 'watersheds_mask.tif', 'mask_path': 'mask.tif', 'masked_dem_path': 'masked_dem.tif', 'masked_drainage_path': 'masked_drainage.tif', @@ -613,7 +612,6 @@ def execute(args): target_sr_wkt = dem_raster_info['projection_wkt'] vector_mask_options = { 'mask_vector_path': args['watersheds_path'], - 'mask_raster_path': f_reg['watersheds_mask_path'], } align_task = task_graph.add_task( func=pygeoprocessing.align_and_resize_raster_stack, @@ -626,7 +624,7 @@ def execute(args): 'raster_align_index': 0, 'vector_mask_options': vector_mask_options, }, - target_path_list=aligned_list + [f_reg['watersheds_mask_path']], + target_path_list=aligned_list, task_name='align input rasters') mutual_mask_task = task_graph.add_task(