# Extracted from git patch 425da4363beceb97431012d60d205ad8b2107236
# (stuckyb, Fri, 6 Oct 2023 09:58:31 -0400): "Added barely a start to a RAP
# GSDataSet class. I mean, barely."
# Target file: src/library/datasets/rap-annual.py

from .gsdataset import GSDataSet
from pyproj.crs import CRS
import datetime
import rioxarray
import api_core.data_request as dr
from subset_geom import SubsetPolygon, SubsetMultiPoint


class RAPVegCover(GSDataSet):
    """
    GSDataSet implementation for the Rangeland Analysis Platform (RAP)
    annual vegetation cover product.

    NOTE(review): This class is an early skeleton.  The variable list, the
    file name pattern, and the "M2"/"M3"/"D2" version handling in getData()
    all refer to PRISM (see the comments next to them) and presumably still
    need to be replaced with their RAP equivalents -- confirm before use.
    """

    def __init__(self, store_path):
        """
        store_path (Path): The location of on-disk dataset storage.
        """
        super().__init__(store_path, 'rap-annual')

        # Basic dataset information.
        self._id = 'RAPvegcover'
        self.name = 'RAP vegetation cover'
        self.url = 'https://rangelands.app/'
        self.description = (
            'The Rangeland Analysis Platform’s vegetation cover product '
            'provides annual percent cover estimates from 1986 to present of: '
            'annual forbs and grasses, perennial forbs and grasses, shrubs, '
            'trees, and bare ground. The estimates were produced by combining '
            '75,000 field plots collected by BLM, NPS, and NRCS with the '
            'historical Landsat satellite record. Cover estimates are '
            'predicted across the United States at 30m resolution, an area '
            'slightly larger than a baseball diamond.'
        )

        # Provider information.
        self.provider_name = ''
        self.provider_url = ''

        # CRS information.
        self.crs = CRS.from_epsg(4326)

        # The grid size.
        self.grid_size = 53899.0 / 200000000
        self.grid_unit = 'degrees'

        # The variables/layers/bands in the dataset.
        # NOTE(review): these look like the PRISM climate variables, not the
        # RAP cover classes named in the description above -- confirm.
        self.vars = {
            'ppt': 'total precipitation (rain+melted snow)',
            'tmean': 'mean temperature (mean of tmin and tmax)',
            'tmin': 'minimum temperature',
            'tmax': 'maximum temperature',
            'tdmean': 'mean dew point temperature',
            'vpdmin': 'minimum vapor pressure deficit',
            'vpdmax': 'maximum vapor pressure deficit'
        }

        # Temporal coverage of the dataset.  Only annual data are declared;
        # the monthly and daily ranges are left empty.
        self.date_ranges['year'] = [
            datetime.date(1986, 1, 1), datetime.date(2021, 1, 1)
        ]
        self.date_ranges['month'] = [
            None, None
        ]
        self.date_ranges['day'] = [
            None, None
        ]

        # Temporal resolution.
        self.temporal_resolution['year'] = '1 year'

        # File name pattern for each PRISM variable.  The placeholders are,
        # in order: the variable name, the dataset version, and the date
        # string.  Note that for precipitation data, the current version is
        # "M2" for years < 1981 and "M3" for years >= 1981.  See
        # https://prism.oregonstate.edu/documents/PRISM_datasets.pdf for
        # details.
        self.fpatterns = 'PRISM_{0}_stable_4km{1}_{2}_bil.bil'

    def getData(
        self, varname, date_grain, request_date, ri_method, subset_geom=None
    ):
        """
        Loads the data for one variable and date, optionally restricted to a
        subset geometry.

        varname: The variable to return.
        date_grain: The date granularity to return, specified as a constant in
            data_request.
        request_date: A data_request.RequestDate instance.
        ri_method: The resample/interpolation method to use, if needed.
        subset_geom: An instance of SubsetGeom.  If the CRS does not match the
            dataset, an exception is raised.

        Returns the opened raster as-is if subset_geom is None, the raster
        clipped to the polygon for a SubsetPolygon, or the interpolated point
        values for a SubsetMultiPoint.

        Raises ValueError if date_grain is not a recognized granularity or
        if the subset geometry's CRS does not match the dataset's CRS.
        """
        # Work out the version and date components of the file name.
        if date_grain == dr.ANNUAL:
            version = 'M3'
            datestr = request_date.year
        elif date_grain == dr.MONTHLY:
            version = 'M3'
            datestr = f'{request_date.year}{request_date.month:02}'
        elif date_grain == dr.DAILY:
            version = 'D2'
            datestr = (
                f'{request_date.year}{request_date.month:02}'
                f'{request_date.day:02}'
            )
        else:
            raise ValueError('Invalid date grain specification.')

        # For precipitation data, the current version is "M2" for years
        # < 1981 and "M3" for years >= 1981.  See
        # https://prism.oregonstate.edu/documents/PRISM_datasets.pdf for
        # details.  The date grain is compared by value, consistent with the
        # comparisons above.
        if (
            date_grain != dr.DAILY and varname == 'ppt'
            and request_date.year < 1981
        ):
            version = 'M2'

        fname = self.fpatterns.format(varname, version, datestr)
        fpath = self.ds_path / fname

        # Validate the subset geometry before touching the file system so
        # that a rejected request never opens the raster.
        if subset_geom is not None and not self.crs.equals(subset_geom.crs):
            raise ValueError(
                'Subset geometry CRS does not match dataset CRS.'
            )

        # Open data file.
        data = rioxarray.open_rasterio(fpath, masked=True)

        if isinstance(subset_geom, SubsetPolygon):
            # Drop unnecessary 'band' dimension because rioxarray
            # can't handle >3 dimensions in some later operations.
            data = data.squeeze('band')

            data = data.rio.clip([subset_geom.json], all_touched=True)
        elif isinstance(subset_geom, SubsetMultiPoint):
            # Interpolate all (x,y) points in the subset geometry.  For more
            # information about how/why this works, see
            # https://xarray.pydata.org/en/stable/user-guide/interpolation.html#advanced-interpolation.
            res = data.interp(
                x=('z', subset_geom.geom.x), y=('z', subset_geom.geom.y),
                method=ri_method
            )
            # Keep only the first entry along the leading dimension.
            data = res.values[0]

        return data