Skip to content

Commit

Permalink
Added barely a start to a RAP GSDataSet class. I mean, barely.
Browse files Browse the repository at this point in the history
  • Loading branch information
BrianStucky-USDA committed Oct 6, 2023
1 parent 8d6293b commit 3859ab1
Showing 1 changed file with 137 additions and 0 deletions.
137 changes: 137 additions & 0 deletions src/library/datasets/rap-annual.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@

from .gsdataset import GSDataSet
from pyproj.crs import CRS
import datetime
import rioxarray
import api_core.data_request as dr
from subset_geom import SubsetPolygon, SubsetMultiPoint


class RAPVegCover(GSDataSet):
    """
    GSDataSet implementation for the Rangeland Analysis Platform (RAP)
    annual vegetation cover product.

    NOTE(review): the variable list (self.vars), the file name pattern
    (self.fpatterns), and the file-selection logic in getData() still use
    PRISM names and conventions. They look like placeholders that need to
    be replaced with RAP-specific values -- confirm before relying on them.
    """

    def __init__(self, store_path):
        """
        store_path (Path): The location of on-disk dataset storage.
        """
        super().__init__(store_path, 'rap-annual')

        # Basic dataset information.
        self._id = 'RAPvegcover'
        self.name = 'RAP vegetation cover'
        self.url = 'https://rangelands.app/'
        self.description = (
            'The Rangeland Analysis Platform’s vegetation cover product '
            'provides annual percent cover estimates from 1986 to present of: '
            'annual forbs and grasses, perennial forbs and grasses, shrubs, '
            'trees, and bare ground. The estimates were produced by combining '
            '75,000 field plots collected by BLM, NPS, and NRCS with the '
            'historical Landsat satellite record. Cover estimates are '
            'predicted across the United States at 30m resolution, an area '
            'slightly larger than a baseball diamond.'
        )

        # Provider information (not filled in yet).
        self.provider_name = ''
        self.provider_url = ''

        # CRS information.
        self.crs = CRS.from_epsg(4326)

        # The grid size, expressed in the units named by grid_unit.
        self.grid_size = 53899.0 / 200000000
        self.grid_unit = 'degrees'

        # The variables/layers/bands in the dataset.
        # NOTE(review): these are PRISM climate variables, not the cover
        # classes listed in self.description -- presumably a placeholder.
        self.vars = {
            'ppt': 'total precipitation (rain+melted snow)',
            'tmean': 'mean temperature (mean of tmin and tmax)',
            'tmin': 'minimum temperature',
            'tmax': 'maximum temperature',
            'tdmean': 'mean dew point temperature',
            'vpdmin': 'minimum vapor pressure deficit',
            'vpdmax': 'maximum vapor pressure deficit',
        }

        # Temporal coverage of the dataset. Only an annual range is
        # declared; the monthly and daily ranges are left as None.
        self.date_ranges['year'] = [
            datetime.date(1986, 1, 1), datetime.date(2021, 1, 1)
        ]
        self.date_ranges['month'] = [
            None, None
        ]
        self.date_ranges['day'] = [
            None, None
        ]

        # Temporal resolution.
        self.temporal_resolution['year'] = '1 year'

        # File name pattern for each PRISM variable; the placeholders are
        # filled with (variable name, version code, date string). Note that
        # for precipitation data, the current version is "M2" for years
        # < 1981 and "M3" for years >= 1981. See
        # https://prism.oregonstate.edu/documents/PRISM_datasets.pdf for
        # details.
        # NOTE(review): PRISM-specific; presumably to be replaced for RAP.
        self.fpatterns = 'PRISM_{0}_stable_4km{1}_{2}_bil.bil'

    def getData(
        self, varname, date_grain, request_date, ri_method, subset_geom=None
    ):
        """
        Opens the data file for one variable and date and returns its
        contents, optionally subset to a polygon or a set of points.

        varname: The variable to return.
        date_grain: The date granularity to return, specified as a constant in
            data_request.
        request_date: A data_request.RequestDate instance.
        ri_method: The resample/interpolation method to use, if needed.
        subset_geom: An instance of SubsetGeom. If the CRS does not match the
            dataset, an exception is raised.

        Returns the opened raster unchanged if subset_geom is None, the
        raster clipped to the polygon for a SubsetPolygon, or the array of
        interpolated point values for a SubsetMultiPoint.

        Raises ValueError if date_grain is not a recognized granularity or
        if the CRS of subset_geom does not match the CRS of the dataset.
        """
        # Build the name of the required data file.
        if date_grain == dr.ANNUAL:
            fname = self.fpatterns.format(varname, 'M3', request_date.year)
        elif date_grain == dr.MONTHLY:
            datestr = '{0}{1:02}'.format(request_date.year, request_date.month)
            fname = self.fpatterns.format(varname, 'M3', datestr)
        elif date_grain == dr.DAILY:
            datestr = '{0}{1:02}{2:02}'.format(
                request_date.year, request_date.month, request_date.day
            )
            fname = self.fpatterns.format(varname, 'D2', datestr)
        else:
            raise ValueError('Invalid date grain specification.')

        # For precipitation data, the current version is "M2" for years
        # < 1981 and "M3" for years >= 1981. See
        # https://prism.oregonstate.edu/documents/PRISM_datasets.pdf for
        # details. The grain constants are compared by value (!=), matching
        # the == comparisons above, rather than by object identity.
        if (
            request_date.year < 1981 and date_grain != dr.DAILY and
            varname == 'ppt'
        ):
            fname = fname.replace('M3', 'M2')

        # Reject a mismatched subset geometry before doing any file I/O.
        if subset_geom is not None and not self.crs.equals(subset_geom.crs):
            raise ValueError(
                'Subset geometry CRS does not match dataset CRS.'
            )

        fpath = self.ds_path / fname

        # Open data file
        data = rioxarray.open_rasterio(fpath, masked=True)

        if isinstance(subset_geom, SubsetPolygon):
            # Drop unnecessary 'band' dimension because rioxarray
            # can't handle >3 dimensions in some later operations
            data = data.squeeze('band')

            data = data.rio.clip([subset_geom.json], all_touched=True)
        elif isinstance(subset_geom, SubsetMultiPoint):
            # Interpolate all (x,y) points in the subset geometry.  For more
            # information about how/why this works, see
            # https://xarray.pydata.org/en/stable/user-guide/interpolation.html#advanced-interpolation.
            res = data.interp(
                x=('z', subset_geom.geom.x), y=('z', subset_geom.geom.y),
                method=ri_method
            )
            # Keep only the first entry along the leading dimension
            # (presumably the single 'band' -- TODO confirm).
            data = res.values[0]

        return data

0 comments on commit 3859ab1

Please sign in to comment.