diff --git a/sup3r/bias/base.py b/sup3r/bias/base.py
index a9f6ef940..d03a2cb0f 100644
--- a/sup3r/bias/base.py
+++ b/sup3r/bias/base.py
@@ -541,9 +541,9 @@ def _match_zero_rate(bias_data, base_data):
 
         Parameters
         ----------
-        bias_data : T_Array
+        bias_data : Union[np.ndarray, da.core.Array]
            1D array of biased data observations.
-        base_data : T_Array
+        base_data : Union[np.ndarray, da.core.Array]
            1D array of base data observations.
 
         Returns
diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py
index 85fb4bb81..ae7de1c68 100644
--- a/sup3r/bias/bias_transforms.py
+++ b/sup3r/bias/bias_transforms.py
@@ -9,15 +9,16 @@
 """
 
 import logging
+from typing import Union
 from warnings import warn
 
+import dask.array as da
 import numpy as np
 import pandas as pd
 from rex.utilities.bc_utils import QuantileDeltaMapping
 from scipy.ndimage import gaussian_filter
 
 from sup3r.preprocessing import Rasterizer
-from sup3r.typing import T_Array
 
 logger = logging.getLogger(__name__)
 
@@ -114,7 +115,7 @@ def get_spatial_bc_factors(lat_lon, feature_name, bias_fp, threshold=0.1):
 
 
 def get_spatial_bc_quantiles(
-    lat_lon: T_Array,
+    lat_lon: Union[np.ndarray, da.core.Array],
     base_dset: str,
     feature_name: str,
     bias_fp: str,
@@ -131,7 +132,7 @@ def get_spatial_bc_quantiles(
 
     Parameters
     ----------
-    lat_lon : T_Array
+    lat_lon : Union[np.ndarray, da.core.Array]
        Array of latitudes and longitudes for the domain to bias correct
        (n_lats, n_lons, 2)
    base_dset : str
@@ -480,7 +481,7 @@ def local_qdm_bc(
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        Sup3r input data to be bias corrected, assumed to be 3D with shape
        (spatial, spatial, temporal) for a single feature.
    lat_lon : np.ndarray
diff --git a/sup3r/bias/utilities.py b/sup3r/bias/utilities.py
index 13634196c..8f41e22fd 100644
--- a/sup3r/bias/utilities.py
+++ b/sup3r/bias/utilities.py
@@ -197,7 +197,7 @@ def bias_correct_feature(
 
    Returns
    -------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        Data corrected by the bias_correct_method ready for input to the
        forward pass through the generative model.
    """
diff --git a/sup3r/pipeline/forward_pass.py b/sup3r/pipeline/forward_pass.py
index 0275e5e7b..81b927afc 100644
--- a/sup3r/pipeline/forward_pass.py
+++ b/sup3r/pipeline/forward_pass.py
@@ -143,7 +143,7 @@ def pad_source_data(self, input_data, pad_width, exo_data, mode='reflect'):
 
        Parameters
        ----------
-        input_data : T_Array
+        input_data : Union[np.ndarray, da.core.Array]
            Source input data from data handler class, shape is:
            (spatial_1, spatial_2, temporal, features)
        pad_width : tuple
@@ -158,7 +158,7 @@ def pad_source_data(self, input_data, pad_width, exo_data, mode='reflect'):
 
        Returns
        -------
-        out : T_Array
+        out : Union[np.ndarray, da.core.Array]
            Padded copy of source input data from data handler class, shape is:
            (spatial_1, spatial_2, temporal, features)
        exo_data : dict
@@ -287,7 +287,7 @@ def _reshape_data_chunk(model, data_chunk, exo_data):
        ----------
        model : Sup3rGan
            Sup3rGan or similar sup3r model
-        data_chunk : T_Array
+        data_chunk : Union[np.ndarray, da.core.Array]
            Low resolution data for a single spatiotemporal chunk that is
            going to be passed to the model generate function.
        exo_data : dict | None
@@ -296,7 +296,7 @@ def _reshape_data_chunk(model, data_chunk, exo_data):
 
        Returns
        -------
-        data_chunk : T_Array
+        data_chunk : Union[np.ndarray, da.core.Array]
            Same as input but reshaped to (temporal, spatial_1, spatial_2,
            features) if the model is a spatial-first model or
            (n_obs, spatial_1, spatial_2, temporal, features) if the
diff --git a/sup3r/pipeline/strategy.py b/sup3r/pipeline/strategy.py
index f0a249944..b54ed45af 100644
--- a/sup3r/pipeline/strategy.py
+++ b/sup3r/pipeline/strategy.py
@@ -10,6 +10,7 @@
 from dataclasses import dataclass
 from typing import Dict, Optional, Tuple, Union
 
+import dask.array as da
 import numpy as np
 import pandas as pd
 
@@ -27,7 +28,6 @@
    get_input_handler_class,
    log_args,
 )
-from sup3r.typing import T_Array
 from sup3r.utilities.utilities import Timer
 
 logger = logging.getLogger(__name__)
 
@@ -38,13 +38,13 @@ class ForwardPassChunk:
    """Structure storing chunk data and attributes for a specific chunk
    going through the generator."""
 
-    input_data: T_Array
+    input_data: Union[np.ndarray, da.core.Array]
    exo_data: Dict
    hr_crop_slice: slice
    lr_pad_slice: slice
-    hr_lat_lon: T_Array
+    hr_lat_lon: Union[np.ndarray, da.core.Array]
    hr_times: pd.DatetimeIndex
-    gids: T_Array
+    gids: Union[np.ndarray, da.core.Array]
    out_file: str
    pad_width: Tuple[tuple, tuple, tuple]
    index: int
@@ -76,14 +76,14 @@ class ForwardPassStrategy:
        string with a unix-style file path which will be passed through
        glob.glob
    model_kwargs : str | list
-        Keyword arguments to send to `model_class.load(**model_kwargs)` to
+        Keyword arguments to send to ``model_class.load(**model_kwargs)`` to
        initialize the GAN. Typically this is just the string path to the
        model directory, but can be multiple models or arguments for more
        complex models.
    fwp_chunk_shape : tuple
        Max shape (spatial_1, spatial_2, temporal) of an unpadded coarse
        chunk to use for a forward pass. The number of nodes that the
-        :class:`ForwardPassStrategy` is set to distribute to is calculated by
+        :class:`.ForwardPassStrategy` is set to distribute to is calculated by
        dividing up the total time index from all file_paths by the temporal
        part of this chunk shape. Each node will then be parallelized across
        parallel processes by the spatial chunk shape. If temporal_pad /
@@ -100,8 +100,8 @@ class ForwardPassStrategy:
        the fwp_chunk_shape.
    model_class : str
        Name of the sup3r model class for the GAN model to load. The default is
-        the basic spatial / spatiotemporal Sup3rGan model. This will be loaded
-        from sup3r.models
+        the basic spatial / spatiotemporal ``Sup3rGan`` model. This will be
+        loaded from ``sup3r.models``
    out_pattern : str
        Output file pattern. Must include {file_id} format key. Each output
        file will have a unique file_id filled in and the ext determines the
@@ -109,16 +109,17 @@ class ForwardPassStrategy:
        and not saved.
    input_handler_name : str | None
        Class to use for input data. Provide a string name to match a
-        rasterizer or handler class in `sup3r.preprocessing`
+        rasterizer or handler class in ``sup3r.preprocessing``
    input_handler_kwargs : dict | None
-        Any kwargs for initializing the `input_handler_name` class.
+        Any kwargs for initializing the ``input_handler_name`` class.
    exo_handler_kwargs : dict | None
-        Dictionary of args to pass to :class:`ExoDataHandler` for extracting
-        exogenous features for multistep foward pass. This should be a nested
-        dictionary with keys for each exogenous feature. The dictionaries
-        corresponding to the feature names should include the path to exogenous
-        data source, the resolution of the exogenous data, and how the
-        exogenous data should be used in the model. e.g. {'topography':
+        Dictionary of args to pass to
+        :class:`~sup3r.preprocessing.data_handlers.ExoDataHandler` for
+        extracting exogenous features for multistep forward pass. This should be
+        a nested dictionary with keys for each exogenous feature. The
+        dictionaries corresponding to the feature names should include the path
+        to exogenous data source, the resolution of the exogenous data, and how
+        the exogenous data should be used in the model. e.g. {'topography':
        {'file_paths': 'path to input files', 'source_file': 'path to exo
        data', 'steps': [..]}}.
    bias_correct_method : str | None
@@ -153,13 +154,13 @@ class ForwardPassStrategy:
        node. If 1 then all forward passes on chunks distributed to a single
        node will be run serially. pass_workers=2 is the minimum number of
        workers required to run the ForwardPass initialization and
-        :meth:`ForwardPass.run_chunk()` methods concurrently.
+        :meth:`~.forward_pass.ForwardPass.run_chunk()` methods concurrently.
    max_nodes : int | None
        Maximum number of nodes to distribute spatiotemporal chunks across.
        If None then a node will be used for each temporal chunk.
    head_node : bool
        Whether initialization is taking place on the head node of a multi node
-        job launch. When this is true :class:`ForwardPassStrategy` is only
+        job launch. When this is true :class:`.ForwardPassStrategy` is only
        partially initialized to provide the head node enough information for
        how to distribute jobs across nodes. Preflight tasks like bias
        correction will be skipped because they will be performed on the nodes
diff --git a/sup3r/postprocessing/collectors/h5.py b/sup3r/postprocessing/collectors/h5.py
index 29bb20eac..a2ad55f3d 100644
--- a/sup3r/postprocessing/collectors/h5.py
+++ b/sup3r/postprocessing/collectors/h5.py
@@ -137,7 +137,7 @@ def get_data(
 
        Returns
        -------
-        f_data : T_Array
+        f_data : Union[np.ndarray, da.core.Array]
            Data array from the fpath cast as input dtype.
        row_slice : slice
            final_time_index[row_slice] = new_time_index
diff --git a/sup3r/postprocessing/writers/base.py b/sup3r/postprocessing/writers/base.py
index 94fddea36..65ce4abf3 100644
--- a/sup3r/postprocessing/writers/base.py
+++ b/sup3r/postprocessing/writers/base.py
@@ -205,7 +205,7 @@ def _ensure_dset_in_output(cls, out_file, dset, data=None):
            Pre-existing H5 file output path
        dset : str
            Dataset name
-        data : T_Array | None
+        data : Union[np.ndarray, da.core.Array] | None
            Optional data to write to dataset if initializing.
""" diff --git a/sup3r/preprocessing/accessor.py b/sup3r/preprocessing/accessor.py index 10f19c74c..74c9379c0 100644 --- a/sup3r/preprocessing/accessor.py +++ b/sup3r/preprocessing/accessor.py @@ -23,7 +23,6 @@ parse_ellipsis, parse_to_list, ) -from sup3r.typing import T_Array logger = logging.getLogger(__name__) @@ -111,7 +110,9 @@ def parse_keys(self, keys): dim_keys = parse_ellipsis(dim_keys, dim_num=len(self._ds.dims)) return features, dict(zip(ordered_dims(self._ds.dims), dim_keys)) - def __getitem__(self, keys) -> Union[T_Array, Self]: + def __getitem__( + self, keys + ) -> Union[Union[np.ndarray, da.core.Array], Self]: """Method for accessing variables. keys can optionally include a feature name or list of feature names as the first entry of a keys tuple. @@ -164,7 +165,7 @@ def __setitem__(self, keys, data): keys to set. This can be a string like 'temperature' or a list like ``['u', 'v']``. ``data`` will be iterated over in the latter case. - data : T_Array | xr.DataArray + data : Union[np.ndarray, da.core.Array] | xr.DataArray array object used to set variable data. If ``variable`` is a list then this is expected to have a trailing dimension with length equal to the length of the list. @@ -212,7 +213,7 @@ def values(self, *args, **kwargs): ..., features)``""" return np.asarray(self.to_array(*args, **kwargs)) - def to_dataarray(self) -> T_Array: + def to_dataarray(self) -> Union[np.ndarray, da.core.Array]: """Return xr.DataArray for the contained xr.Dataset.""" if not self.features: coords = [self._ds[f] for f in Dimension.coords_2d()] @@ -390,7 +391,7 @@ def interpolate_na(self, **kwargs): return type(self)(self._ds) @staticmethod - def _needs_fancy_indexing(keys) -> T_Array: + def _needs_fancy_indexing(keys) -> Union[np.ndarray, da.core.Array]: """We use `.vindex` if keys require fancy indexing.""" where_list = [ ind for ind in keys if isinstance(ind, np.ndarray) and ind.ndim > 0 @@ -444,7 +445,9 @@ def add_dims_to_data_vars(self, vals): new_vals[k] = v return new_vals - def assign(self, vals: Dict[str, Union[T_Array, tuple]]): + def assign( + self, vals: Dict[str, Union[Union[np.ndarray, da.core.Array], tuple]] + ): """Override xarray assign and assign_coords methods to enable update without explicitly providing dimensions if variable already exists. @@ -506,7 +509,7 @@ def time_step(self): return float(mode(sec_diff, keepdims=False).mode) @property - def lat_lon(self) -> T_Array: + def lat_lon(self) -> Union[np.ndarray, da.core.Array]: """Base lat lon for contained data.""" coords = [self._ds[d] for d in Dimension.coords_2d()] return self._stack_features(coords) diff --git a/sup3r/preprocessing/batch_queues/base.py b/sup3r/preprocessing/batch_queues/base.py index a2c558fe6..ccba5deb0 100644 --- a/sup3r/preprocessing/batch_queues/base.py +++ b/sup3r/preprocessing/batch_queues/base.py @@ -40,7 +40,7 @@ def transform( Parameters ---------- - samples : T_Array + samples : Union[np.ndarray, da.core.Array] High resolution batch of samples. 
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
@@ -60,11 +60,11 @@ def transform(
 
        Returns
        -------
-        low_res : T_Array
+        low_res : Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
-        high_res : T_Array
+        high_res : Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
diff --git a/sup3r/preprocessing/batch_queues/conditional.py b/sup3r/preprocessing/batch_queues/conditional.py
index c9691b0db..63b6ffbfe 100644
--- a/sup3r/preprocessing/batch_queues/conditional.py
+++ b/sup3r/preprocessing/batch_queues/conditional.py
@@ -98,14 +98,14 @@ def make_mask(self, high_res):
 
        Parameters
        ----------
-        high_res : T_Array
+        high_res : Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
 
        Returns
        -------
-        mask: T_Array
+        mask: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -136,7 +136,7 @@ def make_output(self, samples):
 
        Parameters
        ----------
-        samples : Tuple[T_Array, T_Array]
+        samples : Tuple[Union[np.ndarray, da.core.Array], ...]
            Tuple of low_res, high_res. Each array is:
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
@@ -144,7 +144,7 @@ def make_output(self, samples):
 
        Returns
        -------
-        output: T_Array
+        output: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -187,7 +187,7 @@ def make_output(self, samples):
        """
        Returns
        -------
-        SF: T_Array
+        SF: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -214,7 +214,7 @@ def make_output(self, samples):
        """
        Returns
        -------
-        (HR - <HR|LR>)**2: T_Array
+        (HR - <HR|LR>)**2: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -236,7 +236,7 @@ def make_output(self, samples):
        """
        Returns
        -------
-        HR**2: T_Array
+        HR**2: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -253,7 +253,7 @@ def make_output(self, samples):
        """
        Returns
        -------
-        (SF - <SF|LR>)**2: T_Array
+        (SF - <SF|LR>)**2: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
@@ -281,7 +281,7 @@ def make_output(self, samples):
        """
        Returns
        -------
-        SF**2: T_Array
+        SF**2: Union[np.ndarray, da.core.Array]
            4D | 5D array
            (batch_size, spatial_1, spatial_2, features)
            (batch_size, spatial_1, spatial_2, temporal, features)
diff --git a/sup3r/preprocessing/batch_queues/utilities.py b/sup3r/preprocessing/batch_queues/utilities.py
index e4589adf8..59f0a99e6 100644
--- a/sup3r/preprocessing/batch_queues/utilities.py
+++ b/sup3r/preprocessing/batch_queues/utilities.py
@@ -14,7 +14,7 @@ def temporal_simple_enhancing(data, t_enhance=4, mode='constant'):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        5D array with dimensions
        (observations, spatial_1, spatial_2, temporal, features)
    t_enhance : int
@@ -24,7 +24,7 @@ def temporal_simple_enhancing(data, t_enhance=4, mode='constant'):
 
    Returns
    -------
-    enhanced_data : T_Array
+    enhanced_data : Union[np.ndarray, da.core.Array]
        5D array with same dimensions as data with new enhanced
        resolution
    """
@@ -59,7 +59,7 @@ def smooth_data(low_res, training_features, smoothing_ignore, smoothing=None):
 
    Parameters
    ----------
-    low_res : T_Array
+    low_res : Union[np.ndarray, da.core.Array]
        4D | 5D array
        (batch_size, spatial_1, spatial_2, features)
        (batch_size, spatial_1, spatial_2, temporal, features)
@@ -77,7 +77,7 @@ def smooth_data(low_res, training_features, smoothing_ignore, smoothing=None):
 
    Returns
    -------
-    low_res : T_Array
+    low_res : Union[np.ndarray, da.core.Array]
        4D | 5D array
        (batch_size, spatial_1, spatial_2, features)
        (batch_size, spatial_1, spatial_2, temporal, features)
@@ -108,7 +108,7 @@ def spatial_simple_enhancing(data, s_enhance=2, obs_axis=True):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        5D | 4D | 3D array with dimensions:
        (n_obs, spatial_1, spatial_2, temporal, features) (obs_axis=True)
        (n_obs, spatial_1, spatial_2, features) (obs_axis=True)
@@ -122,7 +122,7 @@ def spatial_simple_enhancing(data, s_enhance=2, obs_axis=True):
 
    Returns
    -------
-    enhanced_data : T_Array
+    enhanced_data : Union[np.ndarray, da.core.Array]
        3D | 4D | 5D array with same dimensions as data with new
        enhanced resolution
    """
diff --git a/sup3r/preprocessing/data_handlers/exo.py b/sup3r/preprocessing/data_handlers/exo.py
index 70507ba69..b38f7ed53 100644
--- a/sup3r/preprocessing/data_handlers/exo.py
+++ b/sup3r/preprocessing/data_handlers/exo.py
@@ -34,7 +34,7 @@ def __init__(self, feature, combine_type, model, data):
            Specifies the model index which will use the `data`. For example,
            if ``model`` == 1 then the ``data`` will be used according to
            `combine_type` in the 2nd model step in a MultiStepGan.
-        data : T_Array
+        data : Union[np.ndarray, da.core.Array]
            The data to be used for the given model step.
        """
        step = {'model': model, 'combine_type': combine_type, 'data': data}
diff --git a/sup3r/preprocessing/data_handlers/nc_cc.py b/sup3r/preprocessing/data_handlers/nc_cc.py
index 5443f9c82..632e66005 100644
--- a/sup3r/preprocessing/data_handlers/nc_cc.py
+++ b/sup3r/preprocessing/data_handlers/nc_cc.py
@@ -155,7 +155,7 @@ def get_clearsky_ghi(self):
 
        Returns
        -------
-        cs_ghi : T_Array
+        cs_ghi : Union[np.ndarray, da.core.Array]
            Clearsky ghi (W/m2) from the nsrdb_source_fp h5 source file. Data
            shape is (lat, lon, time) where time is daily average values.
        """
diff --git a/sup3r/preprocessing/derivers/base.py b/sup3r/preprocessing/derivers/base.py
index 3558559ab..4d2171bce 100644
--- a/sup3r/preprocessing/derivers/base.py
+++ b/sup3r/preprocessing/derivers/base.py
@@ -17,7 +17,6 @@
    _rechunk_if_dask,
    parse_to_list,
 )
-from sup3r.typing import T_Array
 from sup3r.utilities.interpolation import Interpolator
 
 from .methods import DerivedFeature, RegistryBase
@@ -105,7 +104,9 @@ def no_overlap(self, feature):
        """Check if any of the nested inputs for 'feature' contain 'feature'"""
        return feature not in self.get_inputs(feature)
 
-    def check_registry(self, feature) -> Union[T_Array, str, None]:
+    def check_registry(
+        self, feature
+    ) -> Union[np.ndarray, da.core.Array, str, None]:
        """Get compute method from the registry if available. Will check for
        pattern feature match in feature registry. e.g. if u_100m matches a
        feature registry entry of u_(.*)m
@@ -175,11 +176,14 @@ def map_new_name(self, feature, pattern):
            raise RuntimeError(msg)
        logger.debug(
            'Found alternative name "%s" for "%s". Continuing compute method '
-            'search for %s.', feature, new_feature, new_feature
+            'search for %s.',
+            feature,
+            new_feature,
+            new_feature,
        )
        return new_feature
 
-    def derive(self, feature) -> T_Array:
+    def derive(self, feature) -> Union[np.ndarray, da.core.Array]:
        """Routine to derive requested features. Employs a little recursion
        to locate differently named features with a name map in the feature
        registry. i.e. if `FEATURE_REGISTRY` contains a key, value pair like
diff --git a/sup3r/preprocessing/derivers/utilities.py b/sup3r/preprocessing/derivers/utilities.py
index 77fe9837e..b08a942cd 100644
--- a/sup3r/preprocessing/derivers/utilities.py
+++ b/sup3r/preprocessing/derivers/utilities.py
@@ -75,24 +75,24 @@ def transform_rotate_wind(ws, wd, lat_lon):
 
    Parameters
    ----------
-    ws : T_Array
+    ws : Union[np.ndarray, da.core.Array]
        3D array of high res windspeed data
        (spatial_1, spatial_2, temporal)
-    wd : T_Array
+    wd : Union[np.ndarray, da.core.Array]
        3D array of high res winddirection data. Angle is in degrees and
        measured relative to the south_north direction.
        (spatial_1, spatial_2, temporal)
-    lat_lon : T_Array
+    lat_lon : Union[np.ndarray, da.core.Array]
        3D array of lat lon
        (spatial_1, spatial_2, 2)
        Last dimension has lat / lon in that order
 
    Returns
    -------
-    u : T_Array
+    u : Union[np.ndarray, da.core.Array]
        3D array of high res U data
        (spatial_1, spatial_2, temporal)
-    v : T_Array
+    v : Union[np.ndarray, da.core.Array]
        3D array of high res V data
        (spatial_1, spatial_2, temporal)
    """
@@ -132,23 +132,23 @@ def invert_uv(u, v, lat_lon):
 
    Parameters
    ----------
-    u : T_Array
+    u : Union[np.ndarray, da.core.Array]
        3D array of high res U data
        (spatial_1, spatial_2, temporal)
-    v : T_Array
+    v : Union[np.ndarray, da.core.Array]
        3D array of high res V data
        (spatial_1, spatial_2, temporal)
-    lat_lon : T_Array
+    lat_lon : Union[np.ndarray, da.core.Array]
        3D array of lat lon
        (spatial_1, spatial_2, 2)
        Last dimension has lat / lon in that order
 
    Returns
    -------
-    ws : T_Array
+    ws : Union[np.ndarray, da.core.Array]
        3D array of high res windspeed data
        (spatial_1, spatial_2, temporal)
-    wd : T_Array
+    wd : Union[np.ndarray, da.core.Array]
        3D array of high res winddirection data. Angle is in degrees and
        measured relative to the south_north direction.
        (spatial_1, spatial_2, temporal)
diff --git a/sup3r/preprocessing/samplers/base.py b/sup3r/preprocessing/samplers/base.py
index 84e9d314f..aead91b8b 100644
--- a/sup3r/preprocessing/samplers/base.py
+++ b/sup3r/preprocessing/samplers/base.py
@@ -1,10 +1,10 @@
-"""Abstract sampler objects. These are containers which also can sample from
-the underlying data. These interface with Batchers so they also have additional
-information about how different features are used by models."""
+"""Basic ``Sampler`` objects. These are containers which also can sample from
+the underlying data. These interface with ``BatchQueues`` so they also have
+additional information about how different features are used by models."""
 
 import logging
 from fnmatch import fnmatch
-from typing import Dict, Optional, Tuple, Union
+from typing import Dict, Optional, Tuple
 from warnings import warn
 
 import dask.array as da
@@ -16,7 +16,6 @@
    uniform_time_sampler,
 )
 from sup3r.preprocessing.utilities import log_args, lowered
-from sup3r.typing import T_Array
 
 logger = logging.getLogger(__name__)
 
@@ -37,7 +36,7 @@ def __init__(
        Parameters
        ----------
        data: Union[Sup3rX, Sup3rDataset],
-            Object with data that will be sampled from. Usually the `.data`
+            Object with data that will be sampled from. Usually the ``.data``
            attribute of various :class:`~sup3r.preprocessing.base.Container`
            objects. i.e. :class:`~sup3r.preprocessing.loaders.Loader`,
            :class:`~sup3r.preprocessing.rasterizers.Rasterizer`,
@@ -130,7 +129,8 @@ def preflight(self):
            'the raw data. This prevents us from building batches from '
            'a single sample with n_time_steps = sample_shape[2] * batch_size '
            'which is far more performant than building batches n_samples = '
-            'batch_size, each with n_time_steps = sample_shape[2].')
+            'batch_size, each with n_time_steps = sample_shape[2].'
+        )
        if self.data.shape[2] < self.sample_shape[2] * self.batch_size:
            logger.warning(msg)
            warn(msg)
@@ -173,7 +173,7 @@ def _reshape_samples(self, samples):
 
        Parameters
        ----------
-        samples : T_Array
+        samples : Union[np.ndarray, da.core.Array]
            Selection from `self.data` with shape:
            (samp_shape[0], samp_shape[1], batch_size * samp_shape[2], n_feats)
            This is reshaped to:
@@ -209,7 +209,8 @@ def _stack_samples(self, samples):
 
        Parameters
        ----------
-        samples : Tuple[List[T_Array], List[T_Array]] | List[T_Array]
+        samples : Tuple[List[np.ndarray | da.core.Array], ...] |
+            List[np.ndarray | da.core.Array]
            Each list has length = batch_size and each array has shape:
            (samp_shape[0], samp_shape[1], samp_shape[2], n_feats)
@@ -227,9 +228,7 @@ def _stack_samples(self, samples):
 
    def _fast_batch(self):
        """Get batch of samples with adjacent time slices."""
-        out = self.data.sample(
-            self.get_sample_index(n_obs=self.batch_size)
-        )
+        out = self.data.sample(self.get_sample_index(n_obs=self.batch_size))
        if isinstance(out, tuple):
            return tuple(self._reshape_samples(o) for o in out)
        return self._reshape_samples(out)
@@ -245,10 +244,18 @@ def _slow_batch(self):
    def _fast_batch_possible(self):
        return self.batch_size * self.sample_shape[2] <= self.data.shape[2]
 
-    def __next__(self) -> Union[T_Array, Tuple[T_Array, T_Array]]:
+    def __next__(self):
        """Get next batch of samples. This retrieves n_samples = batch_size
        with shape = sample_shape from the `.data` (a xr.Dataset or
-        Sup3rDataset) through the Sup3rX accessor."""
+        Sup3rDataset) through the Sup3rX accessor.
+
+        Returns
+        -------
+        samples : tuple(np.ndarray | da.core.Array) | np.ndarray | da.core.Array
+            Either a tuple or single array of samples. This is a tuple when
+            this method is sampling from a ``Sup3rDataset`` with two data
+            members
+        """  # pylint: disable=line-too-long  # noqa
        if self._fast_batch_possible():
            return self._fast_batch()
        return self._slow_batch()
diff --git a/sup3r/preprocessing/samplers/cc.py b/sup3r/preprocessing/samplers/cc.py
index 77117176c..801a8f950 100644
--- a/sup3r/preprocessing/samplers/cc.py
+++ b/sup3r/preprocessing/samplers/cc.py
@@ -19,13 +19,13 @@ class DualSamplerCC(DualSampler):
 
    Note
    ----
-    This will always give daily / hourly data if `t_enhance != 1`. The number
+    This will always give daily / hourly data if ``t_enhance != 1``. The number
    of days / hours in the samples is determined by t_enhance. For example, if
-    `t_enhance = 8` and `sample_shape = (..., 24)` there will be 3 days in the
-    low res sample: `lr_sample_shape = (..., 3)`. If `t_enhance != 24` and > 1
-    :meth:`reduce_high_res_sub_daily` will be used to reduce a high res sample
-    shape from `(..., sample_shape[2] * 24 // t_enhance)` to `(...,
-    sample_shape[2])`
+    ``t_enhance = 8`` and ``sample_shape = (..., 24)`` there will be 3 days in
+    the low res sample: ``lr_sample_shape = (..., 3)``. If
+    ``1 < t_enhance != 24`` :meth:`reduce_high_res_sub_daily` will be used to
+    reduce a high res sample shape from
+    ``(..., sample_shape[2] * 24 // t_enhance)`` to ``(..., sample_shape[2])``
    """
 
    def __init__(
@@ -53,7 +53,7 @@ def __init__(
            Temporal enhancement factor
        feature_sets : Optional[dict]
            Optional dictionary describing how the full set of features is
-            split between `lr_only_features` and `hr_exo_features`.
+            split between ``lr_only_features`` and ``hr_exo_features``.
 
            lr_only_features : list | tuple
                List of feature names or patt*erns that should only be
@@ -71,7 +71,7 @@ def __init__(
        """
        msg = (
            f'{self.__class__.__name__} requires a Sup3rDataset object '
-            'with `.daily` and `.hourly` data members, in that order'
+            'with .daily and .hourly data members, in that order'
        )
        assert hasattr(data, 'daily') and hasattr(data, 'hourly'), msg
        lr, hr = data.daily, data.hourly
@@ -96,8 +96,8 @@ def __init__(
        )
 
    def check_for_consistent_shapes(self):
-        """Make sure container shapes are compatible with enhancement
-        factors."""
+        """Make sure container shapes and sample shapes are compatible with
+        enhancement factors."""
        enhanced_shape = (
            self.lr_data.shape[0] * self.s_enhance,
            self.lr_data.shape[1] * self.s_enhance,
@@ -118,7 +118,7 @@ def reduce_high_res_sub_daily(self, high_res, csr_ind=0):
 
        Parameters
        ----------
-        high_res : T_Array
+        high_res : Union[np.ndarray, da.core.Array]
            5D array with dimensions (n_obs, spatial_1, spatial_2, temporal,
            n_features) where temporal >= 24 (set by the data handler).
        csr_ind : int
@@ -127,7 +127,7 @@ def reduce_high_res_sub_daily(self, high_res, csr_ind=0):
 
        Returns
        -------
-        high_res : T_Array
+        high_res : Union[np.ndarray, da.core.Array]
            5D array with dimensions (n_obs, spatial_1, spatial_2, temporal,
            n_features) where temporal has been reduced down to the integer
            lr_sample_shape[2] * t_enhance. For example if hr_sample_shape[2]
@@ -136,11 +136,11 @@ def reduce_high_res_sub_daily(self, high_res, csr_ind=0):
 
        Note
        ----
-        This only does something when `1 < t_enhance < 24.` If t_enhance = 24
-        there is no need for reduction since every daily time step will have 24
-        hourly time steps in the high_res batch data. Of course, if t_enhance =
-        1, we are running for a spatial only model so this routine is
-        unnecessary.
+        This only does something when ``1 < t_enhance < 24``. If
+        ``t_enhance = 24`` there is no need for reduction since every daily
+        time step will have 24 hourly time steps in the high_res batch data.
+        Of course, if ``t_enhance = 1``, we are running for a spatial only
+        model so this routine is unnecessary.
 
        *Needs review from @grantbuster
        """
diff --git a/sup3r/preprocessing/samplers/dc.py b/sup3r/preprocessing/samplers/dc.py
index 24cf40ce5..70c03ffa5 100644
--- a/sup3r/preprocessing/samplers/dc.py
+++ b/sup3r/preprocessing/samplers/dc.py
@@ -4,6 +4,9 @@
 import logging
 from typing import Dict, List, Optional, Union
 
+import dask.array as da
+import numpy as np
+
 from sup3r.preprocessing.accessor import Sup3rX
 from sup3r.preprocessing.base import Sup3rDataset
 from sup3r.preprocessing.samplers.base import Sampler
@@ -13,7 +16,6 @@
    weighted_box_sampler,
    weighted_time_sampler,
 )
-from sup3r.typing import T_Array
 
 logger = logging.getLogger(__name__)
 
@@ -28,8 +30,12 @@ def __init__(
        sample_shape: Optional[tuple] = None,
        batch_size: int = 16,
        feature_sets: Optional[Dict] = None,
-        spatial_weights: Optional[Union[T_Array, List]] = None,
-        temporal_weights: Optional[Union[T_Array, List]] = None,
+        spatial_weights: Optional[
+            Union[np.ndarray, da.core.Array, List]
+        ] = None,
+        temporal_weights: Optional[
+            Union[np.ndarray, da.core.Array, List]
+        ] = None,
    ):
        """
        Parameters
@@ -51,12 +57,12 @@ def __init__(
            Optional dictionary describing how the full set of features is
            split between `lr_only_features` and `hr_exo_features`. See
            :class:`~sup3r.preprocessing.Sampler`
-        spatial_weights : T_Array | List | None
+        spatial_weights : Union[np.ndarray, da.core.Array] | List | None
            Set of weights used to initialize the spatial sampling. e.g. If we
            want to start off sampling across 2 spatial bins evenly this should
            be [0.5, 0.5]. During training these weights will be updated based
            only on performance across the bins associated with these weights.
-        temporal_weights : T_Array | List | None
+        temporal_weights : Union[np.ndarray, da.core.Array] | List | None
            Set of weights used to initialize the temporal sampling. e.g. If we
            want to start off sampling only the first season of the year this
            should be [1, 0, 0, 0]. During training these weights will be
diff --git a/sup3r/preprocessing/samplers/utilities.py b/sup3r/preprocessing/samplers/utilities.py
index c86d5d3b5..9cee4bcc2 100644
--- a/sup3r/preprocessing/samplers/utilities.py
+++ b/sup3r/preprocessing/samplers/utilities.py
@@ -169,7 +169,7 @@ def daily_time_sampler(data, shape, time_index):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        Data array with dimensions
        (spatial_1, spatial_2, temporal, features)
    shape : int
@@ -260,7 +260,7 @@ def nsrdb_reduce_daily_data(data, shape, csr_ind=0):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        5D data array, where [..., csr_ind] is assumed to be clearsky ratio
        with NaN at night.
        (n_obs, spatial_1, spatial_2, temporal, features)
@@ -273,7 +273,7 @@ def nsrdb_reduce_daily_data(data, shape, csr_ind=0):
 
    Returns
    -------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        Same as input but with axis=3 reduced to daylight hours with
        requested shape.
""" diff --git a/sup3r/qa/qa.py b/sup3r/qa/qa.py index e62af72a2..334b717f4 100644 --- a/sup3r/qa/qa.py +++ b/sup3r/qa/qa.py @@ -275,7 +275,7 @@ def get_dset_out(self, name): Returns ------- - out : T_Array + out : Union[np.ndarray, da.core.Array] A copy of the high-resolution output data as a numpy array of shape (spatial_1, spatial_2, temporal) """ @@ -305,13 +305,13 @@ def coarsen_data(self, idf, feature, data): Feature index feature : str Feature name - data : T_Array + data : Union[np.ndarray, da.core.Array] A copy of the high-resolution output data as a numpy array of shape (spatial_1, spatial_2, temporal) Returns ------- - data : T_Array + data : Union[np.ndarray, da.core.Array] A spatiotemporally coarsened copy of the input dataset, still with shape (spatial_1, spatial_2, temporal) """ @@ -388,7 +388,7 @@ def export(self, qa_fp, data, dset_name, dset_suffix=''): ---------- qa_fp : str | None Optional filepath to output QA file (only .h5 is supported) - data : T_Array + data : Union[np.ndarray, da.core.Array] An array with shape (space1, space2, time) that represents the re-coarsened synthetic data minus the source true low-res data, or another dataset of the same shape to be written to disk diff --git a/sup3r/solar/solar.py b/sup3r/solar/solar.py index 53464ae1e..ffe4b9025 100644 --- a/sup3r/solar/solar.py +++ b/sup3r/solar/solar.py @@ -158,7 +158,7 @@ def idnn(self): Returns ------- - idnn : T_Array + idnn : Union[np.ndarray, da.core.Array] 2D array of length (n_sup3r_sites, agg_factor) where the values are meta data indices from the NSRDB. """ @@ -178,7 +178,7 @@ def dist(self): Returns ------- - dist : T_Array + dist : Union[np.ndarray, da.core.Array] 2D array of length (n_sup3r_sites, agg_factor) where the values are decimal degree distances from the sup3r sites to the nsrdb nearest neighbors. @@ -204,7 +204,7 @@ def out_of_bounds(self): Returns ------- - out_of_bounds : T_Array + out_of_bounds : Union[np.ndarray, da.core.Array] 1D boolean array with length == number of sup3r GAN sites. True if the site is too far from the NSRDB. """ @@ -261,7 +261,7 @@ def clearsky_ratio(self): Returns ------- - clearsky_ratio : T_Array + clearsky_ratio : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. """ if self._cs_ratio is None: @@ -285,7 +285,7 @@ def solar_zenith_angle(self): Returns ------- - solar_zenith_angle : T_Array + solar_zenith_angle : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. """ if self._sza is None: @@ -299,7 +299,7 @@ def ghi(self): Returns ------- - ghi : T_Array + ghi : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. """ if self._ghi is None: @@ -318,7 +318,7 @@ def dni(self): Returns ------- - dni : T_Array + dni : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. """ if self._dni is None: @@ -342,7 +342,7 @@ def dhi(self): Returns ------- - dhi : T_Array + dhi : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. """ if self._dhi is None: @@ -361,7 +361,7 @@ def cloud_mask(self): Returns ------- - cloud_mask : T_Array + cloud_mask : Union[np.ndarray, da.core.Array] 2D array with shape (time, sites) in UTC. 
""" return self.clearsky_ratio < self.cloud_threshold @@ -377,7 +377,7 @@ def get_nsrdb_data(self, dset): Returns ------- - out : T_Array + out : Union[np.ndarray, da.core.Array] Dataset of shape (time, sites) where time and sites correspond to the same shape as the sup3r GAN output data and if agg_factor > 1 the sites is an average across multiple NSRDB sites. diff --git a/sup3r/typing.py b/sup3r/typing.py deleted file mode 100644 index 747ecb5d3..000000000 --- a/sup3r/typing.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Types used across preprocessing library.""" - -from typing import Union - -import dask -import numpy as np - -T_Array = Union[np.ndarray, dask.array.core.Array] diff --git a/sup3r/utilities/interpolation.py b/sup3r/utilities/interpolation.py index c879f721c..ed8416faf 100644 --- a/sup3r/utilities/interpolation.py +++ b/sup3r/utilities/interpolation.py @@ -1,6 +1,7 @@ """Interpolator class with methods for pressure and height interpolation""" import logging +from typing import Union from warnings import warn import dask.array as da @@ -9,7 +10,6 @@ from sup3r.preprocessing.utilities import ( _compute_chunks_if_dask, ) -from sup3r.typing import T_Array from sup3r.utilities.utilities import RANDOM_GENERATOR logger = logging.getLogger(__name__) @@ -25,10 +25,10 @@ def get_level_masks(cls, lev_array, level): Parameters ---------- - var_array : T_Array + var_array : Union[np.ndarray, da.core.Array] Array of variable data, for example u-wind in a 4D array of shape (lat, lon, time, level) - lev_array : T_Array + lev_array : Union[np.ndarray, da.core.Array] Height or pressure values for the corresponding entries in var_array, in the same shape as var_array. If this is height and the requested levels are hub heights above surface, lev_array @@ -41,11 +41,11 @@ def get_level_masks(cls, lev_array, level): Returns ------- - mask1 : T_Array + mask1 : Union[np.ndarray, da.core.Array] Array of bools selecting the entries with the closest levels to the one requested. (lat, lon, time, level) - mask2 : T_Array + mask2 : Union[np.ndarray, da.core.Array] Array of bools selecting the entries with the second closest levels to the one requested. (lat, lon, time, level) @@ -106,8 +106,8 @@ def _log_interp(cls, lev_samps, var_samps, level): @classmethod def interp_to_level( cls, - lev_array: T_Array, - var_array: T_Array, + lev_array: Union[np.ndarray, da.core.Array], + var_array: Union[np.ndarray, da.core.Array], level, interp_method='linear', ): @@ -131,7 +131,7 @@ def interp_to_level( Returns ------- - out : T_Array + out : Union[np.ndarray, da.core.Array] Interpolated var_array (lat, lon, time) """ @@ -229,10 +229,10 @@ def prep_level_interp(cls, var_array, lev_array, levels): Parameters ---------- - var_array : T_Array + var_array : Union[np.ndarray, da.core.Array] Array of variable data, for example u-wind in a 4D array of shape (time, vertical, lat, lon) - lev_array : T_Array + lev_array : Union[np.ndarray, da.core.Array] Array of height or pressure values corresponding to the wrf source data in the same shape as var_array. If this is height and the requested levels are hub heights above surface, lev_array should be @@ -245,7 +245,7 @@ def prep_level_interp(cls, var_array, lev_array, levels): Returns ------- - lev_array : T_Array + lev_array : Union[np.ndarray, da.core.Array] Array of levels with noise added to mask locations. levels : list List of levels to interpolate to. 
diff --git a/sup3r/utilities/utilities.py b/sup3r/utilities/utilities.py
index 283358e4f..a35fc78a0 100644
--- a/sup3r/utilities/utilities.py
+++ b/sup3r/utilities/utilities.py
@@ -85,7 +85,7 @@ def temporal_coarsening(data, t_enhance=4, method='subsample'):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        5D array with dimensions
        (observations, spatial_1, spatial_2, temporal, features)
    t_enhance : int
@@ -97,7 +97,7 @@ def temporal_coarsening(data, t_enhance=4, method='subsample'):
 
    Returns
    -------
-    coarse_data : T_Array
+    coarse_data : Union[np.ndarray, da.core.Array]
        5D array with same dimensions as data with new coarse resolution
    """
@@ -189,7 +189,7 @@ def spatial_coarsening(data, s_enhance=2, obs_axis=True):
 
    Parameters
    ----------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        5D | 4D | 3D | 2D array with dimensions:
        (n_obs, spatial_1, spatial_2, temporal, features) (obs_axis=True)
        (n_obs, spatial_1, spatial_2, features) (obs_axis=True)
@@ -204,7 +204,7 @@ def spatial_coarsening(data, s_enhance=2, obs_axis=True):
 
    Returns
    -------
-    data : T_Array
+    data : Union[np.ndarray, da.core.Array]
        2D, 3D | 4D | 5D array with same dimensions as data with new coarse
        resolution
    """
@@ -309,12 +309,12 @@ def nn_fill_array(array):
 
    Parameters
    ----------
-    array : T_Array
+    array : Union[np.ndarray, da.core.Array]
        Input array with NaN values
 
    Returns
    -------
-    array : T_Array
+    array : Union[np.ndarray, da.core.Array]
        Output array with NaN values filled
    """
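
Note on the pattern: the deleted ``sup3r/typing.py`` held the single alias that this patch inlines everywhere, ``T_Array = Union[np.ndarray, dask.array.core.Array]``. A minimal sketch of the resulting annotation style follows. ``fill_nans`` is a hypothetical stand-in for any of the annotated routines, not a function from the patch; it only illustrates that one code path can serve both eager numpy inputs and lazy dask inputs:

    from typing import Union

    import dask.array as da
    import numpy as np


    def fill_nans(
        arr: Union[np.ndarray, da.core.Array],
    ) -> Union[np.ndarray, da.core.Array]:
        """Replace NaNs with zeros; numpy in, numpy out; dask in, dask out."""
        # np.isnan and np.where dispatch to dask through the numpy array
        # protocols, so a dask input stays lazy and a numpy input stays eager.
        return np.where(np.isnan(arr), 0, arr)


    fill_nans(np.array([1.0, np.nan]))                  # eager: array([1., 0.])
    fill_nans(da.from_array(np.array([1.0, np.nan])))   # lazy dask graph

Inlining the union costs some verbosity, which is why each touched module now imports ``dask.array as da`` and ``Union`` directly, but it makes every signature readable without chasing a project-specific alias back to its definition.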