diff --git a/asv_bench/benchmarks/interp.py b/asv_bench/benchmarks/interp.py new file mode 100644 index 00000000000..edec6df34dd --- /dev/null +++ b/asv_bench/benchmarks/interp.py @@ -0,0 +1,54 @@ +from __future__ import absolute_import, division, print_function + +import numpy as np +import pandas as pd + +import xarray as xr + +from . import parameterized, randn, requires_dask + +nx = 3000 +long_nx = 30000000 +ny = 2000 +nt = 1000 +window = 20 + +randn_xy = randn((nx, ny), frac_nan=0.1) +randn_xt = randn((nx, nt)) +randn_t = randn((nt, )) +randn_long = randn((long_nx, ), frac_nan=0.1) + + +new_x_short = np.linspace(0.3 * nx, 0.7 * nx, 100) +new_x_long = np.linspace(0.3 * nx, 0.7 * nx, 1000) +new_y_long = np.linspace(0.1, 0.9, 1000) + + +class Interpolation(object): + def setup(self, *args, **kwargs): + self.ds = xr.Dataset( + {'var1': (('x', 'y'), randn_xy), + 'var2': (('x', 't'), randn_xt), + 'var3': (('t', ), randn_t)}, + coords={'x': np.arange(nx), + 'y': np.linspace(0, 1, ny), + 't': pd.date_range('1970-01-01', periods=nt, freq='D'), + 'x_coords': ('x', np.linspace(1.1, 2.1, nx))}) + + @parameterized(['method', 'is_short'], + (['linear', 'cubic'], [True, False])) + def time_interpolation(self, method, is_short): + new_x = new_x_short if is_short else new_x_long + self.ds.interp(x=new_x, method=method).load() + + @parameterized(['method'], + (['linear', 'nearest'])) + def time_interpolation_2d(self, method): + self.ds.interp(x=new_x_long, y=new_y_long, method=method).load() + + +class InterpolationDask(Interpolation): + def setup(self, *args, **kwargs): + requires_dask() + super(InterpolationDask, self).setup(**kwargs) + self.ds = self.ds.chunk({'t': 50}) diff --git a/doc/_static/advanced_selection_interpolation.svg b/doc/_static/advanced_selection_interpolation.svg new file mode 100644 index 00000000000..096563a604f --- /dev/null +++ b/doc/_static/advanced_selection_interpolation.svg @@ -0,0 +1,731 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + y + x + + + + + z + + + + + + + + + + + + + + + + + + + + + + + + + + + + y + x + + + + + z + + + + + + + + + Advanced indexing + Advanced interpolation + + + + diff --git a/doc/api.rst b/doc/api.rst index a528496bb6a..cb44ef82c8f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -110,6 +110,7 @@ Indexing Dataset.isel Dataset.sel Dataset.squeeze + Dataset.interp Dataset.reindex Dataset.reindex_like Dataset.set_index @@ -263,6 +264,7 @@ Indexing DataArray.isel DataArray.sel DataArray.squeeze + DataArray.interp DataArray.reindex DataArray.reindex_like DataArray.set_index diff --git a/doc/index.rst b/doc/index.rst index dc00c548b35..7528f3cb1fa 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -40,6 +40,7 @@ Documentation * :doc:`data-structures` * :doc:`indexing` +* :doc:`interpolation` * :doc:`computation` * :doc:`groupby` * :doc:`reshaping` @@ -57,6 +58,7 @@ Documentation data-structures indexing + interpolation computation groupby reshaping diff --git a/doc/indexing.rst b/doc/indexing.rst index cec438dd2e4..a44e64e4079 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -510,7 +510,7 @@ where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected and mapped along a new dimension ``z``. If you want to add a coordinate to the new dimension ``z``, -you can supply a :py:meth:`~xarray.DataArray` with a coordinate, +you can supply a :py:class:`~xarray.DataArray` with a coordinate, .. 
ipython:: python diff --git a/doc/installing.rst b/doc/installing.rst index 33f01b8c770..31fc109ee2e 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -35,6 +35,7 @@ For netCDF and IO For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ +- `scipy `__: necessary to enable the interpolation features for xarray objects - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor (1.1 or later) diff --git a/doc/interpolation.rst b/doc/interpolation.rst new file mode 100644 index 00000000000..c5fd5166aeb --- /dev/null +++ b/doc/interpolation.rst @@ -0,0 +1,261 @@ +.. _interp: + +Interpolating data +================== + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xarray as xr + np.random.seed(123456) + +xarray offers flexible interpolation routines, which have a similar interface +to our :ref:`indexing `. + +.. note:: + + ``interp`` requires `scipy` installed. + + +Scalar and 1-dimensional interpolation +-------------------------------------- + +Interpolating a :py:class:`~xarray.DataArray` works mostly like labeled +indexing of a :py:class:`~xarray.DataArray`, + +.. ipython:: python + + da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), + [('time', np.arange(4)), + ('space', [0.1, 0.2, 0.3])]) + # label lookup + da.sel(time=3) + + # interpolation + da.interp(time=3.5) + + +Similar to the indexing, :py:meth:`~xarray.DataArray.interp` also accepts an +array-like, which gives the interpolated result as an array. + +.. ipython:: python + + # label lookup + da.sel(time=[2, 3]) + + # interpolation + da.interp(time=[2.5, 3.5]) + +.. note:: + + Currently, our interpolation only works for regular grids. + Therefore, similarly to :py:meth:`~xarray.DataArray.sel`, + only 1D coordinates along a dimension can be used as the + original coordinate to be interpolated. 
+ + +Multi-dimensional Interpolation +------------------------------- + +Like :py:meth:`~xarray.DataArray.sel`, :py:meth:`~xarray.DataArray.interp` +accepts multiple coordinates. In this case, multidimensional interpolation +is carried out. + +.. ipython:: python + + # label lookup + da.sel(time=2, space=0.1) + + # interpolation + da.interp(time=2.5, space=0.15) + +Array-like coordinates are also accepted: + +.. ipython:: python + + # label lookup + da.sel(time=[2, 3], space=[0.1, 0.2]) + + # interpolation + da.interp(time=[1.5, 2.5], space=[0.15, 0.25]) + + +Interpolation methods +--------------------- + +We use :py:func:`scipy.interpolate.interp1d` for 1-dimensional interpolation and +:py:func:`scipy.interpolate.interpn` for multi-dimensional interpolation. + +The interpolation method can be specified by the optional ``method`` argument. + +.. ipython:: python + + da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', + coords={'x': np.linspace(0, 1, 10)}) + + da.plot.line('o', label='original') + da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') + da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + @savefig interpolation_sample1.png width=4in + plt.legend() + +Additional keyword arguments can be passed to scipy's functions. + +.. ipython:: python + + # fill 0 for the outside of the original coordinates. + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + # extrapolation + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + + +Advanced Interpolation +---------------------- + +:py:meth:`~xarray.DataArray.interp` accepts :py:class:`~xarray.DataArray` +as similar to :py:meth:`~xarray.DataArray.sel`, which enables us more advanced interpolation. +Based on the dimension of the new coordinate passed to :py:meth:`~xarray.DataArray.interp`, the dimension of the result are determined. 
+ +For example, if you want to interpolate a two dimensional array along a particular dimension, as illustrated below, +you can pass two 1-dimensional :py:class:`~xarray.DataArray` s with +a common dimension as new coordinate. + +.. image:: _static/advanced_selection_interpolation.svg + :height: 200px + :width: 400 px + :alt: advanced indexing and interpolation + :align: center + +For example: + +.. ipython:: python + + da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), + [('x', np.arange(5)), + ('y', [0.1, 0.2, 0.3, 0.4])]) + # advanced indexing + x = xr.DataArray([0, 2, 4], dims='z') + y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + da.sel(x=x, y=y) + + # advanced interpolation + x = xr.DataArray([0.5, 1.5, 2.5], dims='z') + y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + da.interp(x=x, y=y) + +where values on the original coordinates +``(x, y) = ((0.5, 0.15), (1.5, 0.25), (2.5, 0.35))`` are obtained by the +2-dimensional interpolation and mapped along a new dimension ``z``. + +If you want to add a coordinate to the new dimension ``z``, you can supply +:py:class:`~xarray.DataArray` s with a coordinate, + +.. ipython:: python + + x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) + y = xr.DataArray([0.15, 0.25, 0.35], dims='z', + coords={'z': ['a', 'b','c']}) + da.interp(x=x, y=y) + +For the details of the advanced indexing, +see :ref:`more advanced indexing `. + + +Interpolating arrays with NaN +----------------------------- + +Our :py:meth:`~xarray.DataArray.interp` works with arrays with NaN +the same way that +`scipy.interpolate.interp1d `_ and +`scipy.interpolate.interpn `_ do. +``linear`` and ``nearest`` methods return arrays including NaN, +while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. + +.. 
ipython:: python + + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', + coords={'x': range(5)}) + da.interp(x=[0.5, 1.5, 2.5]) + da.interp(x=[0.5, 1.5, 2.5], method='cubic') + +To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and +then make the interpolation + +.. ipython:: python + + dropped = da.dropna('x') + dropped + dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + +If NaNs are distributed rondomly in your multidimensional array, +dropping all the columns containing more than one NaNs by +:py:meth:`~xarray.DataArray.dropna` may lose a significant amount of information. +In such a case, you can fill NaN by :py:meth:`~xarray.DataArray.interpolate_na`, +which is similar to :py:meth:`pandas.Series.interpolate`. + +.. ipython:: python + + filled = da.interpolate_na(dim='x') + filled + +This fills NaN by interpolating along the specified dimension. +After filling NaNs, you can interpolate: + +.. ipython:: python + + filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + +For the details of :py:meth:`~xarray.DataArray.interpolate_na`, +see :ref:`Missing values `. + + +Example +------- + +Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. + +.. ipython:: python + + # Raw data + ds = xr.tutorial.load_dataset('air_temperature') + fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) + ds.air.isel(time=0).plot(ax=axes[0]) + axes[0].set_title('Raw data') + + # Interpolated data + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + dsi = ds.interp(lat=new_lat, lon=new_lon) + dsi.air.isel(time=0).plot(ax=axes[1]) + @savefig interpolation_sample3.png width=8in + axes[1].set_title('Interpolated data') + +Our advanced interpolation can be used to remap the data to the new coordinate. +Consider the new coordinates x and z on the two dimensional plane. +The remapping can be done as follows + +.. 
ipython:: python + + # new coordinate + x = np.linspace(240, 300, 100) + z = np.linspace(20, 70, 100) + # relation between new and original coordinates + lat = xr.DataArray(z, dims=['z'], coords={'z': z}) + lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, + dims=['x', 'z'], coords={'x': x, 'z': z}) + + fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) + ds.air.isel(time=0).plot(ax=axes[0]) + # draw the new coordinate on the original coordinates. + for idx in [0, 33, 66, 99]: + axes[0].plot(lon.isel(x=idx), lat, '--k') + for idx in [0, 33, 66, 99]: + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') + axes[0].set_title('Raw data') + + dsi = ds.interp(lon=lon, lat=lat) + dsi.air.isel(time=0).plot(ax=axes[1]) + @savefig interpolation_sample4.png width=8in + axes[1].set_title('Remapped data') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5e5da295186..e33aaef7303 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,15 @@ Enhancements - Plot labels now make use of metadata that follow CF conventions. By `Deepak Cherian `_ and `Ryan Abernathey `_. +- :py:meth:`~xarray.DataArray.interp` and :py:meth:`~xarray.Dataset.interp` + methods are newly added. + See :ref:`interpolating values with interp` for the detail. + (:issue:`2079`) + By `Keisuke Fujii `_. + +- `:py:meth:`~DataArray.dot` and :py:func:`~dot` are partly supported with older + dask<0.17.4. (related to :issue:`2203`) + By `Keisuke Fujii `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6a49610cb7b..9b251bb2c4b 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -10,7 +10,7 @@ import numpy as np -from . import duck_array_ops, utils, dtypes +from . 
import duck_array_ops, utils from .alignment import deep_align from .merge import expand_and_merge_variables from .pycompat import OrderedDict, dask_array_type, basestring diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fd2b49cc08a..4129a3c5f26 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -906,10 +906,54 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, indexers=indexers, method=method, tolerance=tolerance, copy=copy) return self._from_temp_dataset(ds) + def interp(self, coords=None, method='linear', assume_sorted=False, + kwargs={}, **coords_kwargs): + """ Multidimensional interpolation of variables. + + coords : dict, optional + Mapping from dimension names to the new coordinates. + new coordinate can be an scalar, array-like or DataArray. + If DataArrays are passed as new coordates, their dimensions are + used for the broadcasting. + method: {'linear', 'nearest'} for multidimensional array, + {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} + for 1-dimensional array. + assume_sorted: boolean, optional + If False, values of x can be in any order and they are sorted + first. If True, x has to be an array of monotonically increasing + values. + kwargs: dictionary + Additional keyword passed to scipy's interpolator. + **coords_kwarg : {dim: coordinate, ...}, optional + The keyword arguments form of ``coords``. + One of coords or coords_kwargs must be provided. + + Returns + ------- + interpolated: xr.DataArray + New dataarray on the new coordinates. + + Note + ---- + scipy is required. + + See Also + -------- + scipy.interpolate.interp1d + scipy.interpolate.interpn + """ + if self.dtype.kind not in 'uifc': + raise TypeError('interp only works for a numeric type array. 
' + 'Given {}.'.format(self.dtype)) + + ds = self._to_temp_dataset().interp( + coords, method=method, kwargs=kwargs, assume_sorted=assume_sorted, + **coords_kwargs) + return self._from_temp_dataset(ds) + def rename(self, new_name_or_name_dict=None, **names): """Returns a new DataArray with renamed coordinates or a new name. - Parameters ---------- new_name_or_name_dict : str or dict-like, optional diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 08f5f70d72b..90712c953da 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1318,7 +1318,7 @@ def _validate_indexers(self, indexers): # all indexers should be int, slice, np.ndarrays, or Variable indexers_list = [] for k, v in iteritems(indexers): - if isinstance(v, integer_types + (slice, Variable)): + if isinstance(v, (slice, Variable)): pass elif isinstance(v, DataArray): v = v.variable @@ -1328,6 +1328,14 @@ def _validate_indexers(self, indexers): raise TypeError('cannot use a Dataset as an indexer') else: v = np.asarray(v) + if v.ndim == 0: + v = as_variable(v) + elif v.ndim == 1: + v = as_variable((k, v)) + else: + raise IndexError( + "Unlabeled multi-dimensional array cannot be " + "used for indexing: {}".format(k)) indexers_list.append((k, v)) return indexers_list @@ -1806,6 +1814,85 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, coord_names.update(indexers) return self._replace_vars_and_dims(variables, coord_names) + def interp(self, coords=None, method='linear', assume_sorted=False, + kwargs={}, **coords_kwargs): + """ Multidimensional interpolation of Dataset. + + Parameters + ---------- + coords : dict, optional + Mapping from dimension names to the new coordinates. + New coordinate can be a scalar, array-like or DataArray. + If DataArrays are passed as new coordates, their dimensions are + used for the broadcasting. + method: string, optional. 
+ {'linear', 'nearest'} for multidimensional array, + {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} + for 1-dimensional array. 'linear' is used by default. + assume_sorted: boolean, optional + If False, values of coordinates that are interpolated over can be + in any order and they are sorted first. If True, interpolated + coordinates are assumed to be an array of monotonically increasing + values. + kwargs: dictionary, optional + Additional keyword passed to scipy's interpolator. + **coords_kwarg : {dim: coordinate, ...}, optional + The keyword arguments form of ``coords``. + One of coords or coords_kwargs must be provided. + + Returns + ------- + interpolated: xr.Dataset + New dataset on the new coordinates. + + Note + ---- + scipy is required. + + See Also + -------- + scipy.interpolate.interp1d + scipy.interpolate.interpn + """ + from . import missing + + coords = either_dict_or_kwargs(coords, coords_kwargs, 'rename') + indexers = OrderedDict(self._validate_indexers(coords)) + + obj = self if assume_sorted else self.sortby([k for k in coords]) + + def maybe_variable(obj, k): + # workaround to get variable for dimension without coordinate. 
+ try: + return obj._variables[k] + except KeyError: + return as_variable((k, range(obj.dims[k]))) + + variables = OrderedDict() + for name, var in iteritems(obj._variables): + if name not in indexers: + if var.dtype.kind in 'uifc': + var_indexers = {k: (maybe_variable(obj, k), v) for k, v + in indexers.items() if k in var.dims} + variables[name] = missing.interp( + var, var_indexers, method, **kwargs) + elif all(d not in indexers for d in var.dims): + # keep unrelated object array + variables[name] = var + + coord_names = set(variables).intersection(obj._coord_names) + selected = obj._replace_vars_and_dims(variables, + coord_names=coord_names) + # attach indexer as coordinate + variables.update(indexers) + # Extract coordinates from indexers + coord_vars = selected._get_indexers_coordinates(coords) + variables.update(coord_vars) + coord_names = (set(variables) + .intersection(obj._coord_names) + .union(coord_vars)) + return obj._replace_vars_and_dims(variables, coord_names=coord_names) + def rename(self, name_dict=None, inplace=False, **names): """Returns a new object with renamed variables and dimensions. 
diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 0da6750f5bc..e10f37d58d8 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -10,7 +10,9 @@ from .computation import apply_ufunc from .npcompat import flip from .pycompat import iteritems -from .utils import is_scalar +from .utils import is_scalar, OrderedSet +from .variable import Variable, broadcast_variables +from .duck_array_ops import dask_array_type class BaseInterpolator(object): @@ -203,7 +205,8 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate, **kwargs) - interpolator = _get_interpolator(method, **kwargs) + interp_class, kwargs = _get_interpolator(method, **kwargs) + interpolator = partial(func_interpolate_na, interp_class, **kwargs) arr = apply_ufunc(interpolator, index, self, input_core_dims=[[dim], [dim]], @@ -219,7 +222,7 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, return arr -def wrap_interpolator(interpolator, x, y, **kwargs): +def func_interpolate_na(interpolator, x, y, **kwargs): '''helper function to apply interpolation along 1 dimension''' # it would be nice if this wasn't necessary, works around: # "ValueError: assignment destination is read-only" in assignment below @@ -281,29 +284,41 @@ def bfill(arr, dim=None, limit=None): kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) -def _get_interpolator(method, **kwargs): +def _get_interpolator(method, vectorizeable_only=False, **kwargs): '''helper function to select the appropriate interpolator class - returns a partial of wrap_interpolator + returns interpolator class and keyword arguments for the class ''' interp1d_methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] valid_methods = interp1d_methods + ['barycentric', 'krog', 'pchip', 'spline', 'akima'] + has_scipy = True + try: + from scipy import interpolate + except 
ImportError: + has_scipy = False + + # prioritize scipy.interpolate if (method == 'linear' and not - kwargs.get('fill_value', None) == 'extrapolate'): + kwargs.get('fill_value', None) == 'extrapolate' and + not vectorizeable_only): kwargs.update(method=method) interp_class = NumpyInterpolator + elif method in valid_methods: - try: - from scipy import interpolate - except ImportError: + if not has_scipy: raise ImportError( 'Interpolation with method `%s` requires scipy' % method) + if method in interp1d_methods: kwargs.update(method=method) interp_class = ScipyInterpolator + elif vectorizeable_only: + raise ValueError('{} is not a vectorizeable interpolator. ' + 'Available methods are {}'.format( + method, interp1d_methods)) elif method == 'barycentric': interp_class = interpolate.BarycentricInterpolator elif method == 'krog': @@ -320,7 +335,30 @@ def _get_interpolator(method, **kwargs): else: raise ValueError('%s is not a valid interpolator' % method) - return partial(wrap_interpolator, interp_class, **kwargs) + return interp_class, kwargs + + +def _get_interpolator_nd(method, **kwargs): + '''helper function to select the appropriate interpolator class + + returns interpolator class and keyword arguments for the class + ''' + valid_methods = ['linear', 'nearest'] + + try: + from scipy import interpolate + except ImportError: + raise ImportError( + 'Interpolation with method `%s` requires scipy' % method) + + if method in valid_methods: + kwargs.update(method=method) + interp_class = interpolate.interpn + else: + raise ValueError('%s is not a valid interpolator for interpolating ' + 'over multiple dimensions.' 
% method) + + return interp_class, kwargs def _get_valid_fill_mask(arr, dim, limit): @@ -332,3 +370,167 @@ def _get_valid_fill_mask(arr, dim, limit): return (arr.isnull().rolling(min_periods=1, **kw) .construct(new_dim, fill_value=False) .sum(new_dim, skipna=False)) <= limit + + +def _assert_single_chunk(var, axes): + for axis in axes: + if len(var.chunks[axis]) > 1 or var.chunks[axis][0] < var.shape[axis]: + raise NotImplementedError( + 'Chunking along the dimension to be interpolated ' + '({}) is not yet supported.'.format(axis)) + + +def _localize(var, indexes_coords): + """ Speed up for linear and nearest neighbor method. + Only consider a subspace that is needed for the interpolation + """ + indexes = {} + for dim, [x, new_x] in indexes_coords.items(): + index = x.to_index() + imin = index.get_loc(np.min(new_x.values), method='nearest') + imax = index.get_loc(np.max(new_x.values), method='nearest') + + indexes[dim] = slice(max(imin - 2, 0), imax + 2) + indexes_coords[dim] = (x[indexes[dim]], new_x) + return var.isel(**indexes), indexes_coords + + +def interp(var, indexes_coords, method, **kwargs): + """ Make an interpolation of Variable + + Parameters + ---------- + var: Variable + index_coords: + Mapping from dimension name to a pair of original and new coordinates. + Original coordinates should be sorted in strictly ascending order. + Note that all the coordinates should be Variable objects. + method: string + One of {'linear', 'nearest', 'zero', 'slinear', 'quadratic', + 'cubic'}. For multidimensional interpolation, only + {'linear', 'nearest'} can be used. 
+ **kwargs: + keyword arguments to be passed to scipy.interpolate + + Returns + ------- + Interpolated Variable + + See Also + -------- + DataArray.interp + Dataset.interp + """ + if not indexes_coords: + return var.copy() + + # simple speed up for the local interpolation + if method in ['linear', 'nearest']: + var, indexes_coords = _localize(var, indexes_coords) + + # default behavior + kwargs['bounds_error'] = kwargs.get('bounds_error', False) + + # target dimensions + dims = list(indexes_coords) + x, new_x = zip(*[indexes_coords[d] for d in dims]) + destination = broadcast_variables(*new_x) + + # transpose to make the interpolated axis to the last position + broadcast_dims = [d for d in var.dims if d not in dims] + original_dims = broadcast_dims + dims + new_dims = broadcast_dims + list(destination[0].dims) + interped = interp_func(var.transpose(*original_dims).data, + x, destination, method, kwargs) + + result = Variable(new_dims, interped, attrs=var.attrs) + + # dimension of the output array + out_dims = OrderedSet() + for d in var.dims: + if d in dims: + out_dims.update(indexes_coords[d][1].dims) + else: + out_dims.add(d) + return result.transpose(*tuple(out_dims)) + + +def interp_func(var, x, new_x, method, kwargs): + """ + multi-dimensional interpolation for array-like. Interpolated axes should be + located in the last position. + + Parameters + ---------- + var: np.ndarray or dask.array.Array + Array to be interpolated. The final dimension is interpolated. + x: a list of 1d array. + Original coordinates. Should not contain NaN. + new_x: a list of 1d array + New coordinates. Should not contain NaN. + method: string + {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for + 1-dimensional itnterpolation. 
+ {'linear', 'nearest'} for multidimensional interpolation + **kwargs: + Optional keyword arguments to be passed to scipy.interpolator + + Returns + ------- + interpolated: array + Interpolated array + + Note + ---- + This requiers scipy installed. + + See Also + -------- + scipy.interpolate.interp1d + """ + if not x: + return var.copy() + + if len(x) == 1: + func, kwargs = _get_interpolator(method, vectorizeable_only=True, + **kwargs) + else: + func, kwargs = _get_interpolator_nd(method, **kwargs) + + if isinstance(var, dask_array_type): + import dask.array as da + + _assert_single_chunk(var, range(var.ndim - len(x), var.ndim)) + chunks = var.chunks[:-len(x)] + new_x[0].shape + drop_axis = range(var.ndim - len(x), var.ndim) + new_axis = range(var.ndim - len(x), var.ndim - len(x) + new_x[0].ndim) + return da.map_blocks(_interpnd, var, x, new_x, func, kwargs, + dtype=var.dtype, chunks=chunks, + new_axis=new_axis, drop_axis=drop_axis) + + return _interpnd(var, x, new_x, func, kwargs) + + +def _interp1d(var, x, new_x, func, kwargs): + # x, new_x are tuples of size 1. 
+ x, new_x = x[0], new_x[0] + rslt = func(x, var, assume_sorted=True, **kwargs)(np.ravel(new_x)) + if new_x.ndim > 1: + return rslt.reshape(var.shape[:-1] + new_x.shape) + if new_x.ndim == 0: + return rslt[..., -1] + return rslt + + +def _interpnd(var, x, new_x, func, kwargs): + if len(x) == 1: + return _interp1d(var, x, new_x, func, kwargs) + + # move the interpolation axes to the start position + var = var.transpose(range(-len(x), var.ndim - len(x))) + # stack new_x to 1 vector, with reshape + xi = np.stack([x1.values.ravel() for x1 in new_x], axis=-1) + rslt = func(x, var, xi, **kwargs) + # move back the interpolation axes to the last position + rslt = rslt.transpose(range(-rslt.ndim + 1, 1)) + return rslt.reshape(rslt.shape[:-1] + new_x[0].shape) diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py new file mode 100644 index 00000000000..592854a4d1b --- /dev/null +++ b/xarray/tests/test_interp.py @@ -0,0 +1,432 @@ +from __future__ import absolute_import, division, print_function + +import numpy as np +import pytest + +import xarray as xr +from xarray.tests import assert_allclose, assert_equal, requires_scipy +from . 
import has_dask, has_scipy +from .test_dataset import create_test_data + +try: + import scipy +except ImportError: + pass + + +def get_example_data(case): + x = np.linspace(0, 1, 100) + y = np.linspace(0, 0.1, 30) + data = xr.DataArray( + np.sin(x[:, np.newaxis]) * np.cos(y), dims=['x', 'y'], + coords={'x': x, 'y': y, 'x2': ('x', x**2)}) + + if case == 0: + return data + elif case == 1: + return data.chunk({'y': 3}) + elif case == 2: + return data.chunk({'x': 25, 'y': 3}) + elif case == 3: + x = np.linspace(0, 1, 100) + y = np.linspace(0, 0.1, 30) + z = np.linspace(0.1, 0.2, 10) + return xr.DataArray( + np.sin(x[:, np.newaxis, np.newaxis]) * np.cos( + y[:, np.newaxis]) * z, + dims=['x', 'y', 'z'], + coords={'x': x, 'y': y, 'x2': ('x', x**2), 'z': z}) + elif case == 4: + return get_example_data(3).chunk({'z': 5}) + + +def test_keywargs(): + if not has_scipy: + pytest.skip('scipy is not installed.') + + da = get_example_data(0) + assert_equal(da.interp(x=[0.5, 0.8]), da.interp({'x': [0.5, 0.8]})) + + +@pytest.mark.parametrize('method', ['linear', 'cubic']) +@pytest.mark.parametrize('dim', ['x', 'y']) +@pytest.mark.parametrize('case', [0, 1]) +def test_interpolate_1d(method, dim, case): + if not has_scipy: + pytest.skip('scipy is not installed.') + + if not has_dask and case in [1]: + pytest.skip('dask is not installed in the environment.') + + da = get_example_data(case) + xdest = np.linspace(0.0, 0.9, 80) + + if dim == 'y' and case == 1: + with pytest.raises(NotImplementedError): + actual = da.interp(method=method, **{dim: xdest}) + pytest.skip('interpolation along chunked dimension is ' + 'not yet supported') + + actual = da.interp(method=method, **{dim: xdest}) + + # scipy interpolation for the reference + def func(obj, new_x): + return scipy.interpolate.interp1d( + da[dim], obj.data, axis=obj.get_axis_num(dim), bounds_error=False, + fill_value=np.nan, kind=method)(new_x) + + if dim == 'x': + coords = {'x': xdest, 'y': da['y'], 'x2': ('x', func(da['x2'], xdest))} 
+ else: # y + coords = {'x': da['x'], 'y': xdest, 'x2': da['x2']} + + expected = xr.DataArray(func(da, xdest), dims=['x', 'y'], coords=coords) + assert_allclose(actual, expected) + + +@pytest.mark.parametrize('method', ['cubic', 'zero']) +def test_interpolate_1d_methods(method): + if not has_scipy: + pytest.skip('scipy is not installed.') + + da = get_example_data(0) + dim = 'x' + xdest = np.linspace(0.0, 0.9, 80) + + actual = da.interp(method=method, **{dim: xdest}) + + # scipy interpolation for the reference + def func(obj, new_x): + return scipy.interpolate.interp1d( + da[dim], obj.data, axis=obj.get_axis_num(dim), bounds_error=False, + fill_value=np.nan, kind=method)(new_x) + + coords = {'x': xdest, 'y': da['y'], 'x2': ('x', func(da['x2'], xdest))} + expected = xr.DataArray(func(da, xdest), dims=['x', 'y'], coords=coords) + assert_allclose(actual, expected) + + +@pytest.mark.parametrize('use_dask', [False, True]) +def test_interpolate_vectorize(use_dask): + if not has_scipy: + pytest.skip('scipy is not installed.') + + if not has_dask and use_dask: + pytest.skip('dask is not installed in the environment.') + + # scipy interpolation for the reference + def func(obj, dim, new_x): + shape = [s for i, s in enumerate(obj.shape) + if i != obj.get_axis_num(dim)] + for s in new_x.shape[::-1]: + shape.insert(obj.get_axis_num(dim), s) + + return scipy.interpolate.interp1d( + da[dim], obj.data, axis=obj.get_axis_num(dim), + bounds_error=False, fill_value=np.nan)(new_x).reshape(shape) + + da = get_example_data(0) + if use_dask: + da = da.chunk({'y': 5}) + + # xdest is 1d but has different dimension + xdest = xr.DataArray(np.linspace(0.1, 0.9, 30), dims='z', + coords={'z': np.random.randn(30), + 'z2': ('z', np.random.randn(30))}) + + actual = da.interp(x=xdest, method='linear') + + expected = xr.DataArray(func(da, 'x', xdest), dims=['z', 'y'], + coords={'z': xdest['z'], 'z2': xdest['z2'], + 'y': da['y'], + 'x': ('z', xdest.values), + 'x2': ('z', func(da['x2'], 'x', 
xdest))})
 + assert_allclose(actual, expected.transpose('z', 'y'))
 +
 + # xdest is 2d
 + xdest = xr.DataArray(np.linspace(0.1, 0.9, 30).reshape(6, 5),
 + dims=['z', 'w'],
 + coords={'z': np.random.randn(6),
 + 'w': np.random.randn(5),
 + 'z2': ('z', np.random.randn(6))})
 +
 + actual = da.interp(x=xdest, method='linear')
 +
 + expected = xr.DataArray(
 + func(da, 'x', xdest),
 + dims=['z', 'w', 'y'],
 + coords={'z': xdest['z'], 'w': xdest['w'], 'z2': xdest['z2'],
 + 'y': da['y'], 'x': (('z', 'w'), xdest),
 + 'x2': (('z', 'w'), func(da['x2'], 'x', xdest))})
 + assert_allclose(actual, expected.transpose('z', 'w', 'y'))
 +
 +
 +@pytest.mark.parametrize('case', [3, 4])
 +def test_interpolate_nd(case):
 + if not has_scipy:
 + pytest.skip('scipy is not installed.')
 +
 + if not has_dask and case == 4:
 + pytest.skip('dask is not installed in the environment.')
 +
 + da = get_example_data(case)
 +
 + # grid -> grid
 + xdest = np.linspace(0.1, 1.0, 11)
 + ydest = np.linspace(0.0, 0.2, 10)
 + actual = da.interp(x=xdest, y=ydest, method='linear')
 +
 + # linear interpolation is separable
 + expected = da.interp(x=xdest, method='linear')
 + expected = expected.interp(y=ydest, method='linear')
 + assert_allclose(actual.transpose('x', 'y', 'z'),
 + expected.transpose('x', 'y', 'z'))
 +
 + # grid -> 1d-sample
 + xdest = xr.DataArray(np.linspace(0.1, 1.0, 11), dims='y')
 + ydest = xr.DataArray(np.linspace(0.0, 0.2, 11), dims='y')
 + actual = da.interp(x=xdest, y=ydest, method='linear')
 +
 + # linear interpolation is separable
 + expected_data = scipy.interpolate.RegularGridInterpolator(
 + (da['x'], da['y']), da.transpose('x', 'y', 'z').values,
 + method='linear', bounds_error=False,
 + fill_value=np.nan)(np.stack([xdest, ydest], axis=-1))
 + expected = xr.DataArray(
 + expected_data, dims=['y', 'z'],
 + coords={'z': da['z'], 'y': ydest, 'x': ('y', xdest.values),
 + 'x2': da['x2'].interp(x=xdest)})
 + assert_allclose(actual.transpose('y', 'z'), expected)
 +
 + # reversed order
 + actual = da.interp(y=ydest, x=xdest, 
method='linear') + assert_allclose(actual.transpose('y', 'z'), expected) + + +@pytest.mark.parametrize('method', ['linear']) +@pytest.mark.parametrize('case', [0, 1]) +def test_interpolate_scalar(method, case): + if not has_scipy: + pytest.skip('scipy is not installed.') + + if not has_dask and case in [1]: + pytest.skip('dask is not installed in the environment.') + + da = get_example_data(case) + xdest = 0.4 + + actual = da.interp(x=xdest, method=method) + + # scipy interpolation for the reference + def func(obj, new_x): + return scipy.interpolate.interp1d( + da['x'], obj.data, axis=obj.get_axis_num('x'), bounds_error=False, + fill_value=np.nan)(new_x) + + coords = {'x': xdest, 'y': da['y'], 'x2': func(da['x2'], xdest)} + expected = xr.DataArray(func(da, xdest), dims=['y'], coords=coords) + assert_allclose(actual, expected) + + +@pytest.mark.parametrize('method', ['linear']) +@pytest.mark.parametrize('case', [3, 4]) +def test_interpolate_nd_scalar(method, case): + if not has_scipy: + pytest.skip('scipy is not installed.') + + if not has_dask and case in [4]: + pytest.skip('dask is not installed in the environment.') + + da = get_example_data(case) + xdest = 0.4 + ydest = 0.05 + + actual = da.interp(x=xdest, y=ydest, method=method) + # scipy interpolation for the reference + expected_data = scipy.interpolate.RegularGridInterpolator( + (da['x'], da['y']), da.transpose('x', 'y', 'z').values, + method='linear', bounds_error=False, + fill_value=np.nan)(np.stack([xdest, ydest], axis=-1)) + + coords = {'x': xdest, 'y': ydest, 'x2': da['x2'].interp(x=xdest), + 'z': da['z']} + expected = xr.DataArray(expected_data[0], dims=['z'], coords=coords) + assert_allclose(actual, expected) + + +@pytest.mark.parametrize('use_dask', [True, False]) +def test_nans(use_dask): + if not has_scipy: + pytest.skip('scipy is not installed.') + + da = xr.DataArray([0, 1, np.nan, 2], dims='x', coords={'x': range(4)}) + + if not has_dask and use_dask: + pytest.skip('dask is not installed in the 
environment.') + da = da.chunk() + + actual = da.interp(x=[0.5, 1.5]) + # not all values are nan + assert actual.count() > 0 + + +@pytest.mark.parametrize('use_dask', [True, False]) +def test_errors(use_dask): + if not has_scipy: + pytest.skip('scipy is not installed.') + + # akima and spline are unavailable + da = xr.DataArray([0, 1, np.nan, 2], dims='x', coords={'x': range(4)}) + if not has_dask and use_dask: + pytest.skip('dask is not installed in the environment.') + da = da.chunk() + + for method in ['akima', 'spline']: + with pytest.raises(ValueError): + da.interp(x=[0.5, 1.5], method=method) + + # not sorted + if use_dask: + da = get_example_data(3) + else: + da = get_example_data(1) + + result = da.interp(x=[-1, 1, 3], kwargs={'fill_value': 0.0}) + assert not np.isnan(result.values).any() + result = da.interp(x=[-1, 1, 3]) + assert np.isnan(result.values).any() + + # invalid method + with pytest.raises(ValueError): + da.interp(x=[2, 0], method='boo') + with pytest.raises(ValueError): + da.interp(x=[2, 0], y=2, method='cubic') + with pytest.raises(ValueError): + da.interp(y=[2, 0], method='boo') + + # object-type DataArray cannot be interpolated + da = xr.DataArray(['a', 'b', 'c'], dims='x', coords={'x': [0, 1, 2]}) + with pytest.raises(TypeError): + da.interp(x=0) + + +@requires_scipy +def test_dtype(): + ds = xr.Dataset({'var1': ('x', [0, 1, 2]), 'var2': ('x', ['a', 'b', 'c'])}, + coords={'x': [0.1, 0.2, 0.3], 'z': ('x', ['a', 'b', 'c'])}) + actual = ds.interp(x=[0.15, 0.25]) + assert 'var1' in actual + assert 'var2' not in actual + # object array should be dropped + assert 'z' not in actual.coords + + +@requires_scipy +def test_sorted(): + # unsorted non-uniform gridded data + x = np.random.randn(100) + y = np.random.randn(30) + z = np.linspace(0.1, 0.2, 10) * 3.0 + da = xr.DataArray( + np.cos(x[:, np.newaxis, np.newaxis]) * np.cos( + y[:, np.newaxis]) * z, + dims=['x', 'y', 'z'], + coords={'x': x, 'y': y, 'x2': ('x', x**2), 'z': z}) + + x_new = 
np.linspace(0, 1, 30) + y_new = np.linspace(0, 1, 20) + + da_sorted = da.sortby('x') + assert_allclose(da.interp(x=x_new), + da_sorted.interp(x=x_new, assume_sorted=True)) + da_sorted = da.sortby(['x', 'y']) + assert_allclose(da.interp(x=x_new, y=y_new), + da_sorted.interp(x=x_new, y=y_new, assume_sorted=True)) + + with pytest.raises(ValueError): + da.interp(x=[0, 1, 2], assume_sorted=True) + + +@requires_scipy +def test_dimension_wo_coords(): + da = xr.DataArray(np.arange(12).reshape(3, 4), dims=['x', 'y'], + coords={'y': [0, 1, 2, 3]}) + da_w_coord = da.copy() + da_w_coord['x'] = np.arange(3) + + assert_equal(da.interp(x=[0.1, 0.2, 0.3]), + da_w_coord.interp(x=[0.1, 0.2, 0.3])) + assert_equal(da.interp(x=[0.1, 0.2, 0.3], y=[0.5]), + da_w_coord.interp(x=[0.1, 0.2, 0.3], y=[0.5])) + + +@requires_scipy +def test_dataset(): + ds = create_test_data() + ds.attrs['foo'] = 'var' + ds['var1'].attrs['buz'] = 'var2' + new_dim2 = xr.DataArray([0.11, 0.21, 0.31], dims='z') + interpolated = ds.interp(dim2=new_dim2) + + assert_allclose(interpolated['var1'], ds['var1'].interp(dim2=new_dim2)) + assert interpolated['var3'].equals(ds['var3']) + + # make sure modifying interpolated does not affect the original dataset + interpolated['var1'][:, 1] = 1.0 + interpolated['var2'][:, 1] = 1.0 + interpolated['var3'][:, 1] = 1.0 + + assert not interpolated['var1'].equals(ds['var1']) + assert not interpolated['var2'].equals(ds['var2']) + assert not interpolated['var3'].equals(ds['var3']) + # attrs should be kept + assert interpolated.attrs['foo'] == 'var' + assert interpolated['var1'].attrs['buz'] == 'var2' + + +@pytest.mark.parametrize('case', [0, 3]) +def test_interpolate_dimorder(case): + """ Make sure the resultant dimension order is consistent with .sel() """ + if not has_scipy: + pytest.skip('scipy is not installed.') + + da = get_example_data(case) + + new_x = xr.DataArray([0, 1, 2], dims='x') + assert da.interp(x=new_x).dims == da.sel(x=new_x, method='nearest').dims + + new_y = 
xr.DataArray([0, 1, 2], dims='y') + actual = da.interp(x=new_x, y=new_y).dims + expected = da.sel(x=new_x, y=new_y, method='nearest').dims + assert actual == expected + # reversed order + actual = da.interp(y=new_y, x=new_x).dims + expected = da.sel(y=new_y, x=new_x, method='nearest').dims + assert actual == expected + + new_x = xr.DataArray([0, 1, 2], dims='a') + assert da.interp(x=new_x).dims == da.sel(x=new_x, method='nearest').dims + assert da.interp(y=new_x).dims == da.sel(y=new_x, method='nearest').dims + new_y = xr.DataArray([0, 1, 2], dims='a') + actual = da.interp(x=new_x, y=new_y).dims + expected = da.sel(x=new_x, y=new_y, method='nearest').dims + assert actual == expected + + new_x = xr.DataArray([[0], [1], [2]], dims=['a', 'b']) + assert da.interp(x=new_x).dims == da.sel(x=new_x, method='nearest').dims + assert da.interp(y=new_x).dims == da.sel(y=new_x, method='nearest').dims + + if case == 3: + new_x = xr.DataArray([[0], [1], [2]], dims=['a', 'b']) + new_z = xr.DataArray([[0], [1], [2]], dims=['a', 'b']) + actual = da.interp(x=new_x, z=new_z).dims + expected = da.sel(x=new_x, z=new_z, method='nearest').dims + assert actual == expected + + actual = da.interp(z=new_z, x=new_x).dims + expected = da.sel(z=new_z, x=new_x, method='nearest').dims + assert actual == expected + + actual = da.interp(x=0.5, z=new_z).dims + expected = da.sel(x=0.5, z=new_z, method='nearest').dims + assert actual == expected