diff --git a/doc/api.rst b/doc/api.rst index 433aa93c9de..50682ac3a0e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -105,8 +105,6 @@ Indexing Dataset.loc Dataset.isel Dataset.sel - Dataset.isel_points - Dataset.sel_points Dataset.squeeze Dataset.reindex Dataset.reindex_like @@ -247,8 +245,6 @@ Indexing DataArray.loc DataArray.isel DataArray.sel - DataArray.isel_points - DataArray.sel_points DataArray.squeeze DataArray.reindex DataArray.reindex_like diff --git a/doc/indexing.rst b/doc/indexing.rst index 378a04b3942..4c74976cbb0 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -11,10 +11,19 @@ Indexing and selecting data import xarray as xr np.random.seed(123456) -Similarly to pandas objects, xarray objects support both integer and label -based lookups along each dimension. However, xarray objects also have named -dimensions, so you can optionally use dimension names instead of relying on the -positional ordering of dimensions. +xarray offers extremely flexible indexing routines that combine the best +features of NumPy and pandas for data selection. + +The most basic way to access elements of a :py:class:`~xarray.DataArray` +object is to use Python's ``[]`` syntax, such as ``array[i, j]``, where +``i`` and ``j`` are both integers. +As xarray objects can store coordinates corresponding to each dimension of an +array, label-based indexing similar to ``pandas.DataFrame.loc`` is also possible. +In label-based indexing, the element position ``i`` is automatically +looked-up from the coordinate values. + +Dimensions of xarray objects have names, so you can also lookup the dimensions +by name, instead of remembering their positional order. 
Thus in total, xarray supports four different kinds of indexing, as described below and summarized in this table: @@ -37,6 +46,11 @@ below and summarized in this table: | | | ``arr.loc[dict(space='IA')]`` | ``ds.loc[dict(space='IA')]`` | +------------------+--------------+---------------------------------+--------------------------------+ +More advanced indexing is also possible for all the methods by +supplying :py:class:`~xarray.DataArray` objects as indexer. +See :ref:`vectorized_indexing` for the details. + + Positional indexing ------------------- @@ -58,9 +72,8 @@ Attributes are persisted in all indexing operations. .. warning:: Positional indexing deviates from the NumPy when indexing with multiple - arrays like ``arr[[0, 1], [0, 1]]``, as described in :ref:`orthogonal`. - See :ref:`pointwise indexing` for how to achieve this functionality in - xarray. + arrays like ``arr[[0, 1], [0, 1]]``, as described in + :ref:`vectorized_indexing`. xarray also supports label-based indexing, just like pandas. Because we use a :py:class:`pandas.Index` under the hood, label based indexing is very @@ -70,6 +83,10 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri arr.loc['2000-01-01':'2000-01-02', 'IA'] +In this example, the selected is a subpart of the array +in the range '2000-01-01':'2000-01-02' along the first coordinate `time` +and with 'IA' value from the second coordinate `space`. + You can perform any of the label indexing operations `supported by pandas`__, including indexing with individual, slices and arrays of labels, as well as indexing with boolean arrays. 
Like pandas, label based indexing in xarray is @@ -85,10 +102,10 @@ Setting values with label based indexing is also supported: arr -Indexing with labeled dimensions --------------------------------- +Indexing with dimension names +----------------------------- -With labeled dimensions, we do not have to rely on dimension order and can +With the dimension names, we do not have to rely on dimension order and can use them explicitly to slice data. There are two ways to do this: 1. Use a dictionary as the argument for array positional or label based array @@ -125,49 +142,56 @@ Python :py:func:`slice` objects or 1-dimensional arrays. __ http://legacy.python.org/dev/peps/pep-0472/ -.. warning:: - Do not try to assign values when using any of the indexing methods ``isel``, - ``isel_points``, ``sel`` or ``sel_points``:: +.. _nearest neighbor lookups: - # DO NOT do this - arr.isel(space=0) = 0 +Nearest neighbor lookups +------------------------ - Depending on whether the underlying numpy indexing returns a copy or a - view, the method will fail, and when it fails, **it will fail - silently**. Instead, you should use normal index assignment:: +The label based selection methods :py:meth:`~xarray.Dataset.sel`, +:py:meth:`~xarray.Dataset.reindex` and :py:meth:`~xarray.Dataset.reindex_like` all +support ``method`` and ``tolerance`` keyword argument. The method parameter allows for +enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, +``'backfill'`` or ``'nearest'``: - # this is safe - arr[dict(space=0)] = 0 +.. ipython:: python -.. _pointwise indexing: + data = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) + data.sel(x=[1.1, 1.9], method='nearest') + data.sel(x=0.1, method='backfill') + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') -Pointwise indexing ------------------- +Tolerance limits the maximum distance for valid matches with an inexact lookup: + +.. 
ipython:: python + + data.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) -xarray pointwise indexing supports the indexing along multiple labeled dimensions -using list-like objects. While :py:meth:`~xarray.DataArray.isel` performs -orthogonal indexing, the :py:meth:`~xarray.DataArray.isel_points` method -provides similar numpy indexing behavior as if you were using multiple -lists to index an array (e.g. ``arr[[0, 1], [0, 1]]`` ): +The method parameter is not yet supported if any of the arguments +to ``.sel()`` is a ``slice`` object: + +.. ipython:: + :verbatim: + + In [1]: data.sel(x=slice(1, 3), method='nearest') + NotImplementedError + +However, you don't need to use ``method`` to do inexact slicing. Slicing +already returns all values inside the range (inclusive), as long as the index +labels are monotonic increasing: .. ipython:: python - # index by integer array indices - da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) - da - da.isel_points(x=[0, 1, 6], y=[0, 1, 0]) + data.sel(x=slice(0.9, 3.1)) -There is also :py:meth:`~xarray.DataArray.sel_points`, which analogously -allows you to do point-wise indexing by label: +Indexing axes with monotonic decreasing labels also works, as long as the +``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - times = pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']) - arr.sel_points(space=['IA', 'IL', 'IN'], time=times) + reversed_data = data[::-1] + reversed_data.loc[3.1:0.9] -The equivalent pandas method to ``sel_points`` is -:py:meth:`~pandas.DataFrame.lookup`. 
Dataset indexing ---------------- @@ -180,12 +204,10 @@ simultaneously, returning a new dataset: ds = arr.to_dataset(name='foo') ds.isel(space=[0], time=[0]) ds.sel(time='2000-01-01') - ds2 = da.to_dataset(name='bar') - ds2.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') Positional indexing on a dataset is not supported because the ordering of dimensions in a dataset is somewhat ambiguous (it can vary between different -arrays). However, you can do normal indexing with labeled dimensions: +arrays). However, you can do normal indexing with dimension names: .. ipython:: python @@ -208,57 +230,6 @@ index labels along a dimension dropped: ``drop`` is both a ``Dataset`` and ``DataArray`` method. -.. _nearest neighbor lookups: - -Nearest neighbor lookups ------------------------- - -The label based selection methods :py:meth:`~xarray.Dataset.sel`, -:py:meth:`~xarray.Dataset.reindex` and :py:meth:`~xarray.Dataset.reindex_like` all -support ``method`` and ``tolerance`` keyword argument. The method parameter allows for -enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, -``'backfill'`` or ``'nearest'``: - -.. ipython:: python - - data = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - data.sel(x=[1.1, 1.9], method='nearest') - data.sel(x=0.1, method='backfill') - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') - -Tolerance limits the maximum distance for valid matches with an inexact lookup: - -.. ipython:: python - - data.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) - -Using ``method='nearest'`` or a scalar argument with ``.sel()`` requires pandas -version 0.16 or newer. Using ``tolerance`` requries pandas version 0.17 or newer. - -The method parameter is not yet supported if any of the arguments -to ``.sel()`` is a ``slice`` object: - -.. ipython:: - :verbatim: - - In [1]: data.sel(x=slice(1, 3), method='nearest') - NotImplementedError - -However, you don't need to use ``method`` to do inexact slicing. 
Slicing -already returns all values inside the range (inclusive), as long as the index -labels are monotonic increasing: - -.. ipython:: python - - data.sel(x=slice(0.9, 3.1)) - -Indexing axes with monotonic decreasing labels also works, as long as the -``slice`` or ``.loc`` arguments are also decreasing: - -.. ipython:: python - - reversed_data = data[::-1] - reversed_data.loc[3.1:0.9] .. _masking with where: @@ -294,126 +265,197 @@ elements that are fully masked: arr2.where(arr2.y < 2, drop=True) -.. _multi-level indexing: -Multi-level indexing -------------------- +.. _vectorized_indexing: -Just like pandas, advanced indexing on multi-level indexes is possible with -``loc`` and ``sel``. You can slice a multi-index by providing multiple indexers, -i.e., a tuple of slices, labels, list of labels, or any selector allowed by -pandas: +Vectorized Indexing +------------------- + +Like numpy and pandas, xarray supports indexing many array elements at once in a +`vectorized` manner. + +If you only provide integers, slices, or unlabeled arrays (array without +dimension names, such as ``np.ndarray``, ``list``, but not +:py:meth:`~xarray.DataArray` or :py:meth:`~xarray.Variable`) indexing can be +understood as orthogonal. Each indexer component selects independently along +the corresponding dimension, similar to how vector indexing works in Fortran or +MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - midx = pd.MultiIndex.from_product([list('abc'), [0, 1]], - names=('one', 'two')) - mda = xr.DataArray(np.random.rand(6, 3), - [('x', midx), ('y', range(3))]) - mda - mda.sel(x=(list('ab'), [0])) + da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], + coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + da + da[[0, 1], [1, 1]] -You can also select multiple elements by providing a list of labels or tuples or -a slice of tuples: +For more flexibility, you can supply :py:meth:`~xarray.DataArray` objects +as indexers. 
+Dimensions on resultant arrays are given by the ordered union of the indexers' +dimensions: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + ind_x = xr.DataArray([0, 1], dims=['x']) + ind_y = xr.DataArray([0, 1], dims=['y']) + da[ind_x, ind_y] # orthogonal indexing + da[ind_x, ind_x] # vectorized indexing -Additionally, xarray supports dictionaries: +Slices or sequences/arrays without named-dimensions are treated as if they have +the same dimension which is indexed along: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + # Because [0, 1] is used to index along dimension 'x', + # it is assumed to have dimension 'x' + da[[0, 1], ind_x] -For convenience, ``sel`` also accepts multi-index levels directly -as keyword arguments: +Furthermore, you can use multi-dimensional :py:meth:`~xarray.DataArray` +as indexers, where the resultant array dimension is also determined by +indexers' dimension: .. ipython:: python - mda.sel(one='a', two=0) + ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + da[ind] -Note that using ``sel`` it is not possible to mix a dimension -indexer with level indexers for that dimension -(e.g., ``mda.sel(x={'one': 'a'}, two=0)`` will raise a ``ValueError``). +Similar to how NumPy's `advanced indexing`_ works, vectorized +indexing for xarray is based on our +:ref:`broadcasting rules `. +See :ref:`indexing.rules` for the complete specification. -Like pandas, xarray handles partial selection on multi-index (level drop). -As shown below, it also renames the dimension / coordinate when the -multi-index is reduced to a single index. +.. _advanced indexing: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.indexing.html + +Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. ipython:: python - mda.loc[{'one': 'a'}, ...] 
+ + ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + da.isel(y=ind) # same as da[:, ind] -Unlike pandas, xarray does not guess whether you provide index levels or -dimensions when using ``loc`` in some ambiguous cases. For example, for -``mda.loc[{'one': 'a', 'two': 0}]`` and ``mda.loc['a', 0]`` xarray -always interprets ('one', 'two') and ('a', 0) as the names and -labels of the 1st and 2nd dimension, respectively. You must specify all -dimensions or use the ellipsis in the ``loc`` specifier, e.g. in the example -above, ``mda.loc[{'one': 'a', 'two': 0}, :]`` or ``mda.loc[('a', 0), ...]``. + ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + da.loc[:, ind] # same as da.sel(y=ind) -Multi-dimensional indexing -------------------------- +These methods may also be applied to ``Dataset`` objects -xarray does not yet support efficient routines for generalized multi-dimensional -indexing or regridding. However, we are definitely interested in adding support -for this in the future (see :issue:`475` for the ongoing discussion). +.. ipython:: python -.. _copies vs views: + ds2 = da.to_dataset(name='bar') + ds2.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) -Copies vs. views ----------------- +.. tip:: -Whether array indexing returns a view or a copy of the underlying -data depends on the nature of the labels. For positional (integer) -indexing, xarray follows the same rules as NumPy: + If you are lazily loading your data from disk, not every form of vectorized + indexing is supported (or if supported, may not be supported efficiently). + You may find increased performance by loading your data into memory first, + e.g., with :py:meth:`~xarray.Dataset.load`. -* Positional indexing with only integers and slices returns a view. -* Positional indexing with arrays or lists returns a copy. +.. note:: -The rules for label based indexing are more complex: + Vectorized indexing is a new feature in v0.10. 
+ In older versions of xarray, dimensions of indexers are ignored. + Dedicated methods for some advanced indexing use cases, + ``isel_points`` and ``sel_points`` are now deprecated. + See :ref:`more_advanced_indexing` for their alternative. -* Label-based indexing with only slices returns a view. -* Label-based indexing with arrays returns a copy. -* Label-based indexing with scalars returns a view or a copy, depending - upon if the corresponding positional indexer can be represented as an - integer or a slice object. The exact rules are determined by pandas. +.. _assigning_values: -Whether data is a copy or a view is more predictable in xarray than in pandas, so -unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you -should still avoid assignment with chained indexing. +Assigning values with indexing +------------------------------ -.. _SettingWithCopy warnings: http://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy +Vectorized indexing can be used to assign values to xarray object. + +.. ipython:: python + + da = xr.DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], + coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + da + da[0] = -1 # assignment with broadcasting + da -.. _orthogonal: + ind_x = xr.DataArray([0, 1], dims=['x']) + ind_y = xr.DataArray([0, 1], dims=['y']) + da[ind_x, ind_y] = -2 # assign -2 to (ix, iy) = (0, 0) and (1, 1) + da -Orthogonal (outer) vs. vectorized indexing ------------------------------------------- + da[ind_x, ind_y] += 100 # increment is also possible + da -Indexing with xarray objects has one important difference from indexing numpy -arrays: you can only use one-dimensional arrays to index xarray objects, and -each indexer is applied "orthogonally" along independent axes, instead of -using numpy's broadcasting rules to vectorize indexers. 
This means you can do -indexing like this, which would require slightly more awkward syntax with -numpy arrays: +Like ``numpy.ndarray``, value assignment sometimes works differently from what one may expect. .. ipython:: python - arr[arr['time.day'] > 1, arr['space'] != 'IL'] + da = xr.DataArray([0, 1, 2, 3], dims=['x']) + ind = xr.DataArray([0, 0, 0], dims=['x']) + da[ind] -= 1 + da + +Where the 0th element will be subtracted 1 only once. +This is because ``v[0] = v[0] - 1`` is called three times, rather than +``v[0] = v[0] - 1 - 1 - 1``. +See `Assigning values to indexed arrays`__ for the details. + +__ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-to-indexed-arrays + + +.. note:: + Dask array does not support value assignment + (see :ref:`dask` for the details). + + +.. warning:: + + Do not try to assign values when using any of the indexing methods ``isel`` + or ``sel``:: -This is a much simpler model than numpy's `advanced indexing`__. If you would -like to do advanced-style array indexing in xarray, you have several options: + # DO NOT do this + arr.isel(space=0) = 0 -* :ref:`pointwise indexing` -* :ref:`masking with where` -* Index the underlying NumPy array directly using ``.values``, e.g., + Assigning values with the chained indexing using ``.sel`` or ``.isel`` fails silently. -__ http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html + .. ipython:: python + + da = xr.DataArray([0, 1, 2, 3], dims=['x']) + # DO NOT do this + da.isel(x=[0, 1, 2])[1] = -1 + da + + +.. _more_advanced_indexing: + +More advanced indexing +----------------------- + +The use of :py:meth:`~xarray.DataArray` objects as indexers enables very +flexible indexing. The following is an example of the pointwise indexing: .. 
ipython:: python - arr.values[arr.values > 0.5] + da = xr.DataArray(np.arange(56).reshape((7, 8)), dims=['x', 'y']) + da + da.isel(x=xr.DataArray([0, 1, 6], dims='z'), + y=xr.DataArray([0, 1, 0], dims='z')) + +where three elements at ``(ix, iy) = ((0, 0), (1, 1), (6, 0))`` are selected +and mapped along a new dimension ``z``. + +If you want to add a coordinate to the new dimension ``z``, +you can supply a :py:meth:`~xarray.DataArray` with a coordinate, + +.. ipython:: python + + da.isel(x=xr.DataArray([0, 1, 6], dims='z', + coords={'z': ['a', 'b', 'c']}), + y=xr.DataArray([0, 1, 0], dims='z')) + +Analogously, label-based pointwise-indexing is also possible by the ``.sel`` +method: + +.. ipython:: python + + times = xr.DataArray(pd.to_datetime(['2000-01-03', '2000-01-02', '2000-01-01']), + dims='new_time') + arr.sel(space=xr.DataArray(['IA', 'IL', 'IN'], dims=['new_time']), + time=times) .. _align and reindex: @@ -523,3 +565,131 @@ labels: array array.get_index('x') + + +.. _copies_vs_views: + +Copies vs. Views +---------------- + +Whether array indexing returns a view or a copy of the underlying +data depends on the nature of the labels. + +For positional (integer) +indexing, xarray follows the same rules as NumPy: + +* Positional indexing with only integers and slices returns a view. +* Positional indexing with arrays or lists returns a copy. + +The rules for label based indexing are more complex: + +* Label-based indexing with only slices returns a view. +* Label-based indexing with arrays returns a copy. +* Label-based indexing with scalars returns a view or a copy, depending + upon if the corresponding positional indexer can be represented as an + integer or a slice object. The exact rules are determined by pandas. + +Whether data is a copy or a view is more predictable in xarray than in pandas, so +unlike pandas, xarray does not produce `SettingWithCopy warnings`_. However, you +should still avoid assignment with chained indexing. + +.. 
_SettingWithCopy warnings: http://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy + + +.. _multi-level indexing: + +Multi-level indexing +-------------------- + +Just like pandas, advanced indexing on multi-level indexes is possible with +``loc`` and ``sel``. You can slice a multi-index by providing multiple indexers, +i.e., a tuple of slices, labels, list of labels, or any selector allowed by +pandas: + +.. ipython:: python + + midx = pd.MultiIndex.from_product([list('abc'), [0, 1]], + names=('one', 'two')) + mda = xr.DataArray(np.random.rand(6, 3), + [('x', midx), ('y', range(3))]) + mda + mda.sel(x=(list('ab'), [0])) + +You can also select multiple elements by providing a list of labels or tuples or +a slice of tuples: + +.. ipython:: python + + mda.sel(x=[('a', 0), ('b', 1)]) + +Additionally, xarray supports dictionaries: + +.. ipython:: python + + mda.sel(x={'one': 'a', 'two': 0}) + +For convenience, ``sel`` also accepts multi-index levels directly +as keyword arguments: + +.. ipython:: python + + mda.sel(one='a', two=0) + +Note that using ``sel`` it is not possible to mix a dimension +indexer with level indexers for that dimension +(e.g., ``mda.sel(x={'one': 'a'}, two=0)`` will raise a ``ValueError``). + +Like pandas, xarray handles partial selection on multi-index (level drop). +As shown below, it also renames the dimension / coordinate when the +multi-index is reduced to a single index. + +.. ipython:: python + + mda.loc[{'one': 'a'}, ...] + +Unlike pandas, xarray does not guess whether you provide index levels or +dimensions when using ``loc`` in some ambiguous cases. For example, for +``mda.loc[{'one': 'a', 'two': 0}]`` and ``mda.loc['a', 0]`` xarray +always interprets ('one', 'two') and ('a', 0) as the names and +labels of the 1st and 2nd dimension, respectively. You must specify all +dimensions or use the ellipsis in the ``loc`` specifier, e.g. 
in the example +above, ``mda.loc[{'one': 'a', 'two': 0}, :]`` or ``mda.loc[('a', 0), ...]``. + + +.. _indexing.rules: + +Indexing rules +-------------- + +Here we describe the full rules xarray uses for vectorized indexing. Note that +this is for the purposes of explanation: for the sake of efficiency and to +support various backends, the actual implementation is different. + +0. (Only for label based indexing.) Look up positional indexes along each + dimension from the corresponding :py:class:`pandas.Index`. + +1. A full slice object ``:`` is inserted for each dimension without an indexer. + +2. ``slice`` objects are converted into arrays, given by + ``np.arange(*slice.indices(...))``. + +3. Assume dimension names for array indexers without dimensions, such as + ``np.ndarray`` and ``list``, from the dimensions to be indexed along. + For example, ``v.isel(x=[0, 1])`` is understood as + ``v.isel(x=xr.DataArray([0, 1], dims=['x']))``. + +4. For each variable in a ``Dataset`` or ``DataArray`` (the array and its + coordinates): + + a. Broadcast all relevant indexers based on their dimension names + (see :ref:`compute.broadcasting` for full details). + + b. Index the underlying array by the broadcast indexers, using NumPy's + advanced indexing rules. + +5. If any indexer DataArray has coordinates and no coordinate with the + same name exists, attach them to the indexed object. + +.. note:: + + Only 1-dimensional boolean arrays can be used as indexers. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7025e2dd7d9..eac7d92e46c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,7 +16,23 @@ What's New .. _whats-new.0.9.7: v0.10.0 (unreleased) -------------------- +-------------------- + +Backward Incompatible Changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- xarray now supports vectorized indexing, where we consider the dimension of + indexer, e.g. ``array.sel(x=ind)`` with ``ind.dims == ('y', )`` . 
+ This enables us more advanced indexing, including outer indexing, diagonal + indexing, as well as vectorized indexing. + Due to this change, existing uses of xarray objects to index other xarray + objects will break in some cases. + ``isel_points`` / ``sel_points`` methods are deprecated, since the same thing + can be done by the new ``isel`` / ``sel`` methods. + See :ref:`vectorized_indexing` for the details + (:issue:`1444`, :issue:`1436`, ). + By `Keisuke Fujii `_ and + `Stephan Hoyer `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -54,9 +70,9 @@ Breaking changes [...] Note that both versions are currently supported, but using the old syntax will - produce a warning encouraging users to adopt the new syntax. + produce a warning encouraging users to adopt the new syntax. By `Daniel Rothenberg `_. - + - ``repr`` and the Jupyter Notebook won't automatically compute dask variables. Datasets loaded with ``open_dataset`` won't automatically read coords from disk when calling ``repr`` (:issue:`1522`). @@ -193,6 +209,9 @@ Bug fixes objects with data stored as ``dask`` arrays (:issue:`1529`). By `Joe Hamman `_. +- Fix positional indexing to allow the use of unsigned integers (:issue:`1405`). + By `Joe Hamman `_ and + `Gerrit Holl = 0 @@ -355,6 +358,14 @@ def var_indexers(var, indexers): reindexed = OrderedDict() for dim, indexer in indexers.items(): + if isinstance(indexer, DataArray) and indexer.dims != (dim, ): + warnings.warn( + "Indexer has dimensions {0:s} that are different " + "from that to be indexed along {1:s}. 
" + "This will behave differently in the future.".format( + str(indexer.dims), dim), + FutureWarning, stacklevel=3) + if dim in variables: var = variables[dim] args = (var.attrs, var.encoding) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 784c722989c..65077b223d0 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -138,6 +138,23 @@ def build_output_coords( signature, # type: _UFuncSignature exclude_dims=frozenset(), # type: set ): + """Build output coordinates for an operation. + + Parameters + ---------- + args : list + List of raw operation arguments. Any valid types for xarray operations + are OK, e.g., scalars, Variable, DataArray, Dataset. + signature : _UfuncSignature + Core dimensions signature for the operation. + exclude_dims : optional set + Dimensions excluded from the operation. Coordinates along these + dimensions are dropped. + + Returns + ------- + OrderedDict of Variable objects with merged coordinates. + """ # type: (...) 
-> List[OrderedDict[Any, Variable]] input_coords = _get_coord_variables(args) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 17fc9ebd299..85c5f0137a0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -104,8 +104,11 @@ def _remap_key(self, key): return indexing.remap_label_indexers(self.data_array, key) def __getitem__(self, key): - pos_indexers, new_indexes = self._remap_key(key) - return self.data_array[pos_indexers]._replace_indexes(new_indexes) + if not utils.is_dict_like(key): + # expand the indexer so we can handle Ellipsis + labels = indexing.expanded_indexer(key, self.data_array.ndim) + key = dict(zip(self.data_array.dims, labels)) + return self.data_array.sel(**key) def __setitem__(self, key, value): pos_indexers, _ = self._remap_key(key) @@ -474,14 +477,14 @@ def __getitem__(self, key): if isinstance(key, basestring): return self._getitem_coord(key) else: - # orthogonal array indexing + # xarray-style array indexing return self.isel(**self._item_key_to_dict(key)) def __setitem__(self, key, value): if isinstance(key, basestring): self.coords[key] = value else: - # orthogonal array indexing + # xarray-style array indexing self.variable[key] = value def __delitem__(self, key): @@ -721,11 +724,9 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): Dataset.sel DataArray.isel """ - pos_indexers, new_indexes = indexing.remap_label_indexers( - self, indexers, method=method, tolerance=tolerance - ) - result = self.isel(drop=drop, **pos_indexers) - return result._replace_indexes(new_indexes) + ds = self._to_temp_dataset().sel(drop=drop, method=method, + tolerance=tolerance, **indexers) + return self._from_temp_dataset(ds) def isel_points(self, dim='points', **indexers): """Return a new DataArray whose dataset is given by pointwise integer diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5d975ffd281..5dc9477c9c6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5,6 
+5,7 @@ from collections import Mapping, defaultdict from distutils.version import LooseVersion from numbers import Number +import warnings import sys @@ -26,7 +27,7 @@ from .common import ImplementsDatasetReduce, BaseDataObject from .dtypes import is_datetime_like from .merge import (dataset_update_method, dataset_merge_method, - merge_data_and_coords) + merge_data_and_coords, merge_variables) from .utils import (Frozen, SortedKeysDict, maybe_wrap_array, hashable, decode_numpy_dict_values, ensure_us_time_resolution) from .variable import (Variable, as_variable, IndexVariable, @@ -1125,6 +1126,78 @@ def maybe_chunk(name, var, chunks): for k, v in self.variables.items()]) return self._replace_vars_and_dims(variables) + def _validate_indexers(self, indexers): + """ Here we make sure + + indexer has a valid keys + + indexer is in a valid data type + """ + from .dataarray import DataArray + + invalid = [k for k in indexers if k not in self.dims] + if invalid: + raise ValueError("dimensions %r do not exist" % invalid) + + # all indexers should be int, slice, np.ndarrays, or Variable + indexers_list = [] + for k, v in iteritems(indexers): + if isinstance(v, integer_types + (slice, Variable)): + pass + elif isinstance(v, DataArray): + v = v.variable + elif isinstance(v, tuple): + v = as_variable(v) + elif isinstance(v, Dataset): + raise TypeError('cannot use a Dataset as an indexer') + else: + v = np.asarray(v) + indexers_list.append((k, v)) + return indexers_list + + def _get_indexers_coordinates(self, indexers): + """ Extract coordinates from indexers. + Returns an OrderedDict mapping from coordinate name to the + coordinate variable. + + Only coordinate with a name different from any of self.variables will + be attached. 
+ """ + from .dataarray import DataArray + + coord_list = [] + for k, v in indexers.items(): + if isinstance(v, DataArray): + v_coords = v.coords + if v.dtype.kind == 'b': + if v.ndim != 1: # we only support 1-d boolean array + raise ValueError( + '{:d}d-boolean array is used for indexing along ' + 'dimension {!r}, but only 1d boolean arrays are ' + 'supported.'.format(v.ndim, k)) + # Make sure in case of boolean DataArray, its + # coordinate also should be indexed. + v_coords = v[v.values.nonzero()[0]].coords + + coord_list.append({d: v_coords[d].variable for d in v.coords}) + + # we don't need to call align() explicitly, because merge_variables + # already checks for exact alignment between dimension coordinates + coords = merge_variables(coord_list) + + for k in self.dims: + # make sure there are not conflict in dimension coordinates + if (k in coords and k in self._variables and + not coords[k].equals(self._variables[k])): + raise IndexError( + 'dimension coordinate {!r} conflicts between ' + 'indexed and indexing objects:\n{}\nvs.\n{}' + .format(k, self._variables[k], coords[k])) + + attached_coords = OrderedDict() + for k, v in coords.items(): # silently drop the conflicted variables. + if k not in self._variables: + attached_coords[k] = v + return attached_coords + def isel(self, drop=False, **indexers): """Returns a new dataset with each array indexed along the specified dimension(s). @@ -1141,40 +1214,45 @@ def isel(self, drop=False, **indexers): **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. + indexer can be a integer, slice, array-like or DataArray. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. Returns ------- obj : Dataset A new Dataset with the same contents as this dataset, except each - array and dimension is indexed by the appropriate indexers. 
In - general, each array's data will be a view of the array's data - in this dataset, unless numpy fancy indexing was triggered by using + array and dimension is indexed by the appropriate indexers. + If indexer DataArrays have coordinates that do not conflict with + this object, then these coordinates will be attached. + In general, each array's data will be a view of the array's data + in this dataset, unless vectorized indexing was triggered by using an array indexer, in which case the data will be a copy. See Also -------- Dataset.sel - Dataset.sel_points - Dataset.isel_points DataArray.isel """ - invalid = [k for k in indexers if k not in self.dims] - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) - - # all indexers should be int, slice or np.ndarrays - indexers = [(k, (np.asarray(v) - if not isinstance(v, integer_types + (slice,)) - else v)) - for k, v in iteritems(indexers)] + indexers_list = self._validate_indexers(indexers) variables = OrderedDict() for name, var in iteritems(self._variables): - var_indexers = dict((k, v) for k, v in indexers if k in var.dims) + var_indexers = {k: v for k, v in indexers_list if k in var.dims} new_var = var.isel(**var_indexers) if not (drop and name in var_indexers): variables[name] = new_var - coord_names = set(self._coord_names) & set(variables) + + coord_names = set(variables).intersection(self._coord_names) + selected = self._replace_vars_and_dims(variables, + coord_names=coord_names) + + # Extract coordinates from indexers + coord_vars = selected._get_indexers_coordinates(indexers) + variables.update(coord_vars) + coord_names = (set(variables) + .intersection(self._coord_names) + .union(coord_vars)) return self._replace_vars_and_dims(variables, coord_names=coord_names) def sel(self, method=None, tolerance=None, drop=False, **indexers): @@ -1216,26 +1294,45 @@ def sel(self, method=None, tolerance=None, drop=False, **indexers): by scalars, slices or arrays of tick labels. 
For dimensions with multi-index, the indexer may also be a dict-like object with keys matching index level names. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. Returns ------- obj : Dataset A new Dataset with the same contents as this dataset, except each - variable and dimension is indexed by the appropriate indexers. In - general, each variable's data will be a view of the variable's data - in this dataset, unless numpy fancy indexing was triggered by using + variable and dimension is indexed by the appropriate indexers. + If indexer DataArrays have coordinates that do not conflict with + this object, then these coordinates will be attached. + In general, each array's data will be a view of the array's data + in this dataset, unless vectorized indexing was triggered by using an array indexer, in which case the data will be a copy. + See Also -------- Dataset.isel - Dataset.sel_points - Dataset.isel_points DataArray.sel """ + from .dataarray import DataArray + + v_indexers = {k: v.variable.data if isinstance(v, DataArray) else v + for k, v in indexers.items()} + pos_indexers, new_indexes = indexing.remap_label_indexers( - self, indexers, method=method, tolerance=tolerance + self, v_indexers, method=method, tolerance=tolerance ) + # attach indexer's coordinate to pos_indexers + for k, v in indexers.items(): + if isinstance(v, Variable): + pos_indexers[k] = Variable(v.dims, pos_indexers[k]) + elif isinstance(v, DataArray): + # drop coordinates found in indexers since .sel() already + # ensures alignments + coords = OrderedDict((k, v) for k, v in v._coords.items() + if k not in indexers) + pos_indexers[k] = DataArray(pos_indexers[k], + coords=coords, dims=v.dims) result = self.isel(drop=drop, **pos_indexers) return result._replace_indexes(new_indexes) @@ -1277,6 +1374,8 @@ def isel_points(self, dim='points', **indexers): Dataset.sel_points DataArray.isel_points """ + 
warnings.warn('Dataset.isel_points is deprecated: use Dataset.isel()' + ' instead.', DeprecationWarning, stacklevel=2) indexer_dims = set(indexers) @@ -1423,6 +1522,9 @@ def sel_points(self, dim='points', method=None, tolerance=None, Dataset.isel_points DataArray.sel_points """ + warnings.warn('Dataset.sel_points is deprecated: use Dataset.sel()' ' instead.', DeprecationWarning, stacklevel=2) + pos_indexers, _ = indexing.remap_label_indexers( self, indexers, method=method, tolerance=tolerance ) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ff6f9fb21aa..faa125e73a5 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -21,6 +21,7 @@ from .options import OPTIONS from .pycompat import PY2, unicode_type, bytes_type, dask_array_type +from .indexing import BasicIndexer def pretty_print(x, numchars): @@ -68,8 +69,8 @@ def _get_indexer_at_least_n_items(shape, n_desired): cum_items = np.cumprod(shape[::-1]) n_steps = np.argmax(cum_items >= n_desired) stop = int(np.ceil(float(n_desired) / np.r_[1, cum_items][n_steps])) - indexer = ((0, ) * (len(shape) - 1 - n_steps) + (slice(stop), ) + - (slice(None), ) * n_steps) + indexer = BasicIndexer((0, ) * (len(shape) - 1 - n_steps) + (slice(stop), ) + + (slice(None), ) * n_steps) return indexer diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ef8200eb451..956e5e2d0ec 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2,14 +2,15 @@ from __future__ import division from __future__ import print_function from datetime import timedelta -from collections import defaultdict +from collections import defaultdict, Hashable import numpy as np import pandas as pd +from . import nputils from . 
import utils from .pycompat import (iteritems, range, integer_types, dask_array_type, suppress) -from .utils import is_full_slice, is_dict_like +from .utils import is_dict_like def expanded_indexer(key, ndim): @@ -42,80 +43,10 @@ def expanded_indexer(key, ndim): return tuple(new_key) -def canonicalize_indexer(key, ndim): - """Given an indexer for orthogonal array indexing, return an indexer that - is a tuple composed entirely of slices, integer ndarrays and native python - ints. - """ - def canonicalize(indexer): - if not isinstance(indexer, slice): - indexer = np.asarray(indexer) - if indexer.ndim == 0: - indexer = int(np.asscalar(indexer)) - else: - if indexer.ndim != 1: - raise ValueError('orthogonal array indexing only supports ' - '1d arrays') - if indexer.dtype.kind == 'b': - indexer, = np.nonzero(indexer) - elif indexer.dtype.kind != 'i': - raise ValueError('invalid subkey %r for integer based ' - 'array indexing; all subkeys must be ' - 'slices, integers or sequences of ' - 'integers or Booleans' % indexer) - return indexer - - return tuple(canonicalize(k) for k in expanded_indexer(key, ndim)) - - def _expand_slice(slice_, size): return np.arange(*slice_.indices(size)) -def orthogonal_indexer(key, shape): - """Given a key for orthogonal array indexing, returns an equivalent key - suitable for indexing a numpy.ndarray with fancy indexing. 
- """ - # replace Ellipsis objects with slices - key = list(canonicalize_indexer(key, len(shape))) - # replace 1d arrays and slices with broadcast compatible arrays - # note: we treat integers separately (instead of turning them into 1d - # arrays) because integers (and only integers) collapse axes when used with - # __getitem__ - non_int_keys = [n for n, k in enumerate(key) - if not isinstance(k, integer_types)] - - def full_slices_unselected(n_list): - def all_full_slices(key_index): - return all(is_full_slice(key[n]) for n in key_index) - if not n_list: - return n_list - elif all_full_slices(range(n_list[0] + 1)): - return full_slices_unselected(n_list[1:]) - elif all_full_slices(range(n_list[-1], len(key))): - return full_slices_unselected(n_list[:-1]) - else: - return n_list - - # However, testing suggests it is OK to keep contiguous sequences of full - # slices at the start or the end of the key. Keeping slices around (when - # possible) instead of converting slices to arrays significantly speeds up - # indexing. - # (Honestly, I don't understand when it's not OK to keep slices even in - # between integer indices if as array is somewhere in the key, but such are - # the admittedly mind-boggling ways of numpy's advanced indexing.) - array_keys = full_slices_unselected(non_int_keys) - - def maybe_expand_slice(k, length): - return _expand_slice(k, length) if isinstance(k, slice) else k - - array_indexers = np.ix_(*(maybe_expand_slice(key[n], shape[n]) - for n in array_keys)) - for i, n in enumerate(array_keys): - key[n] = array_indexers[i] - return tuple(key) - - def _try_get_item(x): try: return x.item() @@ -163,9 +94,14 @@ def get_loc(index, label, method=None, tolerance=None): return index.get_loc(label, **kwargs) -def get_indexer(index, labels, method=None, tolerance=None): +def get_indexer_nd(index, labels, method=None, tolerance=None): + """ Call pd.Index.get_indexer(labels). 
""" kwargs = _index_method_kwargs(method, tolerance) - return index.get_indexer(labels, **kwargs) + + flat_labels = np.ravel(labels) + flat_indexer = index.get_indexer(flat_labels, **kwargs) + indexer = flat_indexer.reshape(labels.shape) + return indexer def convert_label_indexer(index, label, index_name='', method=None, @@ -200,8 +136,13 @@ def convert_label_indexer(index, label, index_name='', method=None, elif len(label) == index.nlevels and not is_nested_vals: indexer = index.get_loc(tuple((label[k] for k in index.names))) else: - indexer, new_index = index.get_loc_level(tuple(label.values()), - level=tuple(label.keys())) + for k, v in label.items(): + # index should be an item (i.e. Hashable) not an array-like + if not isinstance(v, Hashable): + raise ValueError('Vectorized selection is not ' + 'available along level variable: ' + k) + indexer, new_index = index.get_loc_level( + tuple(label.values()), level=tuple(label.keys())) elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex): if _is_nested_tuple(label): @@ -214,16 +155,20 @@ def convert_label_indexer(index, label, index_name='', method=None, ) else: - label = _asarray_tuplesafe(label) + label = (label if getattr(label, 'ndim', 1) > 1 # vectorized-indexing + else _asarray_tuplesafe(label)) if label.ndim == 0: if isinstance(index, pd.MultiIndex): indexer, new_index = index.get_loc_level(label.item(), level=0) else: indexer = get_loc(index, label.item(), method, tolerance) elif label.dtype.kind == 'b': - indexer, = np.nonzero(label) + indexer = label else: - indexer = get_indexer(index, label, method, tolerance) + if isinstance(index, pd.MultiIndex) and label.ndim > 1: + raise ValueError('Vectorized selection is not available along ' + 'MultiIndex variable: ' + index_name) + indexer = get_indexer_nd(index, label, method, tolerance) if np.any(indexer < 0): raise KeyError('not all values found in index %r' % index_name) @@ -332,6 +277,32 @@ def _index_indexer_1d(old_indexer, applied_indexer, 
size): return indexer +class IndexerTuple(tuple): + """ Base class for xarray indexing tuples """ + + def __repr__(self): + return type(self).__name__ + super(IndexerTuple, self).__repr__() + + +def to_tuple(key): + """ Converts our indexer tuple to a native python tuple """ + return tuple(key) if isinstance(key, IndexerTuple) else key + + +class BasicIndexer(IndexerTuple): + """ Tuple for basic indexing. """ + + +class OuterIndexer(IndexerTuple): + """ Tuple for outer/orthogonal indexing. + All the items are one of integer, slice, and 1d-np.ndarray. + """ + + +class VectorizedIndexer(IndexerTuple): + """ Tuple for vectorized indexing """ + + class LazilyIndexedArray(utils.NDArrayMixin): """Wrap an array that handles orthogonal indexing to make indexing lazy """ @@ -345,20 +316,34 @@ def __init__(self, array, key=None): Array indexer. If provided, it is assumed to already be in canonical expanded form. """ - if key is None: - key = (slice(None),) * array.ndim - self.array = array - self.key = key + # We need to avoid doubly wrapping. + if isinstance(array, type(self)): + self.array = array.array + self.key = array.key + if key is not None: + self.key = self._updated_key(key) + + else: + if key is None: + key = (slice(None),) * array.ndim + key = OuterIndexer(key) + self.array = array + self.key = key def _updated_key(self, new_key): - new_key = iter(canonicalize_indexer(new_key, self.ndim)) + # TODO should suport VectorizedIndexer + if isinstance(new_key, VectorizedIndexer): + raise NotImplementedError( + 'Vectorized indexing for {} is not implemented. 
Load your ' + 'data first with .load() or .compute().'.format(type(self))) + new_key = iter(expanded_indexer(new_key, self.ndim)) key = [] for size, k in zip(self.array.shape, self.key): if isinstance(k, integer_types): key.append(k) else: key.append(_index_indexer_1d(k, next(new_key), size)) - return tuple(key) + return OuterIndexer(key) @property def shape(self): @@ -371,7 +356,7 @@ def shape(self): return tuple(shape) def __array__(self, dtype=None): - array = orthogonally_indexable(self.array) + array = xarray_indexable(self.array) return np.asarray(array[self.key], dtype=None) def __getitem__(self, key): @@ -434,7 +419,7 @@ def __setitem__(self, key, value): self.array[key] = value -def orthogonally_indexable(array): +def xarray_indexable(array): if isinstance(array, np.ndarray): return NumpyIndexingAdapter(array) if isinstance(array, pd.Index): @@ -444,25 +429,49 @@ def orthogonally_indexable(array): return array -class NumpyIndexingAdapter(utils.NDArrayMixin): - """Wrap a NumPy array to use orthogonal indexing (array indexing - accesses different dimensions independently, like netCDF4-python variables) +def _outer_to_numpy_indexer(key, shape): + """Convert an OuterIndexer into an indexer for NumPy. + + Parameters + ---------- + key : OuterIndexer + Outer indexing tuple to convert. + shape : tuple + Shape of the array subject to the indexing. + + Returns + ------- + tuple + Base tuple suitable for use to index a NumPy array. """ - # note: this object is somewhat similar to biggus.NumpyArrayAdapter in that - # it implements orthogonal indexing, except it casts to a numpy array, - # isn't lazy and supports writing values. - def __init__(self, array): - self.array = np.asarray(array) + if len([k for k in key if not isinstance(k, slice)]) <= 1: + # If there is only one vector and all others are slice, + # it can be safely used in mixed basic/advanced indexing. + # Boolean index should already be converted to integer array. 
+ return tuple(key) + + n_dim = len([k for k in key if not isinstance(k, integer_types)]) + i_dim = 0 + new_key = [] + for k, size in zip(key, shape): + if isinstance(k, integer_types): + new_key.append(k) + else: # np.ndarray or slice + if isinstance(k, slice): + k = np.arange(*k.indices(size)) + assert k.dtype.kind in {'i', 'u'} + shape = [(1,) * i_dim + (k.size, ) + + (1,) * (n_dim - i_dim - 1)] + new_key.append(k.reshape(*shape)) + i_dim += 1 + return tuple(new_key) - def __array__(self, dtype=None): - return np.asarray(self.array, dtype=dtype) - def _convert_key(self, key): - key = expanded_indexer(key, self.ndim) - if any(not isinstance(k, integer_types + (slice,)) for k in key): - # key would trigger fancy indexing - key = orthogonal_indexer(key, self.shape) - return key +class NumpyIndexingAdapter(utils.NDArrayMixin): + """Wrap a NumPy array to use broadcasted indexing + """ + def __init__(self, array): + self.array = array def _ensure_ndarray(self, value): # We always want the result of indexing to be a NumPy array. If it's @@ -473,30 +482,65 @@ def _ensure_ndarray(self, value): value = utils.to_0d_array(value) return value + def _indexing_array_and_key(self, key): + if isinstance(key, OuterIndexer): + key = _outer_to_numpy_indexer(key, self.array.shape) + + if isinstance(key, VectorizedIndexer): + array = nputils.NumpyVIndexAdapter(self.array) + else: + array = self.array + + return array, to_tuple(key) + def __getitem__(self, key): - key = self._convert_key(key) - return self._ensure_ndarray(self.array[key]) + array, key = self._indexing_array_and_key(key) + return self._ensure_ndarray(array[key]) def __setitem__(self, key, value): - key = self._convert_key(key) - self.array[key] = value + array, key = self._indexing_array_and_key(key) + array[key] = value class DaskIndexingAdapter(utils.NDArrayMixin): - """Wrap a dask array to support orthogonal indexing + """Wrap a dask array to support xarray-style indexing. 
""" def __init__(self, array): + """ This adapter is usually called in Variable.__getitem__ with + array=Variable._broadcast_indexes + """ self.array = array def __getitem__(self, key): - key = expanded_indexer(key, self.ndim) - if any(not isinstance(k, integer_types + (slice,)) for k in key): - value = self.array - for axis, subkey in reversed(list(enumerate(key))): - value = value[(slice(None),) * axis + (subkey,)] + def to_int_tuple(key): + # workaround for uint64 indexer (GH:1406) + # TODO remove here after next dask release (0.15.3) + return tuple([k.astype(int) if isinstance(k, np.ndarray) + else k for k in key]) + + if isinstance(key, BasicIndexer): + return self.array[to_int_tuple(key)] + elif isinstance(key, VectorizedIndexer): + return self.array.vindex[to_int_tuple(tuple(key))] else: - value = self.array[key] - return value + assert isinstance(key, OuterIndexer) + key = to_int_tuple(tuple(key)) + try: + return self.array[key] + except NotImplementedError: + # manual orthogonal indexing. + # TODO: port this upstream into dask in a saner way. + value = self.array + for axis, subkey in reversed(list(enumerate(key))): + value = value[(slice(None),) * axis + (subkey,)] + return value + + def __setitem__(self, key, value): + raise TypeError("this variable's data is stored in a dask array, " + 'which does not support item assignment. 
To ' + 'assign to this variable, you must first load it ' + 'into memory explicitly using the .load() ' + 'method or accessing its .values attribute.') class PandasIndexAdapter(utils.NDArrayMixin): @@ -536,12 +580,16 @@ def shape(self): # .shape is broken on pandas prior to v0.15.2 return (len(self.array),) - def __getitem__(self, key): + def __getitem__(self, tuple_key): + key = to_tuple(tuple_key) if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) key, = key + if getattr(key, 'ndim', 0) > 1: # Return np-array if multidimensional + return NumpyIndexingAdapter(self.array.values)[tuple_key] + result = self.array[key] if isinstance(result, pd.Index): diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 5ebab4ec407..a721425b839 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -79,3 +79,56 @@ def array_ne(self, other): with warnings.catch_warnings(): warnings.filterwarnings('ignore', r'elementwise comparison failed') return _ensure_bool_is_ndarray(self != other, self, other) + + +def _is_contiguous(positions): + """Given a non-empty list, does it consist of contiguous integers?""" + previous = positions[0] + for current in positions[1:]: + if current != previous + 1: + return False + previous = current + return True + + +def _advanced_indexer_subspaces(key): + """Indices of the advanced indexes subspaces for mixed indexing and vindex. + """ + if not isinstance(key, tuple): + key = (key,) + advanced_index_positions = [i for i, k in enumerate(key) + if not isinstance(k, slice)] + + if (not advanced_index_positions or + not _is_contiguous(advanced_index_positions)): + # Nothing to reorder: dimensions on the indexing result are already + # ordered like vindex. 
See NumPy's rule for "Combining advanced and + # basic indexing": + # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#combining-advanced-and-basic-indexing + return (), () + + non_slices = [k for k in key if not isinstance(k, slice)] + ndim = len(np.broadcast(*non_slices).shape) + mixed_positions = advanced_index_positions[0] + np.arange(ndim) + vindex_positions = np.arange(ndim) + return mixed_positions, vindex_positions + + +class NumpyVIndexAdapter(object): + """Object that implements indexing like vindex on a np.ndarray. + + This is a pure Python implementation of (some of) the logic in this NumPy + proposal: https://github.com/numpy/numpy/pull/6256 + """ + def __init__(self, array): + self._array = array + + def __getitem__(self, key): + mixed_positions, vindex_positions = _advanced_indexer_subspaces(key) + return np.moveaxis(self._array[key], mixed_positions, vindex_positions) + + def __setitem__(self, key, value): + """Value must have dimensionality matching the key.""" + mixed_positions, vindex_positions = _advanced_indexer_subspaces(key) + self._array[key] = np.moveaxis(value, vindex_positions, + mixed_positions) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 89d1462328c..d31d6692c33 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -8,7 +8,7 @@ import itertools import re import warnings -from collections import Mapping, MutableMapping, Iterable +from collections import Mapping, MutableMapping, MutableSet, Iterable import numpy as np import pandas as pd @@ -378,6 +378,43 @@ def __len__(self): raise len(iter(self)) +class OrderedSet(MutableSet): + """A simple ordered set. + + The API matches the builtin set, but it preserves insertion order of + elements, like an OrderedDict. 
+ """ + def __init__(self, values=None): + self._ordered_dict = OrderedDict() + if values is not None: + self |= values + + # Required methods for MutableSet + + def __contains__(self, value): + return value in self._ordered_dict + + def __iter__(self): + return iter(self._ordered_dict) + + def __len__(self): + return len(self._ordered_dict) + + def add(self, value): + self._ordered_dict[value] = None + + def discard(self, value): + del self._ordered_dict[value] + + # Additional methods + + def update(self, values): + self |= values + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, list(self)) + + class NdimSizeLenMixin(object): """Mixin class that extends a class that defines a ``shape`` property to one that also defines ``ndim``, ``size`` and ``__len__``. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ae4cb08d6a4..a3b5a4d0a88 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -19,7 +19,9 @@ from . import utils from .pycompat import (basestring, OrderedDict, zip, integer_types, dask_array_type) -from .indexing import (PandasIndexAdapter, orthogonally_indexable) +from .indexing import (PandasIndexAdapter, xarray_indexable, BasicIndexer, + OuterIndexer, VectorizedIndexer) +from .utils import OrderedSet import xarray as xr # only for Dataset and DataArray @@ -29,6 +31,16 @@ pass +BASIC_INDEXING_TYPES = integer_types + (slice,) + + +class MissingDimensionsError(ValueError): + """Error class used when we can't safely guess a dimension name. + """ + # inherits from ValueError for backward compatibility + # TODO: move this to an xarray.exceptions module? + + def as_variable(obj, name=None): """Convert an object into a Variable. @@ -87,7 +99,7 @@ def as_variable(obj, name=None): elif name is not None: data = as_compatible_data(obj) if data.ndim != 1: - raise ValueError( + raise MissingDimensionsError( 'cannot set variable %r with %r-dimensional data ' 'without explicit dimension names. 
Pass a tuple of ' '(dims, data) instead.' % (name, data.ndim)) @@ -99,11 +111,11 @@ def as_variable(obj, name=None): if name is not None and name in obj.dims: # convert the Variable into an Index if obj.ndim != 1: - raise ValueError( + raise MissingDimensionsError( '%r has more than 1-dimension and the same name as one of its ' 'dimensions %r. xarray disallows such variables because they ' - 'conflict with the coordinates used to label dimensions.' - % (name, obj.dims)) + 'conflict with the coordinates used to label ' + 'dimensions.' % (name, obj.dims)) obj = obj.to_index_variable() return obj @@ -305,7 +317,7 @@ def data(self, data): @property def _indexable_data(self): - return orthogonally_indexable(self._data) + return xarray_indexable(self._data) def load(self, **kwargs): """Manually trigger loading of this variable's data from disk or a @@ -404,34 +416,188 @@ def _item_key_to_tuple(self, key): else: return key + def _broadcast_indexes(self, key): + """Prepare an indexing key for an indexing operation. + + Parameters + ----------- + key: int, slice, array, dict or tuple of integer, slices and arrays + Any valid input for indexing. + + Returns + ------- + dims: tuple + Dimension of the resultant variable. + indexers: IndexingTuple subclass + Tuple of integer, array-like, or slices to use when indexing + self._data. The type of this argument indicates the type of + indexing to perform, either basic, outer or vectorized. + new_order : Optional[Sequence[int]] + Optional reordering to do on the result of indexing. If not None, + the first len(new_order) indexing should be moved to these + positions. 
+ """ + key = self._item_key_to_tuple(key) # key is a tuple + # key is a tuple of full size + key = indexing.expanded_indexer(key, self.ndim) + # Convert a scalar Variable as an integer + key = tuple([(k.data.item() if isinstance(k, Variable) and k.ndim == 0 + else k) for k in key]) + if all(isinstance(k, BASIC_INDEXING_TYPES) for k in key): + return self._broadcast_indexes_basic(key) + + self._validate_indexers(key) + # Detect it can be mapped as an outer indexer + # If all key is unlabeled, or + # key can be mapped as an OuterIndexer. + if all(not isinstance(k, Variable) for k in key): + return self._broadcast_indexes_outer(key) + + # If all key is 1-dimensional and there are no duplicate labels, + # key can be mapped as an OuterIndexer. + dims = [] + for k, d in zip(key, self.dims): + if isinstance(k, Variable): + if len(k.dims) > 1: + return self._broadcast_indexes_vectorized(key) + dims.append(k.dims[0]) + if not isinstance(k, integer_types): + dims.append(d) + + if len(set(dims)) == len(dims): + return self._broadcast_indexes_outer(key) + + return self._broadcast_indexes_vectorized(key) + + def _broadcast_indexes_basic(self, key): + dims = tuple(dim for k, dim in zip(key, self.dims) + if not isinstance(k, integer_types)) + return dims, BasicIndexer(key), None + + def _validate_indexers(self, key): + """ Make sanity checks """ + for dim, k in zip(self.dims, key): + if isinstance(k, BASIC_INDEXING_TYPES): + pass + else: + if not isinstance(k, Variable): + k = np.asarray(k) + if k.ndim > 1: + raise IndexError( + "Unlabeled multi-dimensional array cannot be " + "used for indexing: {}".format(k)) + if k.dtype.kind == 'b': + if self.shape[self.get_axis_num(dim)] != len(k): + raise IndexError( + "Boolean array size {0:d} is used to index array " + "with shape {1:s}.".format(len(k), + str(self.shape))) + if k.ndim > 1: + raise IndexError("{}-dimensional boolean indexing is " + "not supported. 
".format(k.ndim)) + if getattr(k, 'dims', (dim, )) != (dim, ): + raise IndexError( + "Boolean indexer should be unlabeled or on the " + "same dimension to the indexed array. Indexer is " + "on {0:s} but the target dimension is " + "{1:s}.".format(str(k.dims), dim)) + + def _broadcast_indexes_outer(self, key): + dims = tuple(k.dims[0] if isinstance(k, Variable) else dim + for k, dim in zip(key, self.dims) + if not isinstance(k, integer_types)) + indexer = [] + for k in key: + if isinstance(k, Variable): + k = k.data + + if isinstance(k, BASIC_INDEXING_TYPES): + indexer.append(k) + else: + k = np.asarray(k) + indexer.append(k if k.dtype.kind != 'b' else np.flatnonzero(k)) + return dims, OuterIndexer(indexer), None + + def _nonzero(self): + """ Equivalent numpy's nonzero but returns a tuple of Varibles. """ + # TODO we should replace dask's native nonzero + # after https://github.com/dask/dask/issues/1076 is implemented. + nonzeros = np.nonzero(self.data) + return tuple(Variable((dim), nz) for nz, dim + in zip(nonzeros, self.dims)) + + def _broadcast_indexes_vectorized(self, key): + variables = [] + out_dims_set = OrderedSet() + for dim, value in zip(self.dims, key): + if isinstance(value, slice): + out_dims_set.add(dim) + else: + variable = (value if isinstance(value, Variable) else + as_variable(value, name=dim)) + if variable.dtype.kind == 'b': # boolean indexing case + (variable,) = variable._nonzero() + + variables.append(variable) + out_dims_set.update(variable.dims) + + variable_dims = set() + for variable in variables: + variable_dims.update(variable.dims) + + slices = [] + for i, (dim, value) in enumerate(zip(self.dims, key)): + if isinstance(value, slice): + if dim in variable_dims: + # We only convert slice objects to variables if they share + # a dimension with at least one other variable. Otherwise, + # we can equivalently leave them as slices aknd transpose + # the result. This is significantly faster/more efficient + # for most array backends. 
+ values = np.arange(*value.indices(self.sizes[dim])) + variables.insert(i - len(slices), Variable((dim,), values)) + else: + slices.append((i, value)) + + try: + variables = _broadcast_compat_variables(*variables) + except ValueError: + raise IndexError("Dimensions of indexers mismatch: {}".format(key)) + + out_key = [variable.data for variable in variables] + out_dims = tuple(out_dims_set) + slice_positions = set() + for i, value in slices: + out_key.insert(i, value) + new_position = out_dims.index(self.dims[i]) + slice_positions.add(new_position) + + if slice_positions: + new_order = [i for i in range(len(out_dims)) + if i not in slice_positions] + else: + new_order = None + + return out_dims, VectorizedIndexer(out_key), new_order + def __getitem__(self, key): """Return a new Array object whose contents are consistent with getting the provided key from the underlying data. - NB. __getitem__ and __setitem__ implement "orthogonal indexing" like - netCDF4-python, where the key can only include integers, slices - (including `Ellipsis`) and 1d arrays, each of which are applied - orthogonally along their respective dimensions. - - The difference does not matter in most cases unless you are using - numpy's "fancy indexing," which can otherwise result in data arrays - whose shapes is inconsistent (or just uninterpretable with) with the - variable's dimensions. + NB. __getitem__ and __setitem__ implement xarray-style indexing, + where if keys are unlabeled arrays, we index the array orthogonally + with them. If keys are labeled array (such as Variables), they are + broadcasted with our usual scheme and then the array is indexed with + the broadcasted key, like numpy's fancy indexing. If you really want to do indexing like `x[x > 0]`, manipulate the numpy array `x.values` directly. 
""" - key = self._item_key_to_tuple(key) - key = indexing.expanded_indexer(key, self.ndim) - dims = tuple(dim for k, dim in zip(key, self.dims) - if not isinstance(k, integer_types)) - values = self._indexable_data[key] - # orthogonal indexing should ensure the dimensionality is consistent - if hasattr(values, 'ndim'): - assert values.ndim == len(dims), (values.ndim, len(dims)) - else: - assert len(dims) == 0, len(dims) - return type(self)(dims, values, self._attrs, self._encoding, + dims, index_tuple, new_order = self._broadcast_indexes(key) + data = self._indexable_data[index_tuple] + if new_order: + data = np.moveaxis(data, range(len(new_order)), new_order) + return type(self)(dims, data, self._attrs, self._encoding, fastpath=True) def __setitem__(self, key, value): @@ -440,15 +606,24 @@ def __setitem__(self, key, value): See __getitem__ for more details. """ - key = self._item_key_to_tuple(key) - if isinstance(self._data, dask_array_type): - raise TypeError("this variable's data is stored in a dask array, " - 'which does not support item assignment. 
To ' - 'assign to this variable, you must first load it ' - 'into memory explicitly using the .load() ' - 'method or accessing its .values attribute.') - data = orthogonally_indexable(self._data) - data[key] = value + dims, index_tuple, new_order = self._broadcast_indexes(key) + + if isinstance(value, Variable): + value = value.set_dims(dims).data + + if new_order: + value = duck_array_ops.asarray(value) + if value.ndim > len(dims): + raise ValueError( + 'shape mismatch: value array of shape %s could not be' + 'broadcast to indexing result with %s dimensions' + % (value.shape, len(dims))) + + value = value[(len(dims) - value.ndim) * (np.newaxis,) + + (Ellipsis,)] + value = np.moveaxis(value, new_order, range(len(new_order))) + + self._indexable_data[index_tuple] = value @property def attrs(self): @@ -789,8 +964,8 @@ def set_dims(self, dims, shape=None): missing_dims = set(self.dims) - set(dims) if missing_dims: - raise ValueError('new dimensions must be a superset of existing ' - 'dimensions') + raise ValueError('new dimensions %r must be a superset of ' + 'existing dimensions %r' % (dims, self.dims)) self_dims = set(self.dims) expanded_dims = tuple( @@ -1251,12 +1426,13 @@ def chunk(self, chunks=None, name=None, lock=False): return self.copy(deep=False) def __getitem__(self, key): - key = self._item_key_to_tuple(key) - values = self._indexable_data[key] - if not hasattr(values, 'ndim') or values.ndim == 0: - return Variable((), values, self._attrs, self._encoding) + dims, index_tuple, new_order = self._broadcast_indexes(key) + values = self._indexable_data[index_tuple] + if getattr(values, 'ndim', 0) != 1: + # returns Variable rather than IndexVariable if multi-dimensional + return Variable(dims, values, self._attrs, self._encoding) else: - return type(self)(self.dims, values, self._attrs, + return type(self)(dims, values, self._attrs, self._encoding, fastpath=True) def __setitem__(self, key, value): diff --git a/xarray/tests/test_backends.py 
b/xarray/tests/test_backends.py index a977868c7e6..0eba63cd929 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -348,7 +348,7 @@ def test_roundtrip_boolean_dtype(self): def test_orthogonal_indexing(self): in_memory = create_test_data() with self.roundtrip(in_memory) as on_disk: - indexers = {'dim1': np.arange(3), 'dim2': np.arange(4), + indexers = {'dim1': [1, 2, 0], 'dim2': [3, 2, 0, 3], 'dim3': np.arange(5)} expected = in_memory.isel(**indexers) actual = on_disk.isel(**indexers) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e7920570fc8..c9e84de5104 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -469,6 +469,45 @@ def test_getitem_coords(self): dims='x') self.assertDataArrayIdentical(expected, actual) + def test_getitem_dataarray(self): + # It should not conflict + da = DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y']) + ind = DataArray([[0, 1], [0, 1]], dims=['x', 'z']) + actual = da[ind] + self.assertArrayEqual(actual, da.values[[[0, 1], [0, 1]], :]) + + da = DataArray(np.arange(12).reshape((3, 4)), dims=['x', 'y'], + coords={'x': [0, 1, 2], 'y': ['a', 'b', 'c', 'd']}) + ind = xr.DataArray([[0, 1], [0, 1]], dims=['X', 'Y']) + actual = da[ind] + expected = da.values[[[0, 1], [0, 1]], :] + self.assertArrayEqual(actual, expected) + assert actual.dims == ('X', 'Y', 'y') + + # boolean indexing + ind = xr.DataArray([True, True, False], dims=['x']) + self.assertDataArrayEqual(da[ind], da[[0, 1], :]) + self.assertDataArrayEqual(da[ind], da[[0, 1]]) + self.assertDataArrayEqual(da[ind], da[ind.values]) + + def test_setitem(self): + # basic indexing should work as numpy's indexing + tuples = [(0, 0), (0, slice(None, None)), + (slice(None, None), slice(None, None)), + (slice(None, None), 0), + ([1, 0], slice(None, None)), + (slice(None, None), [1, 0])] + for t in tuples: + expected = np.arange(6).reshape(3, 2) + orig = DataArray(np.arange(6).reshape(3, 2), + 
{'x': [1, 2, 3], 'y': ['a', 'b'], 'z': 4, + 'x2': ('x', ['a', 'b', 'c']), + 'y2': ('y', ['d', 'e'])}, + dims=['x', 'y']) + orig[t] = 1 + expected[t] = 1 + self.assertArrayEqual(orig.values, expected) + def test_attr_sources_multiindex(self): # make sure attr-style access for multi-index levels # returns DataArray objects @@ -508,6 +547,94 @@ def test_isel(self): self.assertDataArrayIdentical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) + def test_isel_types(self): + # regression test for #1405 + da = DataArray([1, 2, 3], dims='x') + # uint64 + self.assertDataArrayIdentical(da.isel(x=np.array([0], dtype="uint64")), + da.isel(x=np.array([0]))) + # uint32 + self.assertDataArrayIdentical(da.isel(x=np.array([0], dtype="uint32")), + da.isel(x=np.array([0]))) + # int64 + self.assertDataArrayIdentical(da.isel(x=np.array([0], dtype="int64")), + da.isel(x=np.array([0]))) + + def test_isel_fancy(self): + shape = (10, 7, 6) + np_array = np.random.random(shape) + da = DataArray(np_array, dims=['time', 'y', 'x'], + coords={'time': np.arange(0, 100, 10)}) + y = [1, 3] + x = [3, 0] + + expected = da.values[:, y, x] + + actual = da.isel(y=(('test_coord', ), y), x=(('test_coord', ), x)) + assert actual.coords['test_coord'].shape == (len(y), ) + assert list(actual.coords) == ['time'] + assert actual.dims == ('time', 'test_coord') + + np.testing.assert_equal(actual, expected) + + # a few corner cases + da.isel(time=(('points',), [1, 2]), x=(('points',), [2, 2]), + y=(('points',), [3, 4])) + np.testing.assert_allclose( + da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), + np_array[1, 4, 2].squeeze()) + da.isel(time=(('points', ), [1, 2])) + y = [-1, 0] + x = [-2, 2] + expected = da.values[:, y, x] + actual = da.isel(x=(('points', ), x), y=(('points', ), y)).values + np.testing.assert_equal(actual, expected) + + # test that the order of the indexers doesn't matter + self.assertDataArrayIdentical( + da.isel(y=(('points', ), y), x=(('points', ), x)), + 
da.isel(x=(('points', ), x), y=(('points', ), y))) + + # make sure we're raising errors in the right places + with self.assertRaisesRegexp(IndexError, + 'Dimensions of indexers mismatch'): + da.isel(y=(('points', ), [1, 2]), x=(('points', ), [1, 2, 3])) + + # tests using index or DataArray as indexers + stations = Dataset() + stations['station'] = (('station', ), ['A', 'B', 'C']) + stations['dim1s'] = (('station', ), [1, 2, 3]) + stations['dim2s'] = (('station', ), [4, 5, 1]) + + actual = da.isel(x=stations['dim1s'], y=stations['dim2s']) + assert 'station' in actual.coords + assert 'station' in actual.dims + self.assertDataArrayIdentical(actual['station'], stations['station']) + + with self.assertRaisesRegexp(ValueError, 'conflicting values for '): + da.isel(x=DataArray([0, 1, 2], dims='station', + coords={'station': [0, 1, 2]}), + y=DataArray([0, 1, 2], dims='station', + coords={'station': [0, 1, 3]})) + + # multi-dimensional selection + stations = Dataset() + stations['a'] = (('a', ), ['A', 'B', 'C']) + stations['b'] = (('b', ), [0, 1]) + stations['dim1s'] = (('a', 'b'), [[1, 2], [2, 3], [3, 4]]) + stations['dim2s'] = (('a', ), [4, 5, 1]) + + actual = da.isel(x=stations['dim1s'], y=stations['dim2s']) + assert 'a' in actual.coords + assert 'a' in actual.dims + assert 'b' in actual.coords + assert 'b' in actual.dims + self.assertDataArrayIdentical(actual['a'], stations['a']) + self.assertDataArrayIdentical(actual['b'], stations['b']) + expected = da.variable[:, stations['dim2s'].variable, + stations['dim1s'].variable] + self.assertArrayEqual(actual, expected) + def test_sel(self): self.ds['x'] = ('x', np.array(list('abcdefghij'))) da = self.ds['foo'] @@ -521,6 +648,31 @@ def test_sel(self): self.assertDataArrayIdentical(da[1], da.sel(x=b)) self.assertDataArrayIdentical(da[[1]], da.sel(x=slice(b, b))) + def test_sel_dataarray(self): + # indexing with DataArray + self.ds['x'] = ('x', np.array(list('abcdefghij'))) + da = self.ds['foo'] + + ind = DataArray(['a', 'b', 
'c'], dims=['x']) + actual = da.sel(x=ind) + self.assertDataArrayIdentical(actual, da.isel(x=[0, 1, 2])) + + # along new dimension + ind = DataArray(['a', 'b', 'c'], dims=['new_dim']) + actual = da.sel(x=ind) + self.assertArrayEqual(actual, da.isel(x=[0, 1, 2])) + assert 'new_dim' in actual.dims + + # with coordinate + ind = DataArray(['a', 'b', 'c'], dims=['new_dim'], + coords={'new_dim': [0, 1, 2]}) + actual = da.sel(x=ind) + self.assertArrayEqual(actual, da.isel(x=[0, 1, 2])) + assert 'new_dim' in actual.dims + assert 'new_dim' in actual.coords + self.assertDataArrayEqual(actual['new_dim'].drop('x'), + ind['new_dim']) + def test_sel_no_index(self): array = DataArray(np.arange(10), dims='x') self.assertDataArrayIdentical(array[0], array.sel(x=0)) @@ -638,11 +790,30 @@ def test_loc(self): self.assertDataArrayIdentical(da[:3, :4], da.loc[['a', 'b', 'c'], np.arange(4)]) self.assertDataArrayIdentical(da[:, :4], da.loc[:, self.ds['y'] < 4]) + + def test_loc_assign(self): + self.ds['x'] = ('x', np.array(list('abcdefghij'))) + da = self.ds['foo'] + # assignment da.loc['a':'j'] = 0 self.assertTrue(np.all(da.values == 0)) da.loc[{'x': slice('a', 'j')}] = 2 self.assertTrue(np.all(da.values == 2)) + da.loc[{'x': slice('a', 'j')}] = 2 + self.assertTrue(np.all(da.values == 2)) + + # Multi dimensional case + da = DataArray(np.arange(12).reshape(3, 4), dims=['x', 'y']) + da.loc[0, 0] = 0 + assert da.values[0, 0] == 0 + assert da.values[0, 1] != 0 + + da = DataArray(np.arange(12).reshape(3, 4), dims=['x', 'y']) + da.loc[0] = 0 + self.assertTrue(np.all(da.values[0] == np.zeros(4))) + assert da.values[1, 0] != 0 + def test_loc_single_boolean(self): data = DataArray([0, 1], coords=[[True, False]]) self.assertEqual(data.loc[True], 0) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eda3b03a2e5..91cee61cf53 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -852,6 +852,216 @@ def test_isel(self): 
self.assertItemsEqual(data.coords, ret.coords) self.assertItemsEqual(data.indexes, list(ret.indexes) + ['time']) + def test_isel_fancy(self): + # isel with fancy indexing. + data = create_test_data() + + pdim1 = [1, 2, 3] + pdim2 = [4, 5, 1] + pdim3 = [1, 2, 3] + actual = data.isel(dim1=(('test_coord', ), pdim1), + dim2=(('test_coord', ), pdim2), + dim3=(('test_coord', ), pdim3)) + assert 'test_coord' in actual.dims + assert actual.coords['test_coord'].shape == (len(pdim1), ) + + # Should work with DataArray + actual = data.isel(dim1=DataArray(pdim1, dims='test_coord'), + dim2=(('test_coord', ), pdim2), + dim3=(('test_coord', ), pdim3)) + assert 'test_coord' in actual.dims + assert actual.coords['test_coord'].shape == (len(pdim1), ) + expected = data.isel(dim1=(('test_coord', ), pdim1), + dim2=(('test_coord', ), pdim2), + dim3=(('test_coord', ), pdim3)) + self.assertDatasetIdentical(actual, expected) + + # DataArray with coordinate + idx1 = DataArray(pdim1, dims=['a'], coords={'a': np.random.randn(3)}) + idx2 = DataArray(pdim2, dims=['b'], coords={'b': np.random.randn(3)}) + idx3 = DataArray(pdim3, dims=['c'], coords={'c': np.random.randn(3)}) + # Should work with DataArray + actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3) + assert 'a' in actual.dims + assert 'b' in actual.dims + assert 'c' in actual.dims + assert 'time' in actual.coords + assert 'dim2' in actual.coords + assert 'dim3' in actual.coords + expected = data.isel(dim1=(('a', ), pdim1), + dim2=(('b', ), pdim2), + dim3=(('c', ), pdim3)) + expected = expected.assign_coords(a=idx1['a'], b=idx2['b'], + c=idx3['c']) + self.assertDatasetIdentical(actual, expected) + + idx1 = DataArray(pdim1, dims=['a'], coords={'a': np.random.randn(3)}) + idx2 = DataArray(pdim2, dims=['a']) + idx3 = DataArray(pdim3, dims=['a']) + # Should work with DataArray + actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3) + assert 'a' in actual.dims + assert 'time' in actual.coords + assert 'dim2' in actual.coords + assert 'dim3' in 
actual.coords + expected = data.isel(dim1=(('a', ), pdim1), + dim2=(('a', ), pdim2), + dim3=(('a', ), pdim3)) + expected = expected.assign_coords(a=idx1['a']) + self.assertDatasetIdentical(actual, expected) + + actual = data.isel(dim1=(('points', ), pdim1), + dim2=(('points', ), pdim2)) + assert 'points' in actual.dims + assert 'dim3' in actual.dims + assert 'dim3' not in actual.data_vars + np.testing.assert_array_equal(data['dim2'][pdim2], actual['dim2']) + + # test that the order of the indexers doesn't matter + self.assertDatasetIdentical(data.isel(dim1=(('points', ), pdim1), + dim2=(('points', ), pdim2)), + data.isel(dim2=(('points', ), pdim2), + dim1=(('points', ), pdim1))) + # make sure we're raising errors in the right places + with self.assertRaisesRegexp(IndexError, + 'Dimensions of indexers mismatch'): + data.isel(dim1=(('points', ), [1, 2]), + dim2=(('points', ), [1, 2, 3])) + with self.assertRaisesRegexp(TypeError, 'cannot use a Dataset'): + data.isel(dim1=Dataset({'points': [1, 2]})) + + # test to be sure we keep around variables that were not indexed + ds = Dataset({'x': [1, 2, 3, 4], 'y': 0}) + actual = ds.isel(x=(('points', ), [0, 1, 2])) + self.assertDataArrayIdentical(ds['y'], actual['y']) + + # tests using index or DataArray as indexers + stations = Dataset() + stations['station'] = (('station', ), ['A', 'B', 'C']) + stations['dim1s'] = (('station', ), [1, 2, 3]) + stations['dim2s'] = (('station', ), [4, 5, 1]) + + actual = data.isel(dim1=stations['dim1s'], + dim2=stations['dim2s']) + assert 'station' in actual.coords + assert 'station' in actual.dims + self.assertDataArrayIdentical(actual['station'].drop(['dim2']), + stations['station']) + + with self.assertRaisesRegexp(ValueError, 'conflicting values for '): + data.isel(dim1=DataArray([0, 1, 2], dims='station', + coords={'station': [0, 1, 2]}), + dim2=DataArray([0, 1, 2], dims='station', + coords={'station': [0, 1, 3]})) + + # multi-dimensional selection + stations = Dataset() + stations['a'] = 
(('a', ), ['A', 'B', 'C']) + stations['b'] = (('b', ), [0, 1]) + stations['dim1s'] = (('a', 'b'), [[1, 2], [2, 3], [3, 4]]) + stations['dim2s'] = (('a', ), [4, 5, 1]) + actual = data.isel(dim1=stations['dim1s'], dim2=stations['dim2s']) + assert 'a' in actual.coords + assert 'a' in actual.dims + assert 'b' in actual.coords + assert 'b' in actual.dims + assert 'dim2' in actual.coords + assert 'a' in actual['dim2'].dims + + self.assertDataArrayIdentical(actual['a'].drop(['dim2']), + stations['a']) + self.assertDataArrayIdentical(actual['b'], stations['b']) + expected_var1 = data['var1'].variable[stations['dim1s'].variable, + stations['dim2s'].variable] + expected_var2 = data['var2'].variable[stations['dim1s'].variable, + stations['dim2s'].variable] + expected_var3 = data['var3'].variable[slice(None), + stations['dim1s'].variable] + self.assertDataArrayEqual(actual['a'].drop('dim2'), stations['a']) + self.assertArrayEqual(actual['var1'], expected_var1) + self.assertArrayEqual(actual['var2'], expected_var2) + self.assertArrayEqual(actual['var3'], expected_var3) + + def test_isel_dataarray(self): + """ Test for indexing by DataArray """ + data = create_test_data() + # indexing with DataArray with same-name coordinates. 
+ indexing_da = DataArray(np.arange(1, 4), dims=['dim1'], + coords={'dim1': np.random.randn(3)}) + actual = data.isel(dim1=indexing_da) + self.assertDataArrayIdentical(indexing_da['dim1'], actual['dim1']) + self.assertDataArrayIdentical(data['dim2'], actual['dim2']) + + # Conflict in the dimension coordinate + indexing_da = DataArray(np.arange(1, 4), dims=['dim2'], + coords={'dim2': np.random.randn(3)}) + with self.assertRaisesRegexp( + IndexError, "dimension coordinate 'dim2'"): + actual = data.isel(dim2=indexing_da) + # Also the case for DataArray + with self.assertRaisesRegexp( + IndexError, "dimension coordinate 'dim2'"): + actual = data['var2'].isel(dim2=indexing_da) + with self.assertRaisesRegexp( + IndexError, "dimension coordinate 'dim2'"): + data['dim2'].isel(dim2=indexing_da) + + # same name coordinate which does not conflict + indexing_da = DataArray(np.arange(1, 4), dims=['dim2'], + coords={'dim2': data['dim2'].values[1:4]}) + actual = data.isel(dim2=indexing_da) + self.assertDataArrayIdentical(actual['dim2'], indexing_da['dim2']) + + # Silently drop conflicted (non-dimensional) coordinate of indexer + indexing_da = DataArray(np.arange(1, 4), dims=['dim2'], + coords={'dim2': data['dim2'].values[1:4], + 'numbers': ('dim2', np.arange(2, 5))}) + actual = data.isel(dim2=indexing_da) + self.assertDataArrayIdentical(actual['numbers'], data['numbers']) + + # boolean data array with coordinate with the same name + indexing_da = DataArray(np.arange(1, 10), dims=['dim2'], + coords={'dim2': data['dim2'].values}) + indexing_da = (indexing_da < 3) + actual = data.isel(dim2=indexing_da) + self.assertDataArrayIdentical(actual['dim2'], data['dim2'][:2]) + + # boolean data array with non-dimensioncoordinate + indexing_da = DataArray(np.arange(1, 10), dims=['dim2'], + coords={'dim2': data['dim2'].values, + 'non_dim': (('dim2', ), + np.random.randn(9)), + 'non_dim2': 0}) + indexing_da = (indexing_da < 3) + actual = data.isel(dim2=indexing_da) + 
self.assertDataArrayIdentical( + actual['dim2'].drop('non_dim').drop('non_dim2'), data['dim2'][:2]) + self.assertDataArrayIdentical( + actual['non_dim'], indexing_da['non_dim'][:2]) + self.assertDataArrayIdentical( + actual['non_dim2'], indexing_da['non_dim2']) + + # non-dimension coordinate will be also attached + indexing_da = DataArray(np.arange(1, 4), dims=['dim2'], + coords={'non_dim': (('dim2', ), + np.random.randn(3))}) + actual = data.isel(dim2=indexing_da) + assert 'non_dim' in actual + assert 'non_dim' in actual.coords + + # Index by a scalar DataArray + indexing_da = DataArray(3, dims=[], coords={'station': 2}) + actual = data.isel(dim2=indexing_da) + assert 'station' in actual + actual = data.isel(dim2=indexing_da['station']) + assert 'station' in actual + + # indexer generated from coordinates + indexing_ds = Dataset({}, coords={'dim2': [0, 1, 2]}) + with self.assertRaisesRegexp( + IndexError, "dimension coordinate 'dim2'"): + actual = data.isel(dim2=indexing_ds['dim2']) + def test_sel(self): data = create_test_data() int_slicers = {'dim1': slice(None, None, 2), @@ -886,6 +1096,108 @@ def test_sel(self): self.assertDatasetEqual(data.isel(td=slice(1, 3)), data.sel(td=slice('1 days', '2 days'))) + def test_sel_dataarray(self): + data = create_test_data() + + ind = DataArray([0.0, 0.5, 1.0], dims=['dim2']) + actual = data.sel(dim2=ind) + self.assertDatasetEqual(actual, data.isel(dim2=[0, 1, 2])) + + # with different dimension + ind = DataArray([0.0, 0.5, 1.0], dims=['new_dim']) + actual = data.sel(dim2=ind) + expected = data.isel(dim2=Variable('new_dim', [0, 1, 2])) + assert 'new_dim' in actual.dims + self.assertDatasetEqual(actual, expected) + + # Multi-dimensional + ind = DataArray([[0.0], [0.5], [1.0]], dims=['new_dim', 'new_dim2']) + actual = data.sel(dim2=ind) + expected = data.isel(dim2=Variable(('new_dim', 'new_dim2'), + [[0], [1], [2]])) + assert 'new_dim' in actual.dims + assert 'new_dim2' in actual.dims + self.assertDatasetEqual(actual, 
expected) + + # with coordinate + ind = DataArray([0.0, 0.5, 1.0], dims=['new_dim'], + coords={'new_dim': ['a', 'b', 'c']}) + actual = data.sel(dim2=ind) + expected = data.isel(dim2=[0, 1, 2]).rename({'dim2': 'new_dim'}) + assert 'new_dim' in actual.dims + assert 'new_dim' in actual.coords + self.assertDatasetEqual(actual.drop('new_dim').drop('dim2'), + expected.drop('new_dim')) + self.assertDataArrayEqual(actual['new_dim'].drop('dim2'), + ind['new_dim']) + + # with conflicted coordinate (silently ignored) + ind = DataArray([0.0, 0.5, 1.0], dims=['dim2'], + coords={'dim2': ['a', 'b', 'c']}) + actual = data.sel(dim2=ind) + expected = data.isel(dim2=[0, 1, 2]) + self.assertDatasetEqual(actual, expected) + + # with conflicted coordinate (silently ignored) + ind = DataArray([0.0, 0.5, 1.0], dims=['new_dim'], + coords={'new_dim': ['a', 'b', 'c'], + 'dim2': 3}) + actual = data.sel(dim2=ind) + self.assertDataArrayEqual(actual['new_dim'].drop('dim2'), + ind['new_dim'].drop('dim2')) + expected = data.isel(dim2=[0, 1, 2]) + expected['dim2'] = (('new_dim'), expected['dim2'].values) + self.assertDataArrayEqual(actual['dim2'].drop('new_dim'), + expected['dim2']) + assert actual['var1'].dims == ('dim1', 'new_dim') + + # with non-dimensional coordinate + ind = DataArray([0.0, 0.5, 1.0], dims=['dim2'], + coords={'dim2': ['a', 'b', 'c'], + 'numbers': ('dim2', [0, 1, 2]), + 'new_dim': ('dim2', [1.1, 1.2, 1.3])}) + actual = data.sel(dim2=ind) + expected = data.isel(dim2=[0, 1, 2]) + self.assertDatasetEqual(actual.drop('new_dim'), expected) + assert np.allclose(actual['new_dim'].values, ind['new_dim'].values) + + def test_sel_dataarray_mindex(self): + midx = pd.MultiIndex.from_product([list('abc'), [0, 1]], + names=('one', 'two')) + mds = xr.Dataset({'var': (('x', 'y'), np.random.rand(6, 3))}, + coords={'x': midx, 'y': range(3)}) + + actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims='x')) + actual_sel = mds.sel(x=DataArray(mds.indexes['x'][:3], dims='x')) + assert 
actual_isel['x'].dims == ('x', ) + assert actual_sel['x'].dims == ('x', ) + self.assertDatasetIdentical(actual_isel, actual_sel) + + actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims='z')) + actual_sel = mds.sel(x=Variable('z', mds.indexes['x'][:3])) + assert actual_isel['x'].dims == ('z', ) + assert actual_sel['x'].dims == ('z', ) + self.assertDatasetIdentical(actual_isel, actual_sel) + + # with coordinate + actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims='z', + coords={'z': [0, 1, 2]})) + actual_sel = mds.sel(x=xr.DataArray(mds.indexes['x'][:3], dims='z', + coords={'z': [0, 1, 2]})) + assert actual_isel['x'].dims == ('z', ) + assert actual_sel['x'].dims == ('z', ) + self.assertDatasetIdentical(actual_isel, actual_sel) + + # Vectorized indexing with level-variables raises an error + with self.assertRaisesRegexp(ValueError, 'Vectorized selection is '): + mds.sel(one=['a', 'b']) + + with self.assertRaisesRegexp(ValueError, 'Vectorized selection is ' + 'not available along MultiIndex variable:' + ' x'): + mds.sel(x=xr.DataArray([np.array(midx[:2]), np.array(midx[-2:])], + dims=['a', 'b'])) + def test_sel_drop(self): data = Dataset({'foo': ('x', [1, 2, 3])}, {'x': [0, 1, 2]}) expected = Dataset({'foo': 1}) @@ -1003,15 +1315,13 @@ def test_sel_points(self): self.assertDatasetIdentical(expected, actual) data = Dataset({'foo': (('x', 'y'), np.arange(9).reshape(3, 3))}) - expected = Dataset({'foo': ('points', [0, 4, 8])} - ) + expected = Dataset({'foo': ('points', [0, 4, 8])}) actual = data.sel_points(x=[0, 1, 2], y=[0, 1, 2]) self.assertDatasetIdentical(expected, actual) data.coords.update({'x': [0, 1, 2], 'y': [0, 1, 2]}) expected.coords.update({'x': ('points', [0, 1, 2]), - 'y': ('points', [0, 1, 2]) - }) + 'y': ('points', [0, 1, 2])}) actual = data.sel_points(x=[0.1, 1.1, 2.5], y=[0, 1.2, 2.0], method='pad') self.assertDatasetIdentical(expected, actual) @@ -1019,6 +1329,80 @@ def test_sel_points(self): with self.assertRaises(KeyError): 
data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + def test_sel_fancy(self): + data = create_test_data() + + # add in a range() index + data['dim1'] = data.dim1 + + pdim1 = [1, 2, 3] + pdim2 = [4, 5, 1] + pdim3 = [1, 2, 3] + expected = data.isel(dim1=Variable(('test_coord', ), pdim1), + dim2=Variable(('test_coord', ), pdim2), + dim3=Variable(('test_coord'), pdim3)) + actual = data.sel(dim1=Variable(('test_coord', ), data.dim1[pdim1]), + dim2=Variable(('test_coord', ), data.dim2[pdim2]), + dim3=Variable(('test_coord', ), data.dim3[pdim3])) + self.assertDatasetIdentical(expected, actual) + + # DataArray Indexer + idx_t = DataArray(data['time'][[3, 2, 1]].values, dims=['a'], + coords={'a': ['a', 'b', 'c']}) + idx_2 = DataArray(data['dim2'][[3, 2, 1]].values, dims=['a'], + coords={'a': ['a', 'b', 'c']}) + idx_3 = DataArray(data['dim3'][[3, 2, 1]].values, dims=['a'], + coords={'a': ['a', 'b', 'c']}) + actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3) + expected = data.isel(time=Variable(('a', ), [3, 2, 1]), + dim2=Variable(('a', ), [3, 2, 1]), + dim3=Variable(('a', ), [3, 2, 1])) + expected = expected.assign_coords(a=idx_t['a']) + self.assertDatasetIdentical(expected, actual) + + idx_t = DataArray(data['time'][[3, 2, 1]].values, dims=['a'], + coords={'a': ['a', 'b', 'c']}) + idx_2 = DataArray(data['dim2'][[2, 1, 3]].values, dims=['b'], + coords={'b': [0, 1, 2]}) + idx_3 = DataArray(data['dim3'][[1, 2, 1]].values, dims=['c'], + coords={'c': [0.0, 1.1, 2.2]}) + actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3) + expected = data.isel(time=Variable(('a', ), [3, 2, 1]), + dim2=Variable(('b', ), [2, 1, 3]), + dim3=Variable(('c', ), [1, 2, 1])) + expected = expected.assign_coords(a=idx_t['a'], b=idx_2['b'], + c=idx_3['c']) + self.assertDatasetIdentical(expected, actual) + + # test from sel_points + data = Dataset({'foo': (('x', 'y'), np.arange(9).reshape(3, 3))}) + data.coords.update({'x': [0, 1, 2], 'y': [0, 1, 2]}) + + expected = Dataset({'foo': 
('points', [0, 4, 8])}, + coords={'x': Variable(('points', ), [0, 1, 2]), + 'y': Variable(('points', ), [0, 1, 2])}) + actual = data.sel(x=Variable(('points', ), [0, 1, 2]), + y=Variable(('points', ), [0, 1, 2])) + self.assertDatasetIdentical(expected, actual) + + expected.coords.update({'x': ('points', [0, 1, 2]), + 'y': ('points', [0, 1, 2])}) + actual = data.sel(x=Variable(('points', ), [0.1, 1.1, 2.5]), + y=Variable(('points', ), [0, 1.2, 2.0]), + method='pad') + self.assertDatasetIdentical(expected, actual) + + idx_x = DataArray([0, 1, 2], dims=['a'], coords={'a': ['a', 'b', 'c']}) + idx_y = DataArray([0, 2, 1], dims=['b'], coords={'b': [0, 3, 6]}) + expected_ary = data['foo'][[0, 1, 2], [0, 2, 1]] + actual = data.sel(x=idx_x, y=idx_y) + self.assertArrayEqual(expected_ary, actual['foo']) + self.assertDataArrayIdentical(actual['a'].drop('x'), idx_x['a']) + self.assertDataArrayIdentical(actual['b'].drop('y'), idx_y['b']) + + with self.assertRaises(KeyError): + data.sel_points(x=[2.5], y=[2.0], method='pad', tolerance=1e-3) + def test_sel_method(self): data = create_test_data() @@ -1178,6 +1562,22 @@ def test_reindex(self): actual = ds.reindex(x=[0, 1, 3], y=[0, 1]) self.assertDatasetIdentical(expected, actual) + def test_reindex_warning(self): + data = create_test_data() + + with pytest.warns(FutureWarning) as ws: + # DataArray with different dimension raises Future warning + ind = xr.DataArray([0.0, 1.0], dims=['new_dim'], name='ind') + data.reindex(dim2=ind) + assert any(["Indexer has dimensions " in + str(w.message) for w in ws]) + + # Should not warn + ind = xr.DataArray([0.0, 1.0], dims=['dim2'], name='ind') + with pytest.warns(None) as ws: + data.reindex(dim2=ind) + assert len(ws) == 0 + def test_reindex_variables_copied(self): data = create_test_data() reindexed_data = data.reindex(copy=False) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 3866e0511a5..f8268ea2d6d 100644 --- a/xarray/tests/test_indexing.py +++ 
b/xarray/tests/test_indexing.py @@ -1,11 +1,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools + import numpy as np import pandas as pd from xarray import Dataset, DataArray, Variable from xarray.core import indexing +from xarray.core import nputils from . import TestCase, ReturnItem @@ -29,47 +32,6 @@ def test_expanded_indexer(self): with self.assertRaisesRegexp(IndexError, 'too many indices'): indexing.expanded_indexer(I[1, 2, 3], 2) - def test_orthogonal_indexer(self): - x = np.random.randn(10, 11, 12, 13, 14) - y = np.arange(5) - I = ReturnItem() - # orthogonal and numpy indexing should be equivalent, because we only - # use at most one array and it never in between two slice objects - # (i.e., we try to avoid numpy's mind-boggling "partial indexing" - # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html) - for i in [I[:], I[0], I[0, 0], I[:5], I[5:], I[2:5], I[3:-3], I[::-1], - I[::-2], I[5::-2], I[:3:-2], I[2:5:-1], I[7:3:-2], I[:3, :4], - I[:3, 0, :4], I[:3, 0, :4, 0], I[y], I[:, y], I[0, y], - I[:2, :3, y], I[0, y, :, :4, 0]]: - j = indexing.orthogonal_indexer(i, x.shape) - self.assertArrayEqual(x[i], x[j]) - self.assertArrayEqual(self.set_to_zero(x, i), - self.set_to_zero(x, j)) - # for more complicated cases, check orthogonal indexing is still - # equivalent to slicing - z = np.arange(2, 8, 2) - for i, j, shape in [ - (I[y, y], I[:5, :5], (5, 5, 12, 13, 14)), - (I[y, z], I[:5, 2:8:2], (5, 3, 12, 13, 14)), - (I[0, y, y], I[0, :5, :5], (5, 5, 13, 14)), - (I[y, 0, z], I[:5, 0, 2:8:2], (5, 3, 13, 14)), - (I[y, :, z], I[:5, :, 2:8:2], (5, 11, 3, 13, 14)), - (I[0, :, z], I[0, :, 2:8:2], (11, 3, 13, 14)), - (I[0, :2, y, y, 0], I[0, :2, :5, :5, 0], (2, 5, 5)), - (I[0, :, y, :, 0], I[0, :, :5, :, 0], (11, 5, 13)), - (I[:, :, y, :, 0], I[:, :, :5, :, 0], (10, 11, 5, 13)), - (I[:, :, y, z, :], I[:, :, :5, 2:8:2], (10, 11, 5, 3, 14))]: - k = indexing.orthogonal_indexer(i, 
x.shape) - self.assertEqual(shape, x[k].shape) - self.assertArrayEqual(x[j], x[k]) - self.assertArrayEqual(self.set_to_zero(x, j), - self.set_to_zero(x, k)) - # standard numpy (non-orthogonal) indexing doesn't work anymore - with self.assertRaisesRegexp(ValueError, 'only supports 1d'): - indexing.orthogonal_indexer(x > 0, x.shape) - with self.assertRaisesRegexp(ValueError, 'invalid subkey'): - print(indexing.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)) - def test_asarray_tuplesafe(self): res = indexing._asarray_tuplesafe(('a', 1)) assert isinstance(res, np.ndarray) @@ -180,28 +142,40 @@ def test_slice_slice(self): self.assertArrayEqual(expected, actual) def test_lazily_indexed_array(self): - x = indexing.NumpyIndexingAdapter(np.random.rand(10, 20, 30)) + original = np.random.rand(10, 20, 30) + x = indexing.NumpyIndexingAdapter(original) + v = Variable(['i', 'j', 'k'], original) lazy = indexing.LazilyIndexedArray(x) + v_lazy = Variable(['i', 'j', 'k'], lazy) I = ReturnItem() # test orthogonally applied indexers - indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], np.arange(10) < 5] + indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], [0], np.arange(10) < 5] for i in indexers: for j in indexers: for k in indexers: - expected = np.asarray(x[i, j, k]) - for actual in [lazy[i, j, k], - lazy[:, j, k][i], - lazy[:, :, k][:, j][i]]: + if isinstance(j, np.ndarray) and j.dtype.kind == 'b': + j = np.arange(20) < 5 + if isinstance(k, np.ndarray) and k.dtype.kind == 'b': + k = np.arange(30) < 5 + expected = np.asarray(v[i, j, k]) + for actual in [v_lazy[i, j, k], + v_lazy[:, j, k][i], + v_lazy[:, :, k][:, j][i]]: self.assertEqual(expected.shape, actual.shape) self.assertArrayEqual(expected, actual) + assert isinstance(actual._data, + indexing.LazilyIndexedArray) # test sequentially applied indexers indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0), ([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])] for i, j in indexers: - expected = np.asarray(x[i][j]) - actual = lazy[i][j] 
+ expected = np.asarray(v[i][j]) + actual = v_lazy[i][j] self.assertEqual(expected.shape, actual.shape) self.assertArrayEqual(expected, actual) + assert isinstance(actual._data, indexing.LazilyIndexedArray) + assert isinstance(actual._data.array, + indexing.NumpyIndexingAdapter) class TestCopyOnWriteArray(TestCase): @@ -254,3 +228,39 @@ def test_index_scalar(self): # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(['foo', 'bar'])) assert np.array(x[0][()]) == 'foo' + + +class TestIndexerTuple(TestCase): + """ Make sure _outer_to_numpy_indexer gives similar result to + Variable._broadcast_indexes_vectorized + """ + def test_outer_indexer(self): + def nonzero(x): + if isinstance(x, np.ndarray) and x.dtype.kind == 'b': + x = x.nonzero()[0] + return x + original = np.random.rand(10, 20, 30) + v = Variable(['i', 'j', 'k'], original) + I = ReturnItem() + # test orthogonally applied indexers + indexers = [I[:], 0, -2, I[:3], np.array([0, 1, 2, 3]), np.array([0]), + np.arange(10) < 5] + for i, j, k in itertools.product(indexers, repeat=3): + + if isinstance(j, np.ndarray) and j.dtype.kind == 'b': # match size + j = np.arange(20) < 4 + if isinstance(k, np.ndarray) and k.dtype.kind == 'b': + k = np.arange(30) < 8 + + _, expected, new_order = v._broadcast_indexes_vectorized((i, j, k)) + expected_data = nputils.NumpyVIndexAdapter(v.data)[expected] + if new_order: + old_order = range(len(new_order)) + expected_data = np.moveaxis(expected_data, old_order, + new_order) + + outer_index = indexing.OuterIndexer( + (nonzero(i), nonzero(j), nonzero(k))) + actual = indexing._outer_to_numpy_indexer(outer_index, v.shape) + actual_data = v.data[actual] + self.assertArrayEqual(actual_data, expected_data) diff --git a/xarray/tests/test_nputils.py b/xarray/tests/test_nputils.py new file mode 100644 index 00000000000..83445e4639f --- /dev/null +++ b/xarray/tests/test_nputils.py @@ -0,0 +1,30 @@ +import numpy as np +from numpy.testing import assert_array_equal + +from 
xarray.core.nputils import _is_contiguous, NumpyVIndexAdapter + + +def test_is_contiguous(): + assert _is_contiguous([1]) + assert _is_contiguous([1, 2, 3]) + assert not _is_contiguous([1, 3]) + + +def test_vindex(): + x = np.arange(3 * 4 * 5).reshape((3, 4, 5)) + vindex = NumpyVIndexAdapter(x) + + # getitem + assert_array_equal(vindex[0], x[0]) + assert_array_equal(vindex[[1, 2], [1, 2]], x[[1, 2], [1, 2]]) + assert vindex[[0, 1], [0, 1], :].shape == (2, 5) + assert vindex[[0, 1], :, [0, 1]].shape == (2, 4) + assert vindex[:, [0, 1], [0, 1]].shape == (2, 3) + + # setitem + vindex[:] = 0 + assert_array_equal(x, np.zeros_like(x)) + # assignment should not raise + vindex[[0, 1], [0, 1], :] = vindex[[0, 1], [0, 1], :] + vindex[[0, 1], :, [0, 1]] = vindex[[0, 1], :, [0, 1]] + vindex[:, [0, 1], [0, 1]] = vindex[:, [0, 1], [0, 1]] diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 7b68783f611..c93b4d98367 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -53,6 +53,50 @@ def test_getitem_dict(self): expected = v[0] self.assertVariableIdentical(expected, actual) + def test_getitem_1d(self): + v = self.cls(['x'], [0, 1, 2]) + + v_new = v[dict(x=[0, 1])] + assert v_new.dims == ('x', ) + self.assertArrayEqual(v_new, v._data[[0, 1]]) + + v_new = v[dict(x=slice(None))] + assert v_new.dims == ('x', ) + self.assertArrayEqual(v_new, v._data) + + v_new = v[dict(x=Variable('a', [0, 1]))] + assert v_new.dims == ('a', ) + self.assertArrayEqual(v_new, v._data[[0, 1]]) + + v_new = v[dict(x=1)] + assert v_new.dims == () + self.assertArrayEqual(v_new, v._data[1]) + + # tuple argument + v_new = v[slice(None)] + assert v_new.dims == ('x', ) + self.assertArrayEqual(v_new, v._data) + + def test_getitem_1d_fancy(self): + v = self.cls(['x'], [0, 1, 2]) + # 1d-variable should be indexable by multi-dimensional Variable + ind = Variable(('a', 'b'), [[0, 1], [0, 1]]) + v_new = v[ind] + assert v_new.dims == ('a', 'b') + expected = 
np.array(v._data)[([0, 1], [0, 1]), ] + self.assertArrayEqual(v_new, expected) + + # boolean indexing + ind = Variable(('x', ), [True, False, True]) + v_new = v[ind] + self.assertVariableIdentical(v[[0, 2]], v_new) + v_new = v[[True, False, True]] + self.assertVariableIdentical(v[[0, 2]], v_new) + + with self.assertRaisesRegexp(IndexError, "Boolean indexer should"): + ind = Variable(('a', ), [True, False, True]) + v[ind] + def _assertIndexedLikeNDArray(self, variable, expected_value0, expected_dtype=None): """Given a 1-dimensional variable, verify that the variable is indexed @@ -142,9 +186,9 @@ def test_0d_object_array_with_list(self): listarray = np.empty((1,), dtype=object) listarray[0] = [1, 2, 3] x = self.cls('x', listarray) - assert x.data == listarray - assert x[0].data == listarray.squeeze() - assert x.squeeze().data == listarray.squeeze() + self.assertArrayEqual(x.data, listarray) + self.assertArrayEqual(x[0].data, listarray.squeeze()) + self.assertArrayEqual(x.squeeze().data, listarray.squeeze()) def test_index_and_concat_datetime(self): # regression test for #125 @@ -207,7 +251,9 @@ def test_pandas_data(self): self.assertEqual(v[0].values, v.values[0]) def test_pandas_period_index(self): - v = self.cls(['x'], pd.period_range(start='2000', periods=20, freq='B')) + v = self.cls(['x'], pd.period_range(start='2000', periods=20, + freq='B')) + v = v.load() # for dask-based Variable self.assertEqual(v[0], pd.Period('2000', freq='B')) assert "Period('2000-01-03', 'B')" in repr(v) @@ -463,10 +509,186 @@ def test_load(self): array = self.cls('x', np.arange(5)) orig_data = array._data copied = array.copy(deep=True) - array.load() - assert type(array._data) is type(orig_data) - assert type(copied._data) is type(orig_data) - self.assertVariableIdentical(array, copied) + if array.chunks is None: + array.load() + assert type(array._data) is type(orig_data) + assert type(copied._data) is type(orig_data) + self.assertVariableIdentical(array, copied) + + def 
test_getitem_advanced(self): + v = self.cls(['x', 'y'], [[0, 1, 2], [3, 4, 5]]) + v_data = v.compute().data + + # orthogonal indexing + v_new = v[([0, 1], [1, 0])] + assert v_new.dims == ('x', 'y') + self.assertArrayEqual(v_new, v_data[[0, 1]][:, [1, 0]]) + + v_new = v[[0, 1]] + assert v_new.dims == ('x', 'y') + self.assertArrayEqual(v_new, v_data[[0, 1]]) + + # with mixed arguments + ind = Variable(['a'], [0, 1]) + v_new = v[dict(x=[0, 1], y=ind)] + assert v_new.dims == ('x', 'a') + self.assertArrayEqual(v_new, v_data[[0, 1]][:, [0, 1]]) + + # boolean indexing + v_new = v[dict(x=[True, False], y=[False, True, False])] + assert v_new.dims == ('x', 'y') + self.assertArrayEqual(v_new, v_data[0][1]) + + # with scalar variable + ind = Variable((), 2) + v_new = v[dict(y=ind)] + expected = v[dict(y=2)] + self.assertArrayEqual(v_new, expected) + + # with boolean variable with wrong shape + ind = np.array([True, False]) + with self.assertRaisesRegexp(IndexError, 'Boolean array size 2 is '): + v[Variable(('a', 'b'), [[0, 1]]), ind] + + # boolean indexing with different dimension + ind = Variable(['a'], [True, False, False]) + with self.assertRaisesRegexp(IndexError, 'Boolean indexer should be'): + v[dict(y=ind)] + + def test_getitem_uint_1d(self): + # regression test for #1405 + v = self.cls(['x'], [0, 1, 2]) + v_data = v.compute().data + + v_new = v[np.array([0])] + self.assertArrayEqual(v_new, v_data[0]) + v_new = v[np.array([0], dtype="uint64")] + self.assertArrayEqual(v_new, v_data[0]) + + def test_getitem_uint(self): + # regression test for #1405 + v = self.cls(['x', 'y'], [[0, 1, 2], [3, 4, 5]]) + v_data = v.compute().data + + v_new = v[np.array([0])] + self.assertArrayEqual(v_new, v_data[[0], :]) + v_new = v[np.array([0], dtype="uint64")] + self.assertArrayEqual(v_new, v_data[[0], :]) + + def test_getitem_0d_array(self): + # make sure 0d-np.array can be used as an indexer + v = self.cls(['x'], [0, 1, 2]) + v_data = v.compute().data + + v_new = v[np.array([0])[0]] + 
self.assertArrayEqual(v_new, v_data[0]) + + def test_getitem_fancy(self): + v = self.cls(['x', 'y'], [[0, 1, 2], [3, 4, 5]]) + v_data = v.compute().data + + ind = Variable(['a', 'b'], [[0, 1, 1], [1, 1, 0]]) + v_new = v[ind] + assert v_new.dims == ('a', 'b', 'y') + self.assertArrayEqual(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :]) + + # It would be ok if indexed with the multi-dimensional array including + # the same name + ind = Variable(['x', 'b'], [[0, 1, 1], [1, 1, 0]]) + v_new = v[ind] + assert v_new.dims == ('x', 'b', 'y') + self.assertArrayEqual(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :]) + + ind = Variable(['a', 'b'], [[0, 1, 2], [2, 1, 0]]) + v_new = v[dict(y=ind)] + assert v_new.dims == ('x', 'a', 'b') + self.assertArrayEqual(v_new, v_data[:, ([0, 1, 2], [2, 1, 0])]) + + ind = Variable(['a', 'b'], [[0, 0], [1, 1]]) + v_new = v[dict(x=[1, 0], y=ind)] + assert v_new.dims == ('x', 'a', 'b') + self.assertArrayEqual(v_new, v_data[[1, 0]][:, ind]) + + # along diagonal + ind = Variable(['a'], [0, 1]) + v_new = v[ind, ind] + assert v_new.dims == ('a',) + self.assertArrayEqual(v_new, v_data[[0, 1], [0, 1]]) + + # with integer + ind = Variable(['a', 'b'], [[0, 0], [1, 1]]) + v_new = v[dict(x=0, y=ind)] + assert v_new.dims == ('a', 'b') + self.assertArrayEqual(v_new[0], v_data[0][[0, 0]]) + self.assertArrayEqual(v_new[1], v_data[0][[1, 1]]) + + # with slice + ind = Variable(['a', 'b'], [[0, 0], [1, 1]]) + v_new = v[dict(x=slice(None), y=ind)] + assert v_new.dims == ('x', 'a', 'b') + self.assertArrayEqual(v_new, v_data[:, [[0, 0], [1, 1]]]) + + ind = Variable(['a', 'b'], [[0, 0], [1, 1]]) + v_new = v[dict(x=ind, y=slice(None))] + assert v_new.dims == ('a', 'b', 'y') + self.assertArrayEqual(v_new, v_data[[[0, 0], [1, 1]], :]) + + ind = Variable(['a', 'b'], [[0, 0], [1, 1]]) + v_new = v[dict(x=ind, y=slice(None, 1))] + assert v_new.dims == ('a', 'b', 'y') + self.assertArrayEqual(v_new, v_data[[[0, 0], [1, 1]], slice(None, 1)]) + + # slice matches explicit dimension + ind = 
Variable(['y'], [0, 1]) + v_new = v[ind, :2] + assert v_new.dims == ('y',) + self.assertArrayEqual(v_new, v_data[[0, 1], [0, 1]]) + + # with multiple slices + v = self.cls(['x', 'y', 'z'], [[[1, 2, 3], [4, 5, 6]]]) + ind = Variable(['a', 'b'], [[0]]) + v_new = v[ind, :, :] + expected = Variable(['a', 'b', 'y', 'z'], v.data[np.newaxis, ...]) + self.assertVariableIdentical(v_new, expected) + + v = Variable(['w', 'x', 'y', 'z'], [[[[1, 2, 3], [4, 5, 6]]]]) + ind = Variable(['y'], [0]) + v_new = v[ind, :, 1:2, 2] + expected = Variable(['y', 'x'], [[6]]) + self.assertVariableIdentical(v_new, expected) + + # slice and vector mixed indexing resulting in the same dimension + v = Variable(['x', 'y', 'z'], np.arange(60).reshape(3, 4, 5)) + ind = Variable(['x'], [0, 1, 2]) + v_new = v[:, ind] + expected = Variable(('x', 'z'), np.zeros((3, 5))) + expected[0] = v.data[0, 0] + expected[1] = v.data[1, 1] + expected[2] = v.data[2, 2] + self.assertVariableIdentical(v_new, expected) + + v_new = v[:, ind.data] + assert v_new.shape == (3, 3, 5) + + def test_getitem_error(self): + v = self.cls(['x', 'y'], [[0, 1, 2], [3, 4, 5]]) + + with self.assertRaisesRegexp(IndexError, "labeled multi-"): + v[[[0, 1], [1, 2]]] + + ind_x = Variable(['a'], [0, 1, 1]) + ind_y = Variable(['a'], [0, 1]) + with self.assertRaisesRegexp(IndexError, "Dimensions of indexers "): + v[ind_x, ind_y] + + ind = Variable(['a', 'b'], [[True, False], [False, True]]) + with self.assertRaisesRegexp(IndexError, '2-dimensional boolean'): + v[dict(x=ind)] + + v = Variable(['x', 'y', 'z'], np.arange(60).reshape(3, 4, 5)) + ind = Variable(['x'], [0, 1]) + with self.assertRaisesRegexp(IndexError, 'Dimensions of indexers mis'): + v[:, ind] class TestVariable(TestCase, VariableSubclassTestCases): @@ -677,6 +899,45 @@ def test_repr_lazy_data(self): self.assertIn('200000 values with dtype', repr(v)) self.assertIsInstance(v._data, LazilyIndexedArray) + def test_detect_indexer_type(self): + """ Tests indexer type was correctly 
detected. """ + data = np.random.random((10, 11)) + v = Variable(['x', 'y'], data) + + _, ind, _ = v._broadcast_indexes((0, 1)) + assert type(ind) == indexing.BasicIndexer + + _, ind, _ = v._broadcast_indexes((0, slice(0, 8, 2))) + assert type(ind) == indexing.BasicIndexer + + _, ind, _ = v._broadcast_indexes((0, [0, 1])) + assert type(ind) == indexing.OuterIndexer + + _, ind, _ = v._broadcast_indexes(([0, 1], 1)) + assert type(ind) == indexing.OuterIndexer + + _, ind, _ = v._broadcast_indexes(([0, 1], [1, 2])) + assert type(ind) == indexing.OuterIndexer + + _, ind, _ = v._broadcast_indexes(([0, 1], slice(0, 8, 2))) + assert type(ind) == indexing.OuterIndexer + + vind = Variable(('a', ), [0, 1]) + _, ind, _ = v._broadcast_indexes((vind, slice(0, 8, 2))) + assert type(ind) == indexing.OuterIndexer + + vind = Variable(('y', ), [0, 1]) + _, ind, _ = v._broadcast_indexes((vind, 3)) + assert type(ind) == indexing.OuterIndexer + + vind = Variable(('a', ), [0, 1]) + _, ind, _ = v._broadcast_indexes((vind, vind)) + assert type(ind) == indexing.VectorizedIndexer + + vind = Variable(('a', 'b'), [[0, 2], [1, 3]]) + _, ind, _ = v._broadcast_indexes((vind, 3)) + assert type(ind) == indexing.VectorizedIndexer + def test_items(self): data = np.random.random((10, 11)) v = Variable(['x', 'y'], data) @@ -709,6 +970,30 @@ def test_items(self): v[range(10), range(11)] = 1 self.assertArrayEqual(v.values, np.ones((10, 11))) + def test_getitem_basic(self): + v = self.cls(['x', 'y'], [[0, 1, 2], [3, 4, 5]]) + + v_new = v[dict(x=0)] + assert v_new.dims == ('y', ) + self.assertArrayEqual(v_new, v._data[0]) + + v_new = v[dict(x=0, y=slice(None))] + assert v_new.dims == ('y', ) + self.assertArrayEqual(v_new, v._data[0]) + + v_new = v[dict(x=0, y=1)] + assert v_new.dims == () + self.assertArrayEqual(v_new, v._data[0, 1]) + + v_new = v[dict(y=1)] + assert v_new.dims == ('x', ) + self.assertArrayEqual(v_new, v._data[:, 1]) + + # tuple argument + v_new = v[(slice(None), 1)] + assert v_new.dims == 
('x', ) + self.assertArrayEqual(v_new, v._data[:, 1]) + def test_isel(self): v = Variable(['time', 'x'], self.d) self.assertVariableIdentical(v.isel(time=slice(None)), v) @@ -1095,6 +1380,87 @@ def test_count(self): actual = Variable(['x', 'y'], [[1, 0, np.nan], [1, 1, 1]]).count('y') self.assertVariableIdentical(expected, actual) + def test_setitem(self): + v = Variable(['x', 'y'], [[0, 3, 2], [3, 4, 5]]) + v[0, 1] = 1 + self.assertTrue(v[0, 1] == 1) + + v = Variable(['x', 'y'], [[0, 3, 2], [3, 4, 5]]) + v[dict(x=[0, 1])] = 1 + self.assertArrayEqual(v[[0, 1]], np.ones_like(v[[0, 1]])) + + # boolean indexing + v = Variable(['x', 'y'], [[0, 3, 2], [3, 4, 5]]) + v[dict(x=[True, False])] = 1 + + self.assertArrayEqual(v[0], np.ones_like(v[0])) + v = Variable(['x', 'y'], [[0, 3, 2], [3, 4, 5]]) + v[dict(x=[True, False], y=[False, True, False])] = 1 + self.assertTrue(v[0, 1] == 1) + + # dimension broadcast + v = Variable(['x', 'y'], np.ones((3, 2))) + ind = Variable(['a', 'b'], [[0, 1]]) + v[ind, :] = 0 + expected = Variable(['x', 'y'], [[0, 0], [0, 0], [1, 1]]) + self.assertVariableIdentical(expected, v) + + with self.assertRaisesRegexp(ValueError, "shape mismatch"): + v[ind, ind] = np.zeros((1, 2, 1)) + + v = Variable(['x', 'y'], [[0, 3, 2], [3, 4, 5]]) + ind = Variable(['a'], [0, 1]) + v[dict(x=ind)] = Variable(['a', 'y'], np.ones((2, 3), dtype=int) * 10) + self.assertArrayEqual(v[0], np.ones_like(v[0]) * 10) + self.assertArrayEqual(v[1], np.ones_like(v[1]) * 10) + assert v.dims == ('x', 'y') # dimension should not change + + # increment + v = Variable(['x', 'y'], np.arange(6).reshape(3, 2)) + ind = Variable(['a'], [0, 1]) + v[dict(x=ind)] += 1 + expected = Variable(['x', 'y'], [[1, 2], [3, 4], [4, 5]]) + self.assertVariableIdentical(v, expected) + + ind = Variable(['a'], [0, 0]) + v[dict(x=ind)] += 1 + expected = Variable(['x', 'y'], [[2, 3], [3, 4], [4, 5]]) + self.assertVariableIdentical(v, expected) + + +@requires_dask +class TestVariableWithDask(TestCase, 
VariableSubclassTestCases): +    cls = staticmethod(lambda *args: Variable(*args).chunk()) + +    @pytest.mark.xfail +    def test_0d_object_array_with_list(self): +        super(TestVariableWithDask, self).test_0d_object_array_with_list() + +    @pytest.mark.xfail +    def test_array_interface(self): +        # dask array does not have `argsort` +        super(TestVariableWithDask, self).test_array_interface() + +    @pytest.mark.xfail +    def test_copy_index(self): +        super(TestVariableWithDask, self).test_copy_index() + +    @pytest.mark.xfail +    def test_eq_all_dtypes(self): +        super(TestVariableWithDask, self).test_eq_all_dtypes() + +    def test_getitem_fancy(self): +        import dask +        if LooseVersion(dask.__version__) <= LooseVersion('0.15.1'): +            pytest.xfail("vindex from latest dask is required") +        super(TestVariableWithDask, self).test_getitem_fancy() + +    def test_getitem_1d_fancy(self): +        import dask +        if LooseVersion(dask.__version__) <= LooseVersion('0.15.1'): +            pytest.xfail("vindex from latest dask is required") +        super(TestVariableWithDask, self).test_getitem_1d_fancy() + class TestIndexVariable(TestCase, VariableSubclassTestCases): cls = staticmethod(IndexVariable) @@ -1174,6 +1540,23 @@ def test_coordinate_alias(self): x = Coordinate('x', [1, 2, 3]) self.assertIsInstance(x, IndexVariable) + # These tests make use of multi-dimensional variables, which are not valid + # IndexVariable objects: + @pytest.mark.xfail + def test_getitem_error(self): + super(TestIndexVariable, self).test_getitem_error() + + @pytest.mark.xfail + def test_getitem_advanced(self): + super(TestIndexVariable, self).test_getitem_advanced() + + @pytest.mark.xfail + def test_getitem_fancy(self): + super(TestIndexVariable, self).test_getitem_fancy() + + @pytest.mark.xfail + def test_getitem_uint(self): + super(TestIndexVariable, self).test_getitem_uint() class TestAsCompatibleData(TestCase): @@ -1248,7 +1631,6 @@ def test_full_like(self): self.assertEquals(expect.dtype, bool) self.assertVariableIdentical(expect, full_like(orig, True, 
dtype=bool)) - @requires_dask def test_full_like_dask(self): orig = Variable(dims=('x', 'y'), data=[[1.5, 2.0], [3.1, 4.3]],