Skip to content

Argmin indexes #1469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
9 changes: 9 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Indexing
Dataset.reset_index
Dataset.reorder_levels


Computation
-----------

Expand All @@ -127,6 +128,8 @@ Computation
Dataset.resample
Dataset.diff
Dataset.quantile
Dataset.idxmin
Dataset.idxmax

**Aggregation**:
:py:attr:`~Dataset.all`
Expand Down Expand Up @@ -280,6 +283,10 @@ Computation
DataArray.diff
DataArray.dot
DataArray.quantile
DataArray.idxmin
DataArray.idxmax
DataArray.indexes_min
DataArray.indexes_max

**Aggregation**:
:py:attr:`~DataArray.all`
Expand All @@ -294,6 +301,8 @@ Computation
:py:attr:`~DataArray.sum`
:py:attr:`~DataArray.std`
:py:attr:`~DataArray.var`
:py:attr:`~DataArray.indexes_min`
:py:attr:`~DataArray.indexes_max`

**Missing values**:
:py:attr:`~DataArray.isnull`
Expand Down
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ Enhancements
Bug fixes
~~~~~~~~~

- ``.argmin`` and ``.argmax`` now only support 1-dimensional arrays.
Instead, :py:class:`~xarray.DataArray` now supports ``.indexes_min``
(and ``.indexes_max``), which return a Dataset holding the minimum
(or maximum) indexes along the specified dimensions as DataArrays.
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
``.idxmin`` and ``.idxmax`` are also added; they work similarly to
pandas's ``.idxmin`` and ``.idxmax``.


.. _whats-new.0.9.6:

v0.9.6 (8 June 2017)
Expand Down
63 changes: 38 additions & 25 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,28 @@ def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
allow_lazy=True, **kwargs)
return wrapped_func

_reduce_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension(s) over which to apply `{name}`.
axis : int or sequence of int, optional
Axis(es) over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over axes."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""
_reduce_extra_args_docstring = """\
dim : str or sequence of str, optional
Dimension(s) over which to apply `{name}`.
axis : int or sequence of int, optional
Axis(es) over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied. If neither are supplied, then
`{name}` is calculated over axes."""

# Parameter documentation for reductions that only work along one dimension;
# formatted with ``name`` and spliced into the generated method docstring.
_reduce1dim_extra_args_docstring = """\
dim : str, optional
    Dimension over which to apply `{name}`.
axis : int, optional
    Axis over which to apply `{name}`. Only one of the 'dim'
    and 'axis' arguments can be supplied. If neither is supplied,
    the data must be 1-dimensional."""

_cum_extra_args_docstring = """\
dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class ImplementsDatasetReduce(object):
Expand All @@ -56,17 +64,22 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
**kwargs)
return wrapped_func

_reduce_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension(s) over which to apply `{name}`. By default `{name}` is
applied over all dimensions."""

_cum_extra_args_docstring = \
"""dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""
_reduce_extra_args_docstring = """\
dim : str or sequence of str, optional
Dimension(s) over which to apply `{name}`. By default `{name}` is
applied over all dimensions."""

# Parameter documentation for reductions that only work along one dimension;
# formatted with ``name`` and spliced into the generated method docstring.
_reduce1dim_extra_args_docstring = """\
dim : str, optional
    Dimension over which to apply `{name}`. May be omitted only when
    the data is 1-dimensional."""

_cum_extra_args_docstring = """\
dim : str or sequence of str, optional
Dimension over which to apply `{name}`.
axis : int or sequence of int, optional
Axis over which to apply `{name}`. Only one of the 'dim'
and 'axis' arguments can be supplied."""


class AbstractArray(ImplementsArrayReduce, formatting.ReprMixin):
Expand Down
83 changes: 83 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,89 @@ def indexes(self):
"""
return Indexes(self._coords, self.sizes)

def idxmax(self, dim=None, skipna=True, keep_dims=False):
    """Return indexes of the maximum values along a given dimension.

    The work is delegated to ``Dataset.idxmax`` on a temporary dataset
    built from this array.

    Parameters
    ----------
    dim : str, optional
        Dimension along which the index of the maximum is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.
    keep_dims : bool, optional
        If True, the given dimension is kept with size one.

    Returns
    -------
    idx : DataArray
        DataArray which stores the first occurrence of the maximum index.
    """
    ds = self._to_temp_dataset().idxmax(dim, skipna, keep_dims)
    return self._from_temp_dataset(ds)

def idxmin(self, dim=None, skipna=True, keep_dims=False):
    """Return indexes of the minimum values along a given dimension.

    The work is delegated to ``Dataset.idxmin`` on a temporary dataset
    built from this array.

    Parameters
    ----------
    dim : str, optional
        Dimension along which the index of the minimum is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.
    keep_dims : bool, optional
        If True, the given dimension is kept with size one.

    Returns
    -------
    idx : DataArray
        DataArray which stores the first occurrence of the minimum index.
    """
    ds = self._to_temp_dataset().idxmin(dim, skipna, keep_dims)
    return self._from_temp_dataset(ds)

def _indexes_min_max(self, func, dims, skipna):
    """Shared implementation of ``indexes_min`` and ``indexes_max``.

    Parameters
    ----------
    func : str
        Name of the method to call on ``self.variable``
        (``'indexes_min'`` or ``'indexes_max'``).
    dims : str or sequence of str, optional
        Dimension(s) forwarded unchanged to the variable-level method.
    skipna : bool
        Forwarded to the variable-level method.

    Returns
    -------
    indexes : Dataset
        One DataArray per entry of the mapping returned by the
        variable-level method, carrying the coordinates of its own
        dimensions.
    """
    # skipna used to be silently dropped here; the variable-level method
    # takes it as its second positional argument.
    arg_dict = getattr(self.variable, func)(dims, skipna)

    variables = OrderedDict()
    for key, item in arg_dict.items():
        # Only attach coordinates that exist: a dimension may have none.
        coords = {d: self.coords[d] for d in item.dims if d in self.coords}
        variables[key] = DataArray(item, dims=item.dims, name=key,
                                   coords=coords)
    return Dataset(variables)

def indexes_min(self, dims=None, skipna=True):
    """Return indexes of the minimum values along one or more dimensions.

    Parameters
    ----------
    dims : str or sequence of str, optional
        Dimension(s) along which the minimum index is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.

    Returns
    -------
    indexes : Dataset
        Dataset mapping dimension names to minimum indexes.
    """
    return self._indexes_min_max('indexes_min', dims, skipna)

def indexes_max(self, dims=None, skipna=True):
    """Return indexes of the maximum values along one or more dimensions.

    Parameters
    ----------
    dims : str or sequence of str, optional
        Dimension(s) along which the maximum index is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.

    Returns
    -------
    indexes : Dataset
        Dataset mapping dimension names to maximum indexes.
    """
    return self._indexes_min_max('indexes_max', dims, skipna)

@property
def coords(self):
"""Dictionary-like container of coordinate arrays.
Expand Down
75 changes: 75 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,81 @@ def indexes(self):
"""
return Indexes(self._variables, self._dims)

def _idx_min_max(self, func, dim, skipna, keep_dims):
    """Shared implementation of ``idxmin`` and ``idxmax``.

    Parameters
    ----------
    func : str
        Name of the per-variable method to call (``'indexes_min'`` or
        ``'indexes_max'``).
    dim : str or None
        Dimension to reduce. If None, every non-coordinate variable must
        be at most 1-dimensional and is reduced along its only dimension.
    skipna : bool
        Forwarded to the per-variable method.
    keep_dims : bool
        Forwarded to the per-variable method.

    Returns
    -------
    The result of ``self._replace_vars_and_dims`` built from the reduced
    variables and the retained coordinates.

    Raises
    ------
    ValueError
        If ``dim`` is neither None nor a string, if ``dim`` is None while
        a variable has more than one dimension, or if ``keep_dims`` is set
        without ``dim`` and the 1-dimensional variables do not all share
        the same dimension.
    """
    if dim is not None and not isinstance(dim, basestring):
        # Format with %r: concatenating a non-string onto the message
        # would raise TypeError instead of the intended ValueError.
        raise ValueError('dim should be a string (not array) %r' % (dim,))

    if dim is None and keep_dims:  # The reduced dim should be identical.
        dim_set = set(v.dims[0] for _, v in iteritems(self._variables)
                      if len(v.dims) == 1)
        if len(dim_set) > 1:
            raise ValueError('with keep_dims option, the reduced index'
                             ' cannot be different in ' + func + '.')
        if len(dim_set) == 1:
            # Sets cannot be subscripted; take the single element.
            dim = next(iter(dim_set))

    variables = OrderedDict()
    coord_names = []
    for k, v in iteritems(self._variables):
        if dim is None and len(v.dims) > 1:
            raise ValueError('dim should be specified for more than '
                             '1-dimensional array %s' % (k,))

        if k in self._coord_names:  # Do not change coordinates
            # With keep_dims, the coordinate of the reduced dimension
            # itself is dropped; every other coordinate is kept as is.
            if not keep_dims or k != dim:
                variables[k] = v
                coord_names.append(k)
        elif len(v.dims) == 0:
            variables[k] = v
        elif dim is not None and dim not in v.dims:
            variables[k] = v
        else:
            # Here v has at least one dimension and, when dim is given,
            # contains it; otherwise reduce along v's only dimension.
            d = v.dims[0] if dim is None else dim
            variables[k] = getattr(v, func)(d, skipna, keep_dims)[d]
    return self._replace_vars_and_dims(variables, set(coord_names))

def idxmax(self, dim=None, skipna=True, keep_dims=False):
    """Return indexes of the maximum values along a given dimension.

    Parameters
    ----------
    dim : str, optional
        Dimension along which the index of the maximum is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.
    keep_dims : bool, optional
        If True, the given dimension is kept with size one.

    Returns
    -------
    idx : Dataset
        Dataset whose reduced variables store the first occurrence of
        the maximum index.
    """
    return self._idx_min_max('indexes_max', dim, skipna, keep_dims)

def idxmin(self, dim=None, skipna=True, keep_dims=False):
    """Return indexes of the minimum values along a given dimension.

    Parameters
    ----------
    dim : str, optional
        Dimension along which the index of the minimum is taken.
    skipna : bool, optional
        Exclude NA/null values. If an entire row/column is NA, the result
        will be first index.
    keep_dims : bool, optional
        If True, the given dimension is kept with size one.

    Returns
    -------
    idx : Dataset
        Dataset whose reduced variables store the first occurrence of
        the minimum index.
    """
    return self._idx_min_max('indexes_min', dim, skipna, keep_dims)

@property
def coords(self):
"""Dictionary of xarray.DataArray objects corresponding to coordinate
Expand Down
14 changes: 11 additions & 3 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def _ignore_warnings_if(condition):

def _create_nan_agg_method(name, numeric_only=False, np_compat=False,
no_bottleneck=False, coerce_strings=False,
keep_dims=False):
keep_dims=False, only_1dim=False):
def f(values, axis=None, skipna=None, **kwargs):
# ignore keyword args inserted by np.mean and other numpy aggregators
# automatically:
Expand All @@ -176,6 +176,13 @@ def f(values, axis=None, skipna=None, **kwargs):

values = asarray(values)

if only_1dim:
    # Reject calls that would reduce over more than one axis: either no
    # axis on multi-dimensional data, or a sequence of several axes.
    # Sequences expose ``__len__`` (there is no ``len`` attribute), so
    # that is the attribute to probe before calling len().
    if ((axis is None and values.ndim > 1) or
            (hasattr(axis, '__len__') and len(axis) > 1)):
        raise ValueError('Method %s is only applicable to '
                         '1-dimensional data (or with a single dim '
                         'arguments).' % name)

if coerce_strings and values.dtype.kind in 'SU':
values = values.astype(object)

Expand Down Expand Up @@ -214,13 +221,14 @@ def f(values, axis=None, skipna=None, **kwargs):
'or newer to use skipna=True or skipna=None' % name)
raise NotImplementedError(msg)
f.numeric_only = numeric_only
f.only_1dim = only_1dim
f.keep_dims = keep_dims
f.__name__ = name
return f


argmax = _create_nan_agg_method('argmax', coerce_strings=True)
argmin = _create_nan_agg_method('argmin', coerce_strings=True)
argmax = _create_nan_agg_method('argmax', coerce_strings=True, only_1dim=True)
argmin = _create_nan_agg_method('argmin', coerce_strings=True, only_1dim=True)
max = _create_nan_agg_method('max', coerce_strings=True)
min = _create_nan_agg_method('min', coerce_strings=True)
sum = _create_nan_agg_method('sum', numeric_only=True)
Expand Down
15 changes: 14 additions & 1 deletion xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@

_REDUCE_DOCSTRING_TEMPLATE = """\
Reduce this {cls}'s data by applying `{name}` along some dimension(s).
{only_1dim}

Parameters
----------
Expand All @@ -105,6 +106,11 @@
indicated dimension(s) removed.
"""

# Extra sentence substituted for the ``{only_1dim}`` placeholder of
# _REDUCE_DOCSTRING_TEMPLATE when the reduce function is flagged
# ``only_1dim``; other reductions get an empty string instead.
_REDUCE_ONLY1DIM_DOCSTRING = """\
The data should be 1-dimensional or either of (a single) dim or axis should be
passed.
"""

_ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\
Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension.

Expand Down Expand Up @@ -206,11 +212,18 @@ def inject_reduce_methods(cls):
[('count', duck_array_ops.count, False)])
for name, f, include_skipna in methods:
numeric_only = getattr(f, 'numeric_only', False)
only_1dim = getattr(f, 'only_1dim', False)
only_1dim_doc = _REDUCE_ONLY1DIM_DOCSTRING if only_1dim else ''
if only_1dim:
extra_args = cls._reduce1dim_extra_args_docstring.format(name=name)
else:
extra_args = cls._reduce_extra_args_docstring.format(name=name)

func = cls._reduce_method(f, include_skipna, numeric_only)
func.__name__ = name
func.__doc__ = _REDUCE_DOCSTRING_TEMPLATE.format(
name=name, cls=cls.__name__,
extra_args=cls._reduce_extra_args_docstring.format(name=name))
only_1dim=only_1dim_doc, extra_args=extra_args)
setattr(cls, name, func)


Expand Down
Loading