diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 22971bb9955..8796c79da4c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,6 +31,24 @@ Breaking changes - The ``inplace`` kwarg for public methods now raises an error, having been deprecated since v0.11.0. By `Maximilian Roos <https://github.com/max-sixty>`_ +- Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22% + (not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray + has gone down from 1.9kB to 1.5kB. + + Caveats: + + - Pickle streams produced by older versions of xarray can't be loaded using this + release, and vice versa. + - Any user code that was accessing the ``__dict__`` attribute of + xarray objects will break. The best practice to attach custom metadata to xarray + objects is to use the ``attrs`` dictionary. + - Any user code that defines custom subclasses of xarray classes must now explicitly + define ``__slots__`` itself. Subclasses that don't add any attributes must state so + by defining ``__slots__ = ()`` right after the class header. + Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and a hard + crash in a later release. + + (:issue:`3250`) by `Guido Imperiale <https://github.com/crusaderky>`_. 
New functions/methods ~~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9ad1db1829b..a20d3c2a306 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -694,6 +694,8 @@ def open_dataarray( class _MultiFileCloser: + __slots__ = ("file_objs",) + def __init__(self, file_objs): self.file_objs = file_objs diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 7ee11052192..455b77907f9 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -68,12 +68,16 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + __slots__ = () + def __array__(self, dtype=None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return np.asarray(self[key], dtype=dtype) class AbstractDataStore(Mapping): + __slots__ = () + def __iter__(self): return iter(self.variables) @@ -165,6 +169,8 @@ def __exit__(self, exception_type, exception_value, traceback): class ArrayWriter: + __slots__ = ("sources", "targets", "regions", "lock") + def __init__(self, lock=None): self.sources = [] self.targets = [] @@ -205,6 +211,8 @@ def sync(self, compute=True): class AbstractWritableDataStore(AbstractDataStore): + __slots__ = () + def encode(self, variables, attributes): """ Encode the variables and attributes in this store @@ -371,6 +379,8 @@ def set_dimensions(self, variables, unlimited_dims=None): class WritableCFDataStore(AbstractWritableDataStore): + __slots__ = () + def encode(self, variables, attributes): # All NetCDF files get CF encoded by default, without this attempting # to write times, for example, would fail. 
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 57317a7a1a5..813942c2f32 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -30,6 +30,8 @@ class BaseNetCDF4Array(BackendArray): + __slots__ = ("datastore", "dtype", "shape", "variable_name") + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -52,8 +54,13 @@ def __setitem__(self, key, value): if self.datastore.autoclose: self.datastore.close(needs_lock=False) + def get_array(self, needs_lock=True): + raise NotImplementedError("Virtual Method") + class NetCDF4ArrayWrapper(BaseNetCDF4Array): + __slots__ = () + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) variable = ds.variables[self.variable_name] @@ -294,6 +301,17 @@ class NetCDF4DataStore(WritableCFDataStore): This store supports NetCDF3, NetCDF4 and OpenDAP datasets. """ + __slots__ = ( + "autoclose", + "format", + "is_remote", + "lock", + "_filename", + "_group", + "_manager", + "_mode", + ) + def __init__( self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False ): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 31997d258c8..9a115de55ef 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -29,6 +29,8 @@ def _encode_zarr_attr_value(value): class ZarrArrayWrapper(BackendArray): + __slots__ = ("datastore", "dtype", "shape", "variable_name") + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -231,6 +233,15 @@ class ZarrStore(AbstractWritableDataStore): """Store for reading and writing data via zarr """ + __slots__ = ( + "append_dim", + "ds", + "_consolidate_on_close", + "_group", + "_read_only", + "_synchronizer", + ) + @classmethod def open_group( cls, diff --git a/xarray/conventions.py b/xarray/conventions.py index c15e5c40e73..1e40d254e96 100644 --- a/xarray/conventions.py +++ 
b/xarray/conventions.py @@ -31,6 +31,8 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('int16') """ + __slots__ = ("array",) + def __init__(self, array): self.array = indexing.as_indexable(array) @@ -60,6 +62,8 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('bool') """ + __slots__ = ("array",) + def __init__(self, array): self.array = indexing.as_indexable(array) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 03a6d37b01e..8838e71e6ca 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -75,6 +75,8 @@ class StringAccessor: """ + __slots__ = ("_obj",) + def __init__(self, obj): self._obj = obj diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5e8c8758ef5..137db034c95 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -14,6 +14,8 @@ class SupportsArithmetic: Used by Dataset, DataArray, Variable and GroupBy. """ + __slots__ = () + # TODO: implement special methods for arithmetic here rather than injecting # them in xarray/core/ops.py. Ideally, do so by inheriting from # numpy.lib.mixins.NDArrayOperatorsMixin. 
diff --git a/xarray/core/common.py b/xarray/core/common.py index 2e834492521..ab9e7616ce1 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,3 +1,4 @@ +import warnings from collections import OrderedDict from contextlib import suppress from textwrap import dedent @@ -35,6 +36,8 @@ class ImplementsArrayReduce: + __slots__ = () + @classmethod def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): if include_skipna: @@ -72,6 +75,8 @@ def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore class ImplementsDatasetReduce: + __slots__ = () + @classmethod def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): if include_skipna: @@ -110,6 +115,8 @@ class AbstractArray(ImplementsArrayReduce): """Shared base class for DataArray and Variable. """ + __slots__ = () + def __bool__(self: Any) -> bool: return bool(self.values) @@ -180,7 +187,25 @@ class AttrAccessMixin: """Mixin class that allows getting keys with attribute access """ - _initialized = False + __slots__ = () + + def __init_subclass__(cls): + """Verify that all subclasses explicitly define ``__slots__``. If they don't, + raise error in the core xarray module and a FutureWarning in third-party + extensions. + This check is only triggered in Python 3.6+. 
+ """ + if not hasattr(object.__new__(cls), "__dict__"): + cls.__setattr__ = cls._setattr_slots + elif cls.__module__.startswith("xarray."): + raise AttributeError("%s must explicitly define __slots__" % cls.__name__) + else: + cls.__setattr__ = cls._setattr_dict + warnings.warn( + "xarray subclass %s should explicitly define __slots__" % cls.__name__, + FutureWarning, + stacklevel=2, + ) @property def _attr_sources(self) -> List[Mapping[Hashable, Any]]: @@ -195,7 +220,7 @@ def _item_sources(self) -> List[Mapping[Hashable, Any]]: return [] def __getattr__(self, name: str) -> Any: - if name != "__setstate__": + if name not in {"__dict__", "__setstate__"}: # this avoids an infinite loop when pickle looks for the # __setstate__ attribute before the xarray object is initialized for source in self._attr_sources: @@ -205,20 +230,52 @@ def __getattr__(self, name: str) -> Any: "%r object has no attribute %r" % (type(self).__name__, name) ) - def __setattr__(self, name: str, value: Any) -> None: - if self._initialized: - try: - # Allow setting instance variables if they already exist - # (e.g., _attrs). We use __getattribute__ instead of hasattr - # to avoid key lookups with attribute-style access. - self.__getattribute__(name) - except AttributeError: - raise AttributeError( - "cannot set attribute %r on a %r object. Use __setitem__ " - "style assignment (e.g., `ds['name'] = ...`) instead to " - "assign variables." % (name, type(self).__name__) - ) + # This complicated three-method design boosts overall performance of simple + # operations - particularly DataArray methods that perform a _to_temp_dataset() + # round-trip - by a whopping 8% compared to a single method that checks + # hasattr(self, "__dict__") at runtime before every single assignment (like + # _setattr_py35 does). All of this is just temporary until the FutureWarning can be + # changed into a hard crash. 
+ def _setattr_dict(self, name: str, value: Any) -> None: + """Deprecated third party subclass (see ``__init_subclass__`` above) + """ object.__setattr__(self, name, value) + if name in self.__dict__: + # Custom, non-slotted attr, or improperly assigned variable? + warnings.warn( + "Setting attribute %r on a %r object. Explicitly define __slots__ " + "to suppress this warning for legitimate custom attributes and " + "raise an error when attempting variables assignments." + % (name, type(self).__name__), + FutureWarning, + stacklevel=2, + ) + + def _setattr_slots(self, name: str, value: Any) -> None: + """Objects with ``__slots__`` raise AttributeError if you try setting an + undeclared attribute. This is desirable, but the error message could use some + improvement. + """ + try: + object.__setattr__(self, name, value) + except AttributeError as e: + # Don't accidentally shadow custom AttributeErrors, e.g. + # DataArray.dims.setter + if str(e) != "%r object has no attribute %r" % (type(self).__name__, name): + raise + raise AttributeError( + "cannot set attribute %r on a %r object. Use __setitem__ style " + "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." + % (name, type(self).__name__) + ) from e + + def _setattr_py35(self, name: str, value: Any) -> None: + if hasattr(self, "__dict__"): + return self._setattr_dict(name, value) + return self._setattr_slots(name, value) + + # Overridden in Python >=3.6 by __init_subclass__ + __setattr__ = _setattr_py35 def __dir__(self) -> List[str]: """Provide method name lookup and completion. 
Only provide 'public' @@ -283,6 +340,8 @@ def get_squeeze_dims( class DataWithCoords(SupportsArithmetic, AttrAccessMixin): """Shared base class for Dataset and DataArray.""" + __slots__ = () + _rolling_exp_cls = RollingExp def squeeze( diff --git a/xarray/core/computation.py b/xarray/core/computation.py index da97106098f..424ab5be87a 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -51,6 +51,14 @@ class _UFuncSignature: Core dimension names on each output variable. """ + __slots__ = ( + "input_core_dims", + "output_core_dims", + "_all_input_core_dims", + "_all_output_core_dims", + "_all_core_dims", + ) + def __init__(self, input_core_dims, output_core_dims=((),)): self.input_core_dims = tuple(tuple(a) for a in input_core_dims) self.output_core_dims = tuple(tuple(a) for a in output_core_dims) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 82488f252f4..ddea5739fff 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -35,7 +35,7 @@ class AbstractCoordinates(Mapping[Hashable, "DataArray"]): - _data = None # type: Union["DataArray", "Dataset"] + __slots__ = () def __getitem__(self, key: Hashable) -> "DataArray": raise NotImplementedError() @@ -53,7 +53,7 @@ def dims(self) -> Union[Mapping[Hashable, int], Tuple[Hashable, ...]]: @property def indexes(self) -> Indexes: - return self._data.indexes + return self._data.indexes # type: ignore @property def variables(self): @@ -108,9 +108,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: raise ValueError("no valid index for a 0-dimensional object") elif len(ordered_dims) == 1: (dim,) = ordered_dims - return self._data.get_index(dim) + return self._data.get_index(dim) # type: ignore else: - indexes = [self._data.get_index(k) for k in ordered_dims] + indexes = [self._data.get_index(k) for k in ordered_dims] # type: ignore names = list(ordered_dims) return pd.MultiIndex.from_product(indexes, names=names) @@ -187,7 +187,7 @@ 
class DatasetCoordinates(AbstractCoordinates): objects. """ - _data = None # type: Dataset + __slots__ = ("_data",) def __init__(self, dataset: "Dataset"): self._data = dataset @@ -258,7 +258,7 @@ class DataArrayCoordinates(AbstractCoordinates): dimensions and the values given by corresponding DataArray objects. """ - _data = None # type: DataArray + __slots__ = ("_data",) def __init__(self, dataarray: "DataArray"): self._data = dataarray @@ -314,6 +314,8 @@ class LevelCoordinatesSource(Mapping[Hashable, Any]): by any public methods. """ + __slots__ = ("_data",) + def __init__(self, data_object: "Union[DataArray, Dataset]"): self._data = data_object diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4f78ae7d021..26904969945 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -177,6 +177,8 @@ def _check_data_shape(data, coords, dims): class _LocIndexer: + __slots__ = ("data_array",) + def __init__(self, data_array: "DataArray"): self.data_array = data_array @@ -241,6 +243,8 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ + __slots__ = ("_accessors", "_coords", "_file_obj", "_name", "_indexes", "_variable") + _groupby_cls = groupby.DataArrayGroupBy _rolling_cls = rolling.DataArrayRolling _coarsen_cls = rolling.DataArrayCoarsen @@ -351,6 +355,7 @@ def __init__( assert isinstance(coords, OrderedDict) self._coords = coords # type: OrderedDict[Any, Variable] self._name = name # type: Optional[Hashable] + self._accessors = None # type: Optional[Dict[str, Any]] # TODO(shoyer): document this argument, once it becomes part of the # public interface. 
@@ -358,8 +363,6 @@ def __init__( self._file_obj = None - self._initialized = True # type: bool - def _replace( self, variable: Variable = None, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 14237a244fd..f3ad4650b38 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -345,6 +345,8 @@ def as_dataset(obj: Any) -> "Dataset": class DataVariables(Mapping[Hashable, "DataArray"]): + __slots__ = ("_dataset",) + def __init__(self, dataset: "Dataset"): self._dataset = dataset @@ -384,6 +386,8 @@ def _ipython_key_completions_(self): class _LocIndexer: + __slots__ = ("dataset",) + def __init__(self, dataset: "Dataset"): self.dataset = dataset @@ -407,6 +411,17 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing. """ + __slots__ = ( + "_accessors", + "_attrs", + "_coord_names", + "_dims", + "_encoding", + "_file_obj", + "_indexes", + "_variables", + ) + _groupby_cls = groupby.DatasetGroupBy _rolling_cls = rolling.DatasetRolling _coarsen_cls = rolling.DatasetCoarsen @@ -485,6 +500,7 @@ def __init__( self._variables = OrderedDict() # type: OrderedDict[Any, Variable] self._coord_names = set() # type: Set[Hashable] self._dims = {} # type: Dict[Any, int] + self._accessors = None # type: Optional[Dict[str, Any]] self._attrs = None # type: Optional[OrderedDict] self._file_obj = None if data_vars is None: @@ -500,7 +516,6 @@ def __init__( self._attrs = OrderedDict(attrs) self._encoding = None # type: Optional[Dict] - self._initialized = True def _set_init_vars_and_dims(self, data_vars, coords, compat): """Set the initial value of Dataset variables and dimensions @@ -839,7 +854,7 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._initialized = True + obj._accessors = None return obj __default = object() diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 302a7fb2ec6..f473eaa497d 100644 --- a/xarray/core/extensions.py +++ 
b/xarray/core/extensions.py @@ -19,6 +19,14 @@ def __get__(self, obj, cls): if obj is None: # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + + try: + return obj._accessors[self._name] + except TypeError: + obj._accessors = {} + except KeyError: + pass + try: accessor_obj = self._accessor(obj) except AttributeError: @@ -26,11 +34,8 @@ def __get__(self, obj, cls): # raised when initializing the accessor, so we need to raise as # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - # Replace the property with the accessor object. Inspired by: - # http://www.pydanny.com/cached-property.html - # We need to use object.__setattr__ because we overwrite __setattr__ on - # AttrAccessMixin. - object.__setattr__(obj, self._name, accessor_obj) + + obj._accessors[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5d81b13983d..41de4846e81 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -139,13 +139,24 @@ class _DummyGroup: Should not be user visible. 
""" + __slots__ = ("name", "coords", "size") + def __init__(self, obj, name, coords): self.name = name self.coords = coords - self.dims = (name,) - self.ndim = 1 self.size = obj.sizes[name] - self.values = range(self.size) + + @property + def dims(self): + return (self.name,) + + @property + def ndim(self): + return 1 + + @property + def values(self): + return range(self.size) def _ensure_1d(group, obj): @@ -216,6 +227,19 @@ class GroupBy(SupportsArithmetic): DataArray.groupby """ + __slots__ = ( + "_full_index", + "_inserted_dims", + "_group", + "_group_dim", + "_group_indices", + "_groups", + "_obj", + "_restore_coord_dims", + "_stacked_dim", + "_unique_coord", + ) + def __init__( self, obj, diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 5917f7c7a2d..94188fabc92 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -11,6 +11,8 @@ class Indexes(collections.abc.Mapping): """Immutable proxy for Dataset or DataArrary indexes.""" + __slots__ = ("_indexes",) + def __init__(self, indexes): """Not for public consumption. diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index f6570149484..c6a8f6f35e4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1,9 +1,10 @@ +import enum import functools import operator from collections import defaultdict from contextlib import suppress from datetime import timedelta -from typing import Any, Sequence, Tuple, Union +from typing import Any, Callable, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -327,6 +328,8 @@ class ExplicitIndexer: sub-classes BasicIndexer, OuterIndexer or VectorizedIndexer. """ + __slots__ = ("_key",) + def __init__(self, key): if type(self) is ExplicitIndexer: # noqa raise TypeError("cannot instantiate base ExplicitIndexer objects") @@ -359,6 +362,8 @@ class BasicIndexer(ExplicitIndexer): indexed with an integer are dropped from the result. 
""" + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -389,6 +394,8 @@ class OuterIndexer(ExplicitIndexer): indexing works like MATLAB/Fortran. """ + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -432,6 +439,8 @@ class VectorizedIndexer(ExplicitIndexer): https://github.com/numpy/numpy/pull/6256 """ + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -468,10 +477,15 @@ def __init__(self, key): class ExplicitlyIndexed: - """Mixin to mark support for Indexer subclasses in indexing.""" + """Mixin to mark support for Indexer subclasses in indexing. + """ + + __slots__ = () class ExplicitlyIndexedNDArrayMixin(utils.NDArrayMixin, ExplicitlyIndexed): + __slots__ = () + def __array__(self, dtype=None): key = BasicIndexer((slice(None),) * self.ndim) return np.asarray(self[key], dtype=dtype) @@ -480,6 +494,8 @@ def __array__(self, dtype=None): class ImplicitToExplicitIndexingAdapter(utils.NDArrayMixin): """Wrap an array, converting tuples into the indicated explicit indexer.""" + __slots__ = ("array", "indexer_cls") + def __init__(self, array, indexer_cls=BasicIndexer): self.array = as_indexable(array) self.indexer_cls = indexer_cls @@ -502,6 +518,8 @@ class LazilyOuterIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy. """ + __slots__ = ("array", "key") + def __init__(self, array, key=None): """ Parameters @@ -577,6 +595,8 @@ class LazilyVectorizedIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make vectorized indexing lazy. 
""" + __slots__ = ("array", "key") + def __init__(self, array, key): """ Parameters @@ -631,6 +651,8 @@ def _wrap_numpy_scalars(array): class CopyOnWriteArray(ExplicitlyIndexedNDArrayMixin): + __slots__ = ("array", "_copied") + def __init__(self, array): self.array = as_indexable(array) self._copied = False @@ -655,6 +677,8 @@ def __setitem__(self, key, value): class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): + __slots__ = ("array",) + def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) @@ -783,18 +807,24 @@ def _combine_indexers(old_key, shape, new_key): ) -class IndexingSupport: # could inherit from enum.Enum on Python 3 +@enum.unique +class IndexingSupport(enum.Enum): # for backends that support only basic indexer - BASIC = "BASIC" + BASIC = 0 # for backends that support basic / outer indexer - OUTER = "OUTER" + OUTER = 1 # for backends that support outer indexer including at most 1 vector. - OUTER_1VECTOR = "OUTER_1VECTOR" + OUTER_1VECTOR = 2 # for backends that support full vectorized indexer. - VECTORIZED = "VECTORIZED" + VECTORIZED = 3 -def explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method): +def explicit_indexing_adapter( + key: ExplicitIndexer, + shape: Tuple[int, ...], + indexing_support: IndexingSupport, + raw_indexing_method: Callable, +) -> Any: """Support explicit indexing by delegating to a raw indexing method. 
Outer and/or vectorized indexers are supported by indexing a second time @@ -824,7 +854,9 @@ def explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method) return result -def decompose_indexer(indexer, shape, indexing_support): +def decompose_indexer( + indexer: ExplicitIndexer, shape: Tuple[int, ...], indexing_support: IndexingSupport +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -848,7 +880,11 @@ def _decompose_slice(key, size): return slice(start, stop, -step), slice(None, None, -1) -def _decompose_vectorized_indexer(indexer, shape, indexing_support): +def _decompose_vectorized_indexer( + indexer: VectorizedIndexer, + shape: Tuple[int, ...], + indexing_support: IndexingSupport, +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose vectorized indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -884,45 +920,49 @@ def _decompose_vectorized_indexer(indexer, shape, indexing_support): if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer = [] - np_indexer = [] + backend_indexer_elems = [] + np_indexer_elems = [] # convert negative indices - indexer = [ + indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k for k, s in zip(indexer.tuple, shape) ] - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. 
bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer_elems.append(bk_slice) + np_indexer_elems.append(np_slice) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer_elems.append(oind) + np_indexer_elems.append(vind.reshape(*k.shape)) - backend_indexer = OuterIndexer(tuple(backend_indexer)) - np_indexer = VectorizedIndexer(tuple(np_indexer)) + backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) + np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer # If the backend does not support outer indexing, # backend_indexer (OuterIndexer) is also decomposed. - backend_indexer, np_indexer1 = _decompose_outer_indexer( + backend_indexer1, np_indexer1 = _decompose_outer_indexer( backend_indexer, shape, indexing_support ) np_indexer = _combine_indexers(np_indexer1, shape, np_indexer) - return backend_indexer, np_indexer + return backend_indexer1, np_indexer -def _decompose_outer_indexer(indexer, shape, indexing_support): +def _decompose_outer_indexer( + indexer: Union[BasicIndexer, OuterIndexer], + shape: Tuple[int, ...], + indexing_support: IndexingSupport, +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose outer indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -930,7 +970,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): Parameters ---------- - indexer: VectorizedIndexer + indexer: OuterIndexer or BasicIndexer indexing_support: One of the entries of IndexingSupport Returns @@ -968,7 +1008,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): pos_indexer.append(k + s) else: 
pos_indexer.append(k) - indexer = pos_indexer + indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: # some backends such as h5py supports only 1 vector in indexers @@ -977,11 +1017,11 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): (np.max(k) - np.min(k) + 1.0) / len(np.unique(k)) if isinstance(k, np.ndarray) else 0 - for k in indexer + for k in indexer_elems ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer, shape)): + for i, (k, s) in enumerate(zip(indexer_elems, shape)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1002,7 +1042,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) @@ -1024,7 +1064,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. 
@@ -1199,6 +1239,8 @@ def posify_mask_indexer(indexer): class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a NumPy array to use explicit indexing.""" + __slots__ = ("array",) + def __init__(self, array): # In NumpyIndexingAdapter we only allow to store bare np.ndarray if not isinstance(array, np.ndarray): @@ -1249,6 +1291,8 @@ def __setitem__(self, key, value): class NdArrayLikeIndexingAdapter(NumpyIndexingAdapter): + __slots__ = ("array",) + def __init__(self, array): if not hasattr(array, "__array_function__"): raise TypeError( @@ -1261,6 +1305,8 @@ def __init__(self, array): class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a dask array to support explicit indexing.""" + __slots__ = ("array",) + def __init__(self, array): """ This adapter is created in Variable.__getitem__ in Variable._broadcast_indexes. @@ -1302,6 +1348,8 @@ class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a pandas.Index to preserve dtypes and handle explicit indexing. 
""" + __slots__ = ("array", "_dtype") + def __init__(self, array: Any, dtype: DTypeLike = None): self.array = utils.safe_cast_to_index(array) if dtype is None: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 592cae9007e..a812e7472ca 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -43,7 +43,8 @@ class Rolling: DataArray.rolling """ - _attributes = ["window", "min_periods", "center", "dim"] + __slots__ = ("obj", "window", "min_periods", "center", "dim") + _attributes = ("window", "min_periods", "center", "dim") def __init__(self, obj, windows, min_periods=None, center=False): """ @@ -93,17 +94,17 @@ def __init__(self, obj, windows, min_periods=None, center=False): # attributes self.window = window + if min_periods is not None and min_periods <= 0: + raise ValueError("min_periods must be greater than zero or None") self.min_periods = min_periods - if min_periods is None: - self._min_periods = window - else: - if min_periods <= 0: - raise ValueError("min_periods must be greater than zero or None") - self._min_periods = min_periods self.center = center self.dim = dim + @property + def _min_periods(self): + return self.min_periods if self.min_periods is not None else self.window + def __repr__(self): """provide a nice str repr of our rolling object""" @@ -152,6 +153,8 @@ def count(self): class DataArrayRolling(Rolling): + __slots__ = ("window_labels",) + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for DataArray. @@ -381,6 +384,8 @@ def _numpy_or_bottleneck_reduce( class DatasetRolling(Rolling): + __slots__ = ("rollings",) + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for Dataset. 
@@ -516,7 +521,8 @@ class Coarsen: DataArray.coarsen """ - _attributes = ["windows", "side", "trim_excess"] + __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess") + _attributes = ("windows", "side", "trim_excess") def __init__(self, obj, windows, boundary, side, coord_func): """ @@ -569,6 +575,8 @@ def __repr__(self): class DataArrayCoarsen(Coarsen): + __slots__ = () + @classmethod def _reduce_method(cls, func): """ @@ -599,6 +607,8 @@ def wrapped_func(self, **kwargs): class DatasetCoarsen(Coarsen): + __slots__ = () + @classmethod def _reduce_method(cls, func): """ diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 9e0037b4da0..0d730edeaeb 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -376,7 +376,7 @@ class Frozen(Mapping[K, V]): saved under the `mapping` attribute. """ - __slots__ = ["mapping"] + __slots__ = ("mapping",) def __init__(self, mapping: Mapping[K, V]): self.mapping = mapping @@ -407,7 +407,7 @@ class SortedKeysDict(MutableMapping[K, V]): mapping. """ - __slots__ = ["mapping"] + __slots__ = ("mapping",) def __init__(self, mapping: MutableMapping[K, V] = None): self.mapping = {} if mapping is None else mapping @@ -441,6 +441,8 @@ class OrderedSet(MutableSet[T]): elements, like an OrderedDict. """ + __slots__ = ("_ordered_dict",) + def __init__(self, values: AbstractSet[T] = None): self._ordered_dict = OrderedDict() # type: MutableMapping[T, None] if values is not None: @@ -481,6 +483,8 @@ class NdimSizeLenMixin: one that also defines ``ndim``, ``size`` and ``__len__``. """ + __slots__ = () + @property def ndim(self: Any) -> int: return len(self.shape) @@ -505,6 +509,8 @@ class NDArrayMixin(NdimSizeLenMixin): `dtype`, `shape` and `__getitem__`. """ + __slots__ = () + @property def dtype(self: Any) -> np.dtype: return self.array.dtype @@ -618,6 +624,8 @@ class HiddenKeyDict(MutableMapping[K, V]): """Acts like a normal dictionary, but hides certain keys. 
""" + __slots__ = ("_data", "_hidden_keys") + # ``__init__`` method required to create instance from class. def __init__(self, data: MutableMapping[K, V], hidden_keys: Iterable[K]): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c64dd8af6c6..ac4f7052f14 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -267,6 +267,8 @@ class Variable( they can use more complete metadata in context of coordinate labels. """ + __slots__ = ("_dims", "_data", "_attrs", "_encoding") + def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): """ Parameters @@ -1936,6 +1938,8 @@ class IndexVariable(Variable): unless another name is given. """ + __slots__ = () + def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): super().__init__(dims, data, attrs, encoding, fastpath) if self.ndim != 1: diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 79f94077c8f..ec51ff26c07 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -67,7 +67,6 @@ class FacetGrid: Contains dictionaries mapping coordinate names to values. None is used as a sentinel value for axes which should remain empty, ie. 
sometimes the bottom right grid - """ def __init__( diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 14f03d42fe7..8ca62ef58f1 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -452,6 +452,8 @@ class _PlotMethods: For example, DataArray.plot.imshow """ + __slots__ = ("_da",) + def __init__(self, darray): self._da = darray diff --git a/xarray/testing.py b/xarray/testing.py index fbb5904c678..9fa58b64001 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -197,8 +197,6 @@ def _assert_dataarray_invariants(da: DataArray): if da._indexes is not None: _assert_indexes_invariants_checks(da._indexes, da._coords, da.dims) - assert da._initialized is True - def _assert_dataset_invariants(ds: Dataset): assert isinstance(ds._variables, OrderedDict), type(ds._variables) @@ -235,7 +233,6 @@ def _assert_dataset_invariants(ds: Dataset): assert isinstance(ds._encoding, (type(None), dict)) assert isinstance(ds._attrs, (type(None), OrderedDict)) - assert ds._initialized is True def _assert_internal_invariants(xarray_obj: Union[DataArray, Dataset, Variable],): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 532cc32376a..42a93a5a8ee 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4616,3 +4616,25 @@ def test_rolling_exp(da, dim, window_type, window): ) assert_allclose(expected.variable, result.variable) + + +def test_no_dict(): + d = DataArray() + with pytest.raises(AttributeError): + d.__dict__ + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def test_subclass_slots(): + """Test that DataArray subclasses must explicitly define ``__slots__``. + + .. note:: + As of 0.13.0, this is actually mitigated into a FutureWarning for any class + defined outside of the xarray package. 
+ """ + with pytest.raises(AttributeError) as e: + + class MyArray(DataArray): + pass + + assert str(e.value) == "MyArray must explicitly define __slots__" diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3c2b9b6ce8f..3953e6c4146 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5706,3 +5706,25 @@ def test_trapz_datetime(dask, which_datetime): actual2 = da.integrate("time", datetime_unit="h") assert_allclose(actual, actual2 / 24.0) + + +def test_no_dict(): + d = Dataset() + with pytest.raises(AttributeError): + d.__dict__ + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def test_subclass_slots(): + """Test that Dataset subclasses must explicitly define ``__slots__``. + + .. note:: + As of 0.13.0, this is actually mitigated into a FutureWarning for any class + defined outside of the xarray package. + """ + with pytest.raises(AttributeError) as e: + + class MyDS(Dataset): + pass + + assert str(e.value) == "MyDS must explicitly define __slots__"