From a0bea98a1b38045af5a874bbe9447a3460c91bd5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 21 Dec 2017 10:23:02 -0800 Subject: [PATCH 1/4] move backend append logic to the prepare_variable methods --- xarray/backends/common.py | 14 +++----------- xarray/backends/h5netcdf_.py | 7 +++++-- xarray/backends/netCDF4_.py | 32 ++++++++++++++++++-------------- xarray/backends/netcdf3.py | 1 - xarray/backends/scipy_.py | 3 ++- xarray/backends/zarr.py | 7 +++++-- xarray/core/variable.py | 4 ---- 7 files changed, 33 insertions(+), 35 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fd408877f87..c289d35fa2e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -7,7 +7,6 @@ import traceback import contextlib from collections import Mapping -from distutils.version import LooseVersion from ..conventions import cf_encoder from ..core import indexing @@ -183,11 +182,7 @@ def add(self, source, target): def sync(self): if self.sources: import dask.array as da - import dask - if LooseVersion(dask.__version__) > LooseVersion('0.8.1'): - da.store(self.sources, self.targets, lock=self.lock) - else: - da.store(self.sources, self.targets) + da.store(self.sources, self.targets, lock=self.lock) self.sources = [] self.targets = [] @@ -232,11 +227,8 @@ def set_variables(self, variables, check_encoding_set, for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set - if vn not in self.variables: - target, source = self.prepare_variable( - name, v, check, unlimited_dims=unlimited_dims) - else: - target, source = self.ds.variables[name], v.data + target, source = self.prepare_variable( + name, v, check, unlimited_dims=unlimited_dims) self.writer.add(source, target) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b4d2dc7e689..82abaade06a 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -156,8 +156,11 @@ def prepare_variable(self, name, variable, check_encoding=False, 'chunksizes', 'fletcher32']: if key in encoding: kwargs[key] = encoding[key] - nc4_var = self.ds.createVariable(name, dtype, variable.dims, - fill_value=fill_value, **kwargs) + if name not in self.ds.variables: + nc4_var = self.ds.createVariable(name, dtype, variable.dims, + fill_value=fill_value, **kwargs) + else: + nc4_var = self.ds.variables[name] for k, v in iteritems(attrs): nc4_var.setncattr(k, v) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 59e195b1c9a..d8aa33f35dc 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -352,20 +352,24 @@ def prepare_variable(self, name, variable, check_encoding=False, encoding = _extract_nc4_variable_encoding( variable, raise_on_invalid=check_encoding, unlimited_dims=unlimited_dims) - nc4_var = self.ds.createVariable( - varname=name, - datatype=datatype, - dimensions=variable.dims, - zlib=encoding.get('zlib', False), - complevel=encoding.get('complevel', 4), - shuffle=encoding.get('shuffle', True), - fletcher32=encoding.get('fletcher32', False), - contiguous=encoding.get('contiguous', False), - chunksizes=encoding.get('chunksizes'), - endian='native', - least_significant_digit=encoding.get('least_significant_digit'), - fill_value=fill_value) - _disable_auto_decode_variable(nc4_var) + if name in self.ds.variables: + nc4_var = self.ds.variables[name] + else: + nc4_var = self.ds.createVariable( + varname=name, + datatype=datatype, + dimensions=variable.dims, + zlib=encoding.get('zlib', False), + 
complevel=encoding.get('complevel', 4), + shuffle=encoding.get('shuffle', True), + fletcher32=encoding.get('fletcher32', False), + contiguous=encoding.get('contiguous', False), + chunksizes=encoding.get('chunksizes'), + endian='native', + least_significant_digit=encoding.get( + 'least_significant_digit'), + fill_value=fill_value) + _disable_auto_decode_variable(nc4_var) for k, v in iteritems(attrs): # set attributes one-by-one since netCDF4<1.0.10 can't handle diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 7194e06186f..7aa054bc119 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -6,7 +6,6 @@ import numpy as np from .. import conventions, Variable -from ..core import duck_array_ops from ..core.pycompat import basestring, unicode_type, OrderedDict diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 240b8f2ebaa..75d2de5e43b 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -196,7 +196,8 @@ def prepare_variable(self, name, variable, check_encoding=False, # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support # incremental writes. - self.ds.createVariable(name, data.dtype, variable.dims) + if name not in self.variables: + self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = self.ds.variables[name] for k, v in iteritems(variable.attrs): self._validate_attr_key(k) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 779d8d07886..30ea51811c4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -378,8 +378,11 @@ def prepare_variable(self, name, variable, check_encoding=False, # compressor='default', fill_value=0, order='C', store=None, # synchronizer=None, overwrite=False, path=None, chunk_store=None, # filters=None, cache_metadata=True, **kwargs) - zarr_array = self.ds.create(name, shape=shape, dtype=dtype, - fill_value=fill_value, **encoding) + if name in self.ds: + zarr_array = self.ds[name] + else: + zarr_array = self.ds.create(name, shape=shape, dtype=dtype, + fill_value=fill_value, **encoding) # decided not to explicity enumerate encoding options because we # risk overriding zarr's defaults (e.g. if we specificy # cache_metadata=None instead of True). Alternative is to have lots of diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 385ab2066cf..e3bead51a94 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,6 @@ from collections import defaultdict import functools import itertools -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -1392,9 +1391,6 @@ def quantile(self, q, dim=None, interpolation='linear'): raise TypeError("quantile does not work for arrays stored as dask " "arrays. 
Load the data via .compute() or .load() " "prior to calling this method.") - if LooseVersion(np.__version__) < LooseVersion('1.10.0'): - raise NotImplementedError( - 'quantile requres numpy version 1.10.0 or later') q = np.asarray(q, dtype=np.float64) From afdb254b74d01d4ed751a784e997fe579654de39 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 22 Dec 2017 11:38:46 -0700 Subject: [PATCH 2/4] deprecate variables/dimensions/attrs properties on AbstractWritableDataStore --- xarray/backends/common.py | 18 +++++------------- xarray/backends/scipy_.py | 4 ++-- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index c289d35fa2e..2f910456f54 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -132,24 +132,15 @@ def load(self): @property def variables(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. - variables, _ = self.load() - return variables + raise RuntimeError('using variables property is deprecated') @property def attrs(self): - # Because encoding/decoding might happen which may require both the - # attributes and the variables, and because a store may be updated - # we need to load both the attributes and variables - # anytime either one is requested. - _, attributes = self.load() - return attributes + raise RuntimeError('using attrs property is deprecated') @property def dimensions(self): + raise RuntimeError('using dimensions property is deprecated') return self.get_dimensions() def close(self): @@ -235,8 +226,9 @@ def set_variables(self, variables, check_encoding_set, def set_necessary_dimensions(self, variable, unlimited_dims=None): if unlimited_dims is None: unlimited_dims = set() + dims = self.get_dimensions() for d, l in zip(variable.dims, variable.shape): - if d not in self.dimensions: + if d not in dims: is_unlimited = d in unlimited_dims self.set_dimension(d, l, is_unlimited) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 75d2de5e43b..0994d8510b8 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -165,7 +165,7 @@ def get_encoding(self): def set_dimension(self, name, length, is_unlimited=False): with self.ensure_open(autoclose=False): - if name in self.dimensions: + if name in self.ds.dimensions: raise ValueError('%s does not support modifying dimensions' % type(self).__name__) dim_length = length if not is_unlimited else None @@ -196,7 +196,7 @@ def prepare_variable(self, name, variable, check_encoding=False, # nb. this still creates a numpy array in all memory, even though we # don't write the data yet; scipy.io.netcdf does not not support # incremental writes. 
-        if name not in self.variables:
+        if name not in self.ds.variables:
             self.ds.createVariable(name, data.dtype, variable.dims)
         scipy_var = self.ds.variables[name]
         for k, v in iteritems(variable.attrs):

From cc021508b090ce7b7ca05033b03e9260dfa2cb73 Mon Sep 17 00:00:00 2001
From: Joseph Hamman
Date: Sun, 24 Dec 2017 12:23:34 -0700
Subject: [PATCH 3/4] warnings instead of errors for backend properties

---
 xarray/backends/common.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index 2f910456f54..83753ced8f5 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -7,6 +7,7 @@
 import traceback
 import contextlib
 from collections import Mapping
+import warnings

 from ..conventions import cf_encoder
 from ..core import indexing
@@ -132,15 +133,25 @@ def load(self):

     @property
     def variables(self):
-        raise RuntimeError('using variables property is deprecated')
+        warnings.warn('The ``variables`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
+        variables, _ = self.load()
+        return variables

     @property
     def attrs(self):
-        raise RuntimeError('using attrs property is deprecated')
+        warnings.warn('The ``attrs`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
+        _, attrs = self.load()
+        return attrs

     @property
     def dimensions(self):
-        raise RuntimeError('using dimensions property is deprecated')
+        warnings.warn('The ``dimensions`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
         return self.get_dimensions()

     def close(self):

From ec5172e2a6b9aea2f1d3326fa5370b64dc453a9c Mon Sep 17 00:00:00 2001
From: Joseph Hamman
Date: Sun, 24 Dec 2017 14:33:10 -0700
Subject: [PATCH 4/4] whatsnew

---
 doc/whats-new.rst | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 17ff3ab661c..728e40d4409 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -59,8 +59,13 @@ Bug fixes
   ``dask.threaded.get``. By `Matthew Rocklin `_.
 - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays with
   size one in some dimension can now be plotted, which is good for
-  exploring satellite imagery. (:issue:`1780`)
+  exploring satellite imagery (:issue:`1780`).
   By `Zac Hatfield-Dodds `_.
+- The ``variables``, ``attrs``, and ``dimensions`` properties have been
+  deprecated as part of a bug fix addressing an issue where backends were
+  unintentionally loading the datastore's data and attributes repeatedly during
+  writes (:issue:`1798`).
+  By `Joe Hamman `_.


 .. _whats-new.0.10.0:
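
Taken together, the series leaves writable backends with two behaviours worth keeping in mind: ``prepare_variable`` now creates the on-disk variable only when it does not already exist, so append-style writes reuse the existing target, and the ``variables``/``attrs``/``dimensions`` properties emit a ``FutureWarning`` instead of silently reloading the store. The sketch below illustrates both patterns in miniature; ``ToyWritableStore``, its ``_variables`` dict, and the simplified method signatures are hypothetical stand-ins invented for illustration, not xarray's actual backend classes or API:

    import warnings


    class ToyWritableStore(object):
        """Hypothetical, minimal stand-in for a writable xarray data store."""

        def __init__(self):
            self._variables = {}  # pretend on-disk variables, keyed by name

        def load(self):
            # xarray's real stores read variables and attributes back from
            # disk here; this toy just hands back what it already holds.
            return dict(self._variables), {}

        def prepare_variable(self, name, data):
            # Patch 1's pattern: create the on-disk variable only if it does
            # not exist yet; otherwise reuse it so appends hit the same target.
            if name in self._variables:
                target = self._variables[name]
            else:
                target = self._variables[name] = []
            return target, data

        @property
        def variables(self):
            # Patches 2-3: the property keeps working but warns, instead of
            # reloading silently (or raising RuntimeError, as patch 2 did).
            warnings.warn('The ``variables`` property has been deprecated and '
                          'will be removed in a future release.',
                          FutureWarning, stacklevel=2)
            variables, _ = self.load()
            return variables


    store = ToyWritableStore()
    target, source = store.prepare_variable('temperature', [1, 2, 3])
    target.extend(source)                  # a writer would normally batch this

    target2, more = store.prepare_variable('temperature', [4, 5])
    assert target2 is target               # append mode reuses the same target
    target2.extend(more)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        assert store.variables == {'temperature': [1, 2, 3, 4, 5]}
    assert any(w.category is FutureWarning for w in caught)

Keeping the existence check inside each backend's ``prepare_variable`` is what lets ``set_variables`` in ``common.py`` drop its ``if vn not in self.variables`` branch: new and appended variables share a single code path, and the deprecated properties are no longer touched during writes.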