
Commit 6eac857

Authored by Joe Hamman
move backend append logic to the prepare_variable methods (#1799)
* move backend append logic to the prepare_variable methods
* deprecate variables/dimensions/attrs properties on AbstractWritableDataStore
* warnings instead of errors for backend properties
* whatsnew
1 parent 4a9c1e3 commit 6eac857

File tree: 8 files changed, +54 -48 lines changed


doc/whats-new.rst (+6, -1)

@@ -59,8 +59,13 @@ Bug fixes
   ``dask.threaded.get``. By `Matthew Rocklin <https://github.com/mrocklin>`_.
 - Bug fixes in :py:meth:`DataArray.plot.imshow`: all-NaN arrays and arrays
   with size one in some dimension can now be plotted, which is good for
-  exploring satellite imagery. (:issue:`1780`)
+  exploring satellite imagery (:issue:`1780`).
   By `Zac Hatfield-Dodds <https://github.com/Zac-HD>`_.
+- The ``variables``, ``attrs``, and ``dimensions`` properties have been
+  deprecated as part of a bug fix addressing an issue where backends were
+  unintentionally loading the datastore's data and attributes repeatedly
+  during writes (:issue:`1798`).
+  By `Joe Hamman <https://github.com/jhamman>`_.
 
 
 .. _whats-new.0.10.0:
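For backend authors, the deprecation means the load-everything properties now warn while the explicit methods and ``load()`` stay silent. A minimal sketch of the new behavior, using ``InMemoryDataStore`` (the simplest writable store) and the internal ``dump_to_store`` path purely for illustration:

import warnings

import xarray as xr
from xarray.backends.memory import InMemoryDataStore

# Build a tiny in-memory store to poke at the deprecated properties.
store = InMemoryDataStore()
xr.Dataset({'a': ('x', [1, 2, 3])}).dump_to_store(store)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    variables = store.variables        # deprecated: now emits FutureWarning
print([w.category.__name__ for w in caught])   # ['FutureWarning']

# Preferred: load variables and attributes together in a single pass.
variables, attrs = store.load()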

xarray/backends/common.py (+17, -22)

@@ -7,7 +7,7 @@
 import traceback
 import contextlib
 from collections import Mapping
-from distutils.version import LooseVersion
+import warnings
 
 from ..conventions import cf_encoder
 from ..core import indexing
@@ -133,24 +133,25 @@ def load(self):
 
     @property
     def variables(self):
-        # Because encoding/decoding might happen which may require both the
-        # attributes and the variables, and because a store may be updated
-        # we need to load both the attributes and variables
-        # anytime either one is requested.
+        warnings.warn('The ``variables`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
        variables, _ = self.load()
        return variables
 
     @property
     def attrs(self):
-        # Because encoding/decoding might happen which may require both the
-        # attributes and the variables, and because a store may be updated
-        # we need to load both the attributes and variables
-        # anytime either one is requested.
-        _, attributes = self.load()
-        return attributes
+        warnings.warn('The ``attrs`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
+        _, attrs = self.load()
+        return attrs
 
     @property
     def dimensions(self):
+        warnings.warn('The ``dimensions`` property has been deprecated and '
+                      'will be removed in xarray v0.11.',
+                      FutureWarning, stacklevel=2)
         return self.get_dimensions()
 
     def close(self):
@@ -183,11 +184,7 @@ def add(self, source, target):
     def sync(self):
         if self.sources:
             import dask.array as da
-            import dask
-            if LooseVersion(dask.__version__) > LooseVersion('0.8.1'):
-                da.store(self.sources, self.targets, lock=self.lock)
-            else:
-                da.store(self.sources, self.targets)
+            da.store(self.sources, self.targets, lock=self.lock)
             self.sources = []
             self.targets = []
 
@@ -232,19 +229,17 @@ def set_variables(self, variables, check_encoding_set,
         for vn, v in iteritems(variables):
             name = _encode_variable_name(vn)
             check = vn in check_encoding_set
-            if vn not in self.variables:
-                target, source = self.prepare_variable(
-                    name, v, check, unlimited_dims=unlimited_dims)
-            else:
-                target, source = self.ds.variables[name], v.data
+            target, source = self.prepare_variable(
+                name, v, check, unlimited_dims=unlimited_dims)
 
             self.writer.add(source, target)
 
     def set_necessary_dimensions(self, variable, unlimited_dims=None):
         if unlimited_dims is None:
             unlimited_dims = set()
+        dims = self.get_dimensions()
         for d, l in zip(variable.dims, variable.shape):
-            if d not in self.dimensions:
+            if d not in dims:
                 is_unlimited = d in unlimited_dims
                 self.set_dimension(d, l, is_unlimited)
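The net effect of the ``set_variables`` change is that append handling moves out of the base class: each backend's ``prepare_variable`` is now responsible for either creating the on-disk variable or handing back the existing one. A toy, dict-backed sketch of that contract, for illustration only (the names here are hypothetical; the real backends call ``createVariable``, ``ds.create``, etc., as shown in the diffs below):

from collections import OrderedDict

import numpy as np


class ToyStore(object):
    """Hypothetical stand-in for a writable backend, for illustration only."""

    def __init__(self):
        self.arrays = OrderedDict()

    def prepare_variable(self, name, data):
        if name not in self.arrays:
            # first write: create the target array
            self.arrays[name] = np.empty_like(data)
        # append/overwrite: reuse the existing target
        return self.arrays[name], data


store = ToyStore()
target, source = store.prepare_variable('x', np.arange(3))
target[...] = source          # this assignment is what ArrayWriter.add does
print(store.arrays['x'])      # [0 1 2]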

xarray/backends/h5netcdf_.py (+5, -2)

@@ -156,8 +156,11 @@ def prepare_variable(self, name, variable, check_encoding=False,
                     'chunksizes', 'fletcher32']:
             if key in encoding:
                 kwargs[key] = encoding[key]
-        nc4_var = self.ds.createVariable(name, dtype, variable.dims,
-                                         fill_value=fill_value, **kwargs)
+        if name not in self.ds.variables:
+            nc4_var = self.ds.createVariable(name, dtype, variable.dims,
+                                             fill_value=fill_value, **kwargs)
+        else:
+            nc4_var = self.ds.variables[name]
 
         for k, v in iteritems(attrs):
             nc4_var.setncattr(k, v)

xarray/backends/netCDF4_.py (+18, -14)

@@ -352,20 +352,24 @@ def prepare_variable(self, name, variable, check_encoding=False,
         encoding = _extract_nc4_variable_encoding(
             variable, raise_on_invalid=check_encoding,
             unlimited_dims=unlimited_dims)
-        nc4_var = self.ds.createVariable(
-            varname=name,
-            datatype=datatype,
-            dimensions=variable.dims,
-            zlib=encoding.get('zlib', False),
-            complevel=encoding.get('complevel', 4),
-            shuffle=encoding.get('shuffle', True),
-            fletcher32=encoding.get('fletcher32', False),
-            contiguous=encoding.get('contiguous', False),
-            chunksizes=encoding.get('chunksizes'),
-            endian='native',
-            least_significant_digit=encoding.get('least_significant_digit'),
-            fill_value=fill_value)
-        _disable_auto_decode_variable(nc4_var)
+        if name in self.ds.variables:
+            nc4_var = self.ds.variables[name]
+        else:
+            nc4_var = self.ds.createVariable(
+                varname=name,
+                datatype=datatype,
+                dimensions=variable.dims,
+                zlib=encoding.get('zlib', False),
+                complevel=encoding.get('complevel', 4),
+                shuffle=encoding.get('shuffle', True),
+                fletcher32=encoding.get('fletcher32', False),
+                contiguous=encoding.get('contiguous', False),
+                chunksizes=encoding.get('chunksizes'),
+                endian='native',
+                least_significant_digit=encoding.get(
+                    'least_significant_digit'),
+                fill_value=fill_value)
+            _disable_auto_decode_variable(nc4_var)
 
         for k, v in iteritems(attrs):
             # set attributes one-by-one since netCDF4<1.0.10 can't handle
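These backend branches are what make appending to an existing file work from the user side. A minimal round trip with ``to_netcdf(mode='a')``; the file name is arbitrary, and the shared ``x`` coordinate is what exercises the reuse branch in ``prepare_variable``:

import numpy as np
import xarray as xr

ds = xr.Dataset({'temperature': ('x', np.arange(4.0))},
                coords={'x': [10, 20, 30, 40]})
ds.to_netcdf('example.nc', mode='w')

# Append a second variable; the 'x' coordinate already exists in the file,
# so prepare_variable returns the existing netCDF variable instead of
# calling createVariable again.
ds2 = xr.Dataset({'pressure': ('x', np.ones(4))},
                 coords={'x': [10, 20, 30, 40]})
ds2.to_netcdf('example.nc', mode='a')

print(xr.open_dataset('example.nc'))   # has both temperature and pressure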

xarray/backends/netcdf3.py (-1)

@@ -6,7 +6,6 @@
 import numpy as np
 
 from .. import conventions, Variable
-from ..core import duck_array_ops
 from ..core.pycompat import basestring, unicode_type, OrderedDict
 
 
xarray/backends/scipy_.py (+3, -2)

@@ -165,7 +165,7 @@ def get_encoding(self):
 
     def set_dimension(self, name, length, is_unlimited=False):
         with self.ensure_open(autoclose=False):
-            if name in self.dimensions:
+            if name in self.ds.dimensions:
                 raise ValueError('%s does not support modifying dimensions'
                                  % type(self).__name__)
             dim_length = length if not is_unlimited else None
@@ -196,7 +196,8 @@ def prepare_variable(self, name, variable, check_encoding=False,
         # nb. this still creates a numpy array in all memory, even though we
         # don't write the data yet; scipy.io.netcdf does not not support
         # incremental writes.
-        self.ds.createVariable(name, data.dtype, variable.dims)
+        if name not in self.ds.variables:
+            self.ds.createVariable(name, data.dtype, variable.dims)
         scipy_var = self.ds.variables[name]
         for k, v in iteritems(variable.attrs):
             self._validate_attr_key(k)

xarray/backends/zarr.py (+5, -2)

@@ -378,8 +378,11 @@ def prepare_variable(self, name, variable, check_encoding=False,
         #       compressor='default', fill_value=0, order='C', store=None,
         #       synchronizer=None, overwrite=False, path=None, chunk_store=None,
         #       filters=None, cache_metadata=True, **kwargs)
-        zarr_array = self.ds.create(name, shape=shape, dtype=dtype,
-                                    fill_value=fill_value, **encoding)
+        if name in self.ds:
+            zarr_array = self.ds[name]
+        else:
+            zarr_array = self.ds.create(name, shape=shape, dtype=dtype,
+                                        fill_value=fill_value, **encoding)
         # decided not to explicity enumerate encoding options because we
         # risk overriding zarr's defaults (e.g. if we specificy
         # cache_metadata=None instead of True). Alternative is to have lots of
# cache_metadata=None instead of True). Alternative is to have lots of

xarray/core/variable.py (-4)

@@ -5,7 +5,6 @@
 from collections import defaultdict
 import functools
 import itertools
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
@@ -1392,9 +1391,6 @@ def quantile(self, q, dim=None, interpolation='linear'):
             raise TypeError("quantile does not work for arrays stored as dask "
                             "arrays. Load the data via .compute() or .load() "
                             "prior to calling this method.")
-        if LooseVersion(np.__version__) < LooseVersion('1.10.0'):
-            raise NotImplementedError(
-                'quantile requres numpy version 1.10.0 or later')
 
         q = np.asarray(q, dtype=np.float64)
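The removed guard is dropped presumably because the supported numpy versions already satisfy it, so ``quantile`` now relies on numpy unconditionally. A quick usage sketch, unchanged by this commit:

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12.0).reshape(3, 4), dims=('time', 'x'))
print(da.quantile(0.5, dim='time'))     # median along 'time'
print(da.quantile([0.25, 0.75]))        # quantiles over all dimensions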
