@@ -83,7 +83,7 @@ def __getitem__(self, key):
83
83
# could possibly have a work-around for 0d data here
84
84
85
85
86
- def _determine_zarr_chunks (enc_chunks , var_chunks , ndim , name ):
86
+ def _determine_zarr_chunks (enc_chunks , var_chunks , ndim , name , safe_chunks ):
87
87
"""
88
88
Given encoding chunks (possibly None) and variable chunks (possibly None)
89
89
"""
@@ -133,7 +133,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
133
133
134
134
if len (enc_chunks_tuple ) != ndim :
135
135
# throw away encoding chunks, start over
136
- return _determine_zarr_chunks (None , var_chunks , ndim , name )
136
+ return _determine_zarr_chunks (None , var_chunks , ndim , name , safe_chunks )
137
137
138
138
for x in enc_chunks_tuple :
139
139
if not isinstance (x , int ):
@@ -164,24 +164,32 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
164
164
continue
165
165
for dchunk in dchunks [:- 1 ]:
166
166
if dchunk % zchunk :
167
- raise NotImplementedError (
167
+ base_error = (
168
168
f"Specified zarr chunks encoding['chunks']={ enc_chunks_tuple !r} for "
169
169
f"variable named { name !r} would overlap multiple dask chunks { var_chunks !r} . "
170
- "This is not implemented in xarray yet. "
171
- "Consider either rechunking using `chunk()` or instead deleting "
172
- "or modifying `encoding['chunks']`."
170
+ f"Writing this array in parallel with dask could lead to corrupted data."
173
171
)
172
+ if safe_chunks :
173
+ raise NotImplementedError (
174
+ base_error
175
+ + " Consider either rechunking using `chunk()`, deleting "
176
+ "or modifying `encoding['chunks']`, or specify `safe_chunks=False`."
177
+ )
174
178
if dchunks [- 1 ] > zchunk :
175
- raise ValueError (
179
+ base_error = (
176
180
"Final chunk of Zarr array must be the same size or "
177
181
"smaller than the first. "
178
182
f"Specified Zarr chunk encoding['chunks']={ enc_chunks_tuple } , "
179
183
f"for variable named { name !r} "
180
- f"but { dchunks } in the variable's Dask chunks { var_chunks } is "
184
+ f"but { dchunks } in the variable's Dask chunks { var_chunks } are "
181
185
"incompatible with this encoding. "
182
- "Consider either rechunking using `chunk()` or instead deleting "
183
- "or modifying `encoding['chunks']`."
184
186
)
187
+ if safe_chunks :
188
+ raise NotImplementedError (
189
+ base_error
190
+ + " Consider either rechunking using `chunk()`, deleting "
191
+ "or modifying `encoding['chunks']`, or specify `safe_chunks=False`."
192
+ )
185
193
return enc_chunks_tuple
186
194
187
195
raise AssertionError ("We should never get here. Function logic must be wrong." )
@@ -203,7 +211,9 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
203
211
return dimensions , attributes
204
212
205
213
206
- def extract_zarr_variable_encoding (variable , raise_on_invalid = False , name = None ):
214
+ def extract_zarr_variable_encoding (
215
+ variable , raise_on_invalid = False , name = None , safe_chunks = True
216
+ ):
207
217
"""
208
218
Extract zarr encoding dictionary from xarray Variable
209
219
@@ -233,7 +243,7 @@ def extract_zarr_variable_encoding(variable, raise_on_invalid=False, name=None):
233
243
del encoding [k ]
234
244
235
245
chunks = _determine_zarr_chunks (
236
- encoding .get ("chunks" ), variable .chunks , variable .ndim , name
246
+ encoding .get ("chunks" ), variable .chunks , variable .ndim , name , safe_chunks
237
247
)
238
248
encoding ["chunks" ] = chunks
239
249
return encoding
@@ -285,6 +295,7 @@ class ZarrStore(AbstractWritableDataStore):
285
295
"_read_only" ,
286
296
"_synchronizer" ,
287
297
"_write_region" ,
298
+ "_safe_chunks" ,
288
299
)
289
300
290
301
@classmethod
@@ -300,6 +311,7 @@ def open_group(
300
311
storage_options = None ,
301
312
append_dim = None ,
302
313
write_region = None ,
314
+ safe_chunks = True ,
303
315
):
304
316
305
317
# zarr doesn't support pathlib.Path objects yet. zarr-python#601
@@ -323,10 +335,17 @@ def open_group(
323
335
zarr_group = zarr .open_consolidated (store , ** open_kwargs )
324
336
else :
325
337
zarr_group = zarr .open_group (store , ** open_kwargs )
326
- return cls (zarr_group , consolidate_on_close , append_dim , write_region )
338
+ return cls (
339
+ zarr_group , consolidate_on_close , append_dim , write_region , safe_chunks
340
+ )
327
341
328
342
def __init__ (
329
- self , zarr_group , consolidate_on_close = False , append_dim = None , write_region = None
343
+ self ,
344
+ zarr_group ,
345
+ consolidate_on_close = False ,
346
+ append_dim = None ,
347
+ write_region = None ,
348
+ safe_chunks = True ,
330
349
):
331
350
self .ds = zarr_group
332
351
self ._read_only = self .ds .read_only
@@ -335,6 +354,7 @@ def __init__(
335
354
self ._consolidate_on_close = consolidate_on_close
336
355
self ._append_dim = append_dim
337
356
self ._write_region = write_region
357
+ self ._safe_chunks = safe_chunks
338
358
339
359
def open_store_variable (self , name , zarr_array ):
340
360
data = indexing .LazilyIndexedArray (ZarrArrayWrapper (name , self ))
@@ -497,7 +517,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
497
517
else :
498
518
# new variable
499
519
encoding = extract_zarr_variable_encoding (
500
- v , raise_on_invalid = check , name = vn
520
+ v , raise_on_invalid = check , name = vn , safe_chunks = self . _safe_chunks
501
521
)
502
522
encoded_attrs = {}
503
523
# the magic for storing the hidden dimension data
0 commit comments