     _normalize_path,
 )
 from xarray.backends.locks import _get_scheduler
+from xarray.backends.zarr import open_zarr
 from xarray.core import indexing
 from xarray.core.combine import (
     _infer_concat_order_from_positions,
@@ -1443,10 +1444,63 @@ def save_mfdataset(
         )


-def _validate_region(ds, region):
+def _auto_detect_region(ds_new, ds_orig, dim):
+    # Create a mapping array of coordinates to indices on the original array
+    coord = ds_orig[dim]
+    da_map = DataArray(np.arange(coord.size), coords={dim: coord})
+
+    try:
+        da_idxs = da_map.sel({dim: ds_new[dim]})
+    except KeyError as e:
+        if "not all values found" in str(e):
+            raise KeyError(
+                f"Not all values of coordinate '{dim}' in the new array were"
+                " found in the original store. Writing to a zarr region slice"
+                " requires that no dimensions or metadata are changed by the write."
+            )
+        else:
+            raise e
+
+    if (da_idxs.diff(dim) != 1).any():
+        raise ValueError(
+            f"The auto-detected region of coordinate '{dim}' for writing new data"
+            " to the original store had non-contiguous indices. Writing to a zarr"
+            " region slice requires that the new data constitute a contiguous subset"
+            " of the original store."
+        )
+
+    dim_slice = slice(da_idxs.values[0], da_idxs.values[-1] + 1)
+
+    return dim_slice
+
+
+def _auto_detect_regions(ds, region, open_kwargs):
+    ds_original = open_zarr(**open_kwargs)
+    for key, val in region.items():
+        if val == "auto":
+            region[key] = _auto_detect_region(ds, ds_original, key)
+    return region
+
+
+def _validate_and_autodetect_region(
+    ds, region, mode, open_kwargs
+) -> tuple[dict[str, slice], bool]:
+    if region == "auto":
+        region = {dim: "auto" for dim in ds.dims}
+
     if not isinstance(region, dict):
         raise TypeError(f"``region`` must be a dict, got {type(region)}")

+    if any(v == "auto" for v in region.values()):
+        region_was_autodetected = True
+        if mode != "r+":
+            raise ValueError(
+                f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}"
+            )
+        region = _auto_detect_regions(ds, region, open_kwargs)
+    else:
+        region_was_autodetected = False
+
     for k, v in region.items():
         if k not in ds.dims:
             raise ValueError(
@@ -1478,6 +1532,8 @@ def _validate_region(ds, region):
             f".drop_vars({non_matching_vars!r})"
         )

+    return region, region_was_autodetected
+

 def _validate_datatypes_for_zarr_append(zstore, dataset):
     """If variable exists in the store, confirm dtype of the data to append is compatible with
@@ -1529,7 +1585,7 @@ def to_zarr(
     compute: Literal[True] = True,
     consolidated: bool | None = None,
     append_dim: Hashable | None = None,
-    region: Mapping[str, slice] | None = None,
+    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
     safe_chunks: bool = True,
     storage_options: dict[str, str] | None = None,
     zarr_version: int | None = None,
@@ -1553,7 +1609,7 @@ def to_zarr(
     compute: Literal[False],
     consolidated: bool | None = None,
     append_dim: Hashable | None = None,
-    region: Mapping[str, slice] | None = None,
+    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
     safe_chunks: bool = True,
     storage_options: dict[str, str] | None = None,
     zarr_version: int | None = None,
@@ -1575,7 +1631,7 @@ def to_zarr(
     compute: bool = True,
     consolidated: bool | None = None,
     append_dim: Hashable | None = None,
-    region: Mapping[str, slice] | None = None,
+    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None,
     safe_chunks: bool = True,
     storage_options: dict[str, str] | None = None,
     zarr_version: int | None = None,
@@ -1640,7 +1696,20 @@ def to_zarr(
     _validate_dataset_names(dataset)

     if region is not None:
-        _validate_region(dataset, region)
+        open_kwargs = dict(
+            store=store,
+            synchronizer=synchronizer,
+            group=group,
+            consolidated=consolidated,
+            storage_options=storage_options,
+            zarr_version=zarr_version,
+        )
+        region, region_was_autodetected = _validate_and_autodetect_region(
+            dataset, region, mode, open_kwargs
+        )
+        # drop indices to avoid potential race condition with auto region
+        if region_was_autodetected:
+            dataset = dataset.drop_vars(dataset.indexes)
         if append_dim is not None and append_dim in region:
             raise ValueError(
                 f"cannot list the same dimension in both ``append_dim`` and "