diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 49d39927f2b..31a14bcb5da 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,8 @@ Deprecations Bug fixes ~~~~~~~~~ +- Allow in-memory arrays with :py:func:`xarray.open_mfdataset` by passing ``chunks=None``. (:pull:`5704`). + By `Jimmy Westling <https://github.com/illviljan>`_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 76fcac62cd3..4b376a7967a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -817,7 +817,7 @@ def open_datatree( def open_mfdataset( paths: str | NestedSequence[str | os.PathLike], - chunks: T_Chunks | None = None, + chunks: T_Chunks | None = {}, concat_dim: ( str | DataArray @@ -858,7 +858,7 @@ def open_mfdataset( concatenation along more than one dimension is desired, then ``paths`` must be a nested list-of-lists (see ``combine_nested`` for details). (A string glob will be expanded to a 1-dimensional list.) - chunks : int, dict, 'auto' or None, optional + chunks : int, dict, 'auto' or None, default: {} Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. If int, chunk each dimension by ``chunks``. 
By default, chunks will be chosen to load entire @@ -1037,7 +1037,7 @@ def open_mfdataset( "instead specify combine='nested' along with a value for `concat_dim`.", ) - open_kwargs = dict(engine=engine, chunks=chunks or {}, **kwargs) + open_kwargs = dict(engine=engine, chunks=chunks, **kwargs) if parallel: import dask diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 33039dee7b0..ad9c2f90194 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3947,7 +3947,7 @@ def parallel(request): return request.param -@pytest.fixture(params=[None, 5]) +@pytest.fixture(params=[None, {}, 5]) def chunks(request): return request.param @@ -3999,16 +3999,21 @@ def test_open_mfdataset_manyfiles( subds.to_zarr(store=tmpfiles[ii]) # check that calculation on opened datasets works properly + chunks = chunks if (not chunks and readengine != "zarr") else "auto" with open_mfdataset( tmpfiles, combine="nested", concat_dim="x", engine=readengine, parallel=parallel, - chunks=chunks if (not chunks and readengine != "zarr") else "auto", + chunks=chunks, ) as actual: # check that using open_mfdataset returns dask arrays for variables - assert isinstance(actual["foo"].data, dask_array_type) + # when a chunks parameter has been defined: + if chunks is None: + assert isinstance(actual["foo"].data, np.ndarray) + else: + assert isinstance(actual["foo"].data, dask_array_type) assert_identical(original, actual)