Closed
Description
I'm attempting to run an example similar to the one shown in the xarray flox demo.
Everything appears to be built lazily without problems, but the computation fails when I try to pull the entire array down. Certain subsets can be pulled down, but it seems to fail for slices that cover certain chunks.
xr.set_options(use_flox=True, use_numbagg=True)
county_mean = fxarray.xarray_reduce(
ds_subset,
aligned_counties,
func="mean",
expected_groups=(county_ids,),
method="cohorts")
county_mean
# some slices work
county_mean.isel(time=slice(0, 2), GEOID=slice(300, 310)).values
array([[1.02196488, 1.44516348, 0.75965351, 1.08162497, 1.42856831,
0.57487382, 1.25551675, 1.10393899, 0.64814745, 0.87742339],
[1.02451059, 1.44312821, 0.75893909, 1.08319094, 1.4299601 ,
0.57509014, 1.25875236, 1.11137815, 0.64105671, 0.87635005]])
# others don't (including the entire array at once)
county_mean.values
IndexError Traceback (most recent call last)
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/array/chunk.py:421, in getitem(obj, index)
420 try:
--> 421 result = obj[index]
422 except IndexError as e:
IndexError: index 1 is out of bounds for axis 1 with size 1
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[63], line 2
1 # others don't (including the entire array at once)
----> 2 county_mean.values
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/xarray/core/dataarray.py:811, in DataArray.values(self)
798 @property
799 def values(self) -> np.ndarray:
800 """
801 The array's data converted to numpy.ndarray.
802
(...)
809 to this array may be reflected in the DataArray as well.
810 """
--> 811 return self.variable.values
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/xarray/core/variable.py:554, in Variable.values(self)
551 @property
552 def values(self) -> np.ndarray:
553 """The variable's data as a numpy.ndarray"""
--> 554 return _as_array_or_item(self._data)
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/xarray/core/variable.py:352, in _as_array_or_item(data)
338 def _as_array_or_item(data):
339 """Return the given values as a numpy array, or as an individual item if
340 it's a 0d datetime64 or timedelta64 array.
341
(...)
350 TODO: remove this (replace with np.asarray) once these issues are fixed
351 """
--> 352 data = np.asarray(data)
353 if data.ndim == 0:
354 if data.dtype.kind == "M":
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/array/core.py:1746, in Array.__array__(self, dtype, **kwargs)
1745 def __array__(self, dtype=None, **kwargs):
-> 1746 x = self.compute()
1747 if dtype and x.dtype != dtype:
1748 x = x.astype(dtype)
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/base.py:372, in DaskMethodsMixin.compute(self, **kwargs)
348 def compute(self, **kwargs):
349 """Compute this dask collection
350
351 This turns a lazy Dask collection into its in-memory equivalent.
(...)
370 dask.compute
371 """
--> 372 (result,) = compute(self, traverse=False, **kwargs)
373 return result
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/base.py:660, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
657 postcomputes.append(x.__dask_postcompute__())
659 with shorten_traceback():
--> 660 results = schedule(dsk, keys, **kwargs)
662 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/array/_shuffle.py:305, in _getitem(obj, index)
304 def _getitem(obj, index):
--> 305 return getitem(obj, index[1])
File /glade/work/cbecker/conda-envs/cat/lib/python3.11/site-packages/dask/array/chunk.py:423, in getitem(obj, index)
421 result = obj[index]
422 except IndexError as e:
--> 423 raise ValueError(
424 "Array chunk size or shape is unknown. "
425 "Possible solution with x.compute_chunk_sizes()"
426 ) from e
428 try:
429 if not result.flags.owndata and obj.size >= 2 * result.size:
ValueError: Array chunk size or shape is unknown. Possible solution with x.compute_chunk_sizes()
Metadata
Metadata
Assignees
Labels
No labels