Skip to content

Commit 57a4479

Browse files
authored
ensure combine_by_coords raises on different types (#5090)
* ensure combine_by_coords raises on different calendars [test-upstream]
* check types [test-upstream]
* use correct cftime version
* move to function [test-upstream]
1 parent ba47216 commit 57a4479

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

xarray/core/combine.py

+17-1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,18 @@ def _infer_tile_ids_from_nested_list(entry, current_pos):
4444
yield current_pos, entry
4545

4646

47+
def _ensure_same_types(series, dim):
    """Raise if an object-dtype series holds values of more than one type.

    Parameters
    ----------
    series
        Series-like object exposing ``.dtype`` and ``.map``. It is only
        inspected when ``series.dtype == object``; any other dtype is
        accepted without further checks.
    dim
        Name of the dimension being combined. It is only interpolated into
        the error message.

    Raises
    ------
    TypeError
        If the object-dtype ``series`` contains values of more than one
        distinct type.
    """
    if series.dtype != object:
        return

    found_types = set(series.map(type))
    if len(found_types) > 1:
        # Sort the names: set iteration order is arbitrary, and the error
        # message should be identical from run to run.
        type_names = ", ".join(sorted(t.__name__ for t in found_types))
        raise TypeError(
            f"Cannot combine along dimension '{dim}' with mixed types."
            f" Found: {type_names}."
        )
57+
58+
4759
def _infer_concat_order_from_coords(datasets):
4860

4961
concat_dims = []
@@ -88,11 +100,15 @@ def _infer_concat_order_from_coords(datasets):
88100
raise ValueError("Cannot handle size zero dimensions")
89101
first_items = pd.Index([index[0] for index in indexes])
90102

103+
series = first_items.to_series()
104+
105+
# ensure series does not contain mixed types, e.g. cftime calendars
106+
_ensure_same_types(series, dim)
107+
91108
# Sort datasets along dim
92109
# We want rank but with identical elements given identical
93110
# position indices - they should be concatenated along another
94111
# dimension, not along this one
95-
series = first_items.to_series()
96112
rank = series.rank(
97113
method="dense", ascending=ascending, numeric_only=False
98114
)

xarray/tests/test_combine.py

+19-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from datetime import datetime
2+
from distutils.version import LooseVersion
23
from itertools import product
34

45
import numpy as np
@@ -865,5 +866,22 @@ def test_combine_by_coords_raises_for_differing_calendars():
865866
da_1 = DataArray([0], dims=["time"], coords=[time_1], name="a").to_dataset()
866867
da_2 = DataArray([1], dims=["time"], coords=[time_2], name="a").to_dataset()
867868

868-
with raises_regex(TypeError, r"cannot compare .* \(different calendars\)"):
869+
if LooseVersion(cftime.__version__) >= LooseVersion("1.5"):
870+
error_msg = "Cannot combine along dimension 'time' with mixed types."
871+
else:
872+
error_msg = r"cannot compare .* \(different calendars\)"
873+
874+
with raises_regex(TypeError, error_msg):
875+
combine_by_coords([da_1, da_2])
876+
877+
878+
def test_combine_by_coords_raises_for_differing_types():
    """Combining datasets whose "time" labels mix str and bytes raises TypeError."""
    # A str label and a bytes label cannot be compared with each other.
    text_ds = DataArray([0], dims=["time"], coords=[["a"]], name="a").to_dataset()
    bytes_ds = DataArray([1], dims=["time"], coords=[[b"b"]], name="a").to_dataset()

    expected_msg = "Cannot combine along dimension 'time' with mixed types."
    with raises_regex(TypeError, expected_msg):
        combine_by_coords([text_ds, bytes_ds])

0 commit comments

Comments
 (0)