Skip to content

Commit fbddd3e

Browse files
authored
Merge branch 'master' into idxmax
2 parents ad5c092 + acf7d41 commit fbddd3e

File tree

5 files changed

+125
-28
lines changed

5 files changed

+125
-28
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ New Features
3838
- Implement :py:meth:`~xarray.DataArray.idxmax`, :py:meth:`~xarray.DataArray.idxmin`,
3939
:py:meth:`~xarray.Dataset.idxmax`, :py:meth:`~xarray.Dataset.idxmin`. (:issue:`60`, :pull:`3871`)
4040
By `Todd Jennings <https://github.com/toddrjen>`_
41+
- Limited the length of array items with long string reprs to a
42+
reasonable width (:pull:`3900`)
43+
By `Maximilian Roos <https://github.com/max-sixty>`_
4144

4245

4346
Bug fixes

xarray/backends/zarr.py

+32-12
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,20 @@
1010
from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name
1111

1212
# need some special secret attributes to tell us the dimensions
13-
_DIMENSION_KEY = "_ARRAY_DIMENSIONS"
13+
DIMENSION_KEY = "_ARRAY_DIMENSIONS"
1414

1515

16-
# zarr attributes have to be serializable as json
17-
# many xarray datasets / variables have numpy arrays and values
18-
# these functions handle encoding / decoding of such items
19-
def _encode_zarr_attr_value(value):
16+
def encode_zarr_attr_value(value):
17+
"""
18+
Encode an attribute value as something that can be serialized as json
19+
20+
Many xarray datasets / variables have numpy arrays and values. This
21+
function handles encoding / decoding of such items.
22+
23+
ndarray -> list
24+
scalar array -> scalar
25+
other -> other (no change)
26+
"""
2027
if isinstance(value, np.ndarray):
2128
encoded = value.tolist()
2229
# this checks if it's a scalar number
@@ -170,7 +177,20 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key):
170177
return dimensions, attributes
171178

172179

173-
def _extract_zarr_variable_encoding(variable, raise_on_invalid=False):
180+
def extract_zarr_variable_encoding(variable, raise_on_invalid=False):
181+
"""
182+
Extract zarr encoding dictionary from xarray Variable
183+
184+
Parameters
185+
----------
186+
variable : xarray.Variable
187+
raise_on_invalid : bool, optional
188+
189+
Returns
190+
-------
191+
encoding : dict
192+
Zarr encoding for `variable`
193+
"""
174194
encoding = variable.encoding.copy()
175195

176196
valid_encodings = {"chunks", "compressor", "filters", "cache_metadata"}
@@ -271,7 +291,7 @@ def __init__(self, zarr_group, consolidate_on_close=False):
271291

272292
def open_store_variable(self, name, zarr_array):
273293
data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self))
274-
dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, _DIMENSION_KEY)
294+
dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY)
275295
attributes = dict(attributes)
276296
encoding = {
277297
"chunks": zarr_array.chunks,
@@ -298,7 +318,7 @@ def get_dimensions(self):
298318
dimensions = {}
299319
for k, v in self.ds.arrays():
300320
try:
301-
for d, s in zip(v.attrs[_DIMENSION_KEY], v.shape):
321+
for d, s in zip(v.attrs[DIMENSION_KEY], v.shape):
302322
if d in dimensions and dimensions[d] != s:
303323
raise ValueError(
304324
"found conflicting lengths for dimension %s "
@@ -310,7 +330,7 @@ def get_dimensions(self):
310330
raise KeyError(
311331
"Zarr object is missing the attribute `%s`, "
312332
"which is required for xarray to determine "
313-
"variable dimensions." % (_DIMENSION_KEY)
333+
"variable dimensions." % (DIMENSION_KEY)
314334
)
315335
return dimensions
316336

@@ -328,7 +348,7 @@ def encode_variable(self, variable):
328348
return variable
329349

330350
def encode_attribute(self, a):
331-
return _encode_zarr_attr_value(a)
351+
return encode_zarr_attr_value(a)
332352

333353
def store(
334354
self,
@@ -433,10 +453,10 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
433453
writer.add(v.data, zarr_array, region=tuple(new_region))
434454
else:
435455
# new variable
436-
encoding = _extract_zarr_variable_encoding(v, raise_on_invalid=check)
456+
encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check)
437457
encoded_attrs = {}
438458
# the magic for storing the hidden dimension data
439-
encoded_attrs[_DIMENSION_KEY] = dims
459+
encoded_attrs[DIMENSION_KEY] = dims
440460
for k2, v2 in attrs.items():
441461
encoded_attrs[k2] = self.encode_attribute(v2)
442462

xarray/core/formatting.py

+25-9
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import functools
55
from datetime import datetime, timedelta
66
from itertools import zip_longest
7+
from typing import Hashable
78

89
import numpy as np
910
import pandas as pd
@@ -14,7 +15,7 @@
1415
from .pycompat import dask_array_type, sparse_array_type
1516

1617

17-
def pretty_print(x, numchars):
18+
def pretty_print(x, numchars: int):
1819
"""Given an object `x`, call `str(x)` and format the returned string so
1920
that it is numchars long, padding with trailing spaces or truncating with
2021
ellipses as necessary
@@ -163,7 +164,7 @@ def format_items(x):
163164
return formatted
164165

165166

166-
def format_array_flat(array, max_width):
167+
def format_array_flat(array, max_width: int):
167168
"""Return a formatted string for as many items in the flattened version of
168169
array that will fit within max_width characters.
169170
"""
@@ -198,11 +199,20 @@ def format_array_flat(array, max_width):
198199
num_back = count - num_front
199200
# note that num_back is 0 <--> array.size is 0 or 1
200201
# <--> relevant_back_items is []
201-
pprint_str = (
202-
" ".join(relevant_front_items[:num_front])
203-
+ padding
204-
+ " ".join(relevant_back_items[-num_back:])
202+
pprint_str = "".join(
203+
[
204+
" ".join(relevant_front_items[:num_front]),
205+
padding,
206+
" ".join(relevant_back_items[-num_back:]),
207+
]
205208
)
209+
210+
# As a final check, if it's still too long even with the limit in values,
211+
# replace the end with an ellipsis
212+
# NB: this will still return a full 3-character ellipsis when max_width < 3
213+
if len(pprint_str) > max_width:
214+
pprint_str = pprint_str[: max(max_width - 3, 0)] + "..."
215+
206216
return pprint_str
207217

208218

@@ -258,10 +268,16 @@ def inline_variable_array_repr(var, max_width):
258268
return "..."
259269

260270

261-
def summarize_variable(name, var, col_width, marker=" ", max_width=None):
271+
def summarize_variable(
272+
name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None
273+
):
262274
"""Summarize a variable in one line, e.g., for the Dataset.__repr__."""
263275
if max_width is None:
264-
max_width = OPTIONS["display_width"]
276+
max_width_options = OPTIONS["display_width"]
277+
if not isinstance(max_width_options, int):
278+
raise TypeError(f"`max_width` value of `{max_width}` is not a valid int")
279+
else:
280+
max_width = max_width_options
265281
first_col = pretty_print(f" {marker} {name} ", col_width)
266282
if var.dims:
267283
dims_str = "({}) ".format(", ".join(map(str, var.dims)))
@@ -295,7 +311,7 @@ def summarize_datavar(name, var, col_width):
295311
return summarize_variable(name, var.variable, col_width)
296312

297313

298-
def summarize_coord(name, var, col_width):
314+
def summarize_coord(name: Hashable, var, col_width: int):
299315
is_index = name in var.dims
300316
marker = "*" if is_index else " "
301317
if is_index:

xarray/tests/test_backends.py

+47
Original file line numberDiff line numberDiff line change
@@ -4498,3 +4498,50 @@ def test_invalid_netcdf_raises(engine):
44984498
data = create_test_data()
44994499
with raises_regex(ValueError, "unrecognized option 'invalid_netcdf'"):
45004500
data.to_netcdf("foo.nc", engine=engine, invalid_netcdf=True)
4501+
4502+
4503+
@requires_zarr
4504+
def test_encode_zarr_attr_value():
4505+
# array -> list
4506+
arr = np.array([1, 2, 3])
4507+
expected = [1, 2, 3]
4508+
actual = backends.zarr.encode_zarr_attr_value(arr)
4509+
assert isinstance(actual, list)
4510+
assert actual == expected
4511+
4512+
# scalar array -> scalar
4513+
sarr = np.array(1)[()]
4514+
expected = 1
4515+
actual = backends.zarr.encode_zarr_attr_value(sarr)
4516+
assert isinstance(actual, int)
4517+
assert actual == expected
4518+
4519+
# string -> string (no change)
4520+
expected = "foo"
4521+
actual = backends.zarr.encode_zarr_attr_value(expected)
4522+
assert isinstance(actual, str)
4523+
assert actual == expected
4524+
4525+
4526+
@requires_zarr
4527+
def test_extract_zarr_variable_encoding():
4528+
4529+
var = xr.Variable("x", [1, 2])
4530+
actual = backends.zarr.extract_zarr_variable_encoding(var)
4531+
assert "chunks" in actual
4532+
assert actual["chunks"] is None
4533+
4534+
var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)})
4535+
actual = backends.zarr.extract_zarr_variable_encoding(var)
4536+
assert actual["chunks"] == (1,)
4537+
4538+
# does not raise on invalid
4539+
var = xr.Variable("x", [1, 2], encoding={"foo": (1,)})
4540+
actual = backends.zarr.extract_zarr_variable_encoding(var)
4541+
4542+
# raises on invalid
4543+
var = xr.Variable("x", [1, 2], encoding={"foo": (1,)})
4544+
with raises_regex(ValueError, "unexpected encoding parameters"):
4545+
actual = backends.zarr.extract_zarr_variable_encoding(
4546+
var, raise_on_invalid=True
4547+
)

xarray/tests/test_formatting.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_format_items(self):
115115

116116
def test_format_array_flat(self):
117117
actual = formatting.format_array_flat(np.arange(100), 2)
118-
expected = "0 ... 99"
118+
expected = "..."
119119
assert expected == actual
120120

121121
actual = formatting.format_array_flat(np.arange(100), 9)
@@ -134,11 +134,13 @@ def test_format_array_flat(self):
134134
expected = "0 1 2 ... 98 99"
135135
assert expected == actual
136136

137+
# NB: Probably not ideal; an alternative would be cutting after the
138+
# first ellipsis
137139
actual = formatting.format_array_flat(np.arange(100.0), 11)
138-
expected = "0.0 ... 99.0"
140+
expected = "0.0 ... ..."
139141
assert expected == actual
140142

141-
actual = formatting.format_array_flat(np.arange(100.0), 1)
143+
actual = formatting.format_array_flat(np.arange(100.0), 12)
142144
expected = "0.0 ... 99.0"
143145
assert expected == actual
144146

@@ -154,16 +156,25 @@ def test_format_array_flat(self):
154156
expected = ""
155157
assert expected == actual
156158

157-
actual = formatting.format_array_flat(np.arange(1), 0)
159+
actual = formatting.format_array_flat(np.arange(1), 1)
158160
expected = "0"
159161
assert expected == actual
160162

161-
actual = formatting.format_array_flat(np.arange(2), 0)
163+
actual = formatting.format_array_flat(np.arange(2), 3)
162164
expected = "0 1"
163165
assert expected == actual
164166

165-
actual = formatting.format_array_flat(np.arange(4), 0)
166-
expected = "0 ... 3"
167+
actual = formatting.format_array_flat(np.arange(4), 7)
168+
expected = "0 1 2 3"
169+
assert expected == actual
170+
171+
actual = formatting.format_array_flat(np.arange(5), 7)
172+
expected = "0 ... 4"
173+
assert expected == actual
174+
175+
long_str = [" ".join(["hello world" for _ in range(100)])]
176+
actual = formatting.format_array_flat(np.asarray([long_str]), 21)
177+
expected = "'hello world hello..."
167178
assert expected == actual
168179

169180
def test_pretty_print(self):

0 commit comments

Comments
 (0)