Skip to content

Commit

Permalink
Merge branch 'main' of github.com:DKISTDC/dkist into bugfix-192
Browse files Browse the repository at this point in the history
  • Loading branch information
SolarDrew committed Jan 6, 2025
2 parents 5cb429b + f650849 commit 1b49a64
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 7 deletions.
1 change: 1 addition & 0 deletions changelog/467.performance.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve the performance of the ``TiledDataset`` ``repr`` and ``str``.
1 change: 1 addition & 0 deletions changelog/475.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a small bug which caused ``ds.flat`` to break if not indexed.
1 change: 1 addition & 0 deletions changelog/479.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix some small issues with `Dataset.__repr__`.
1 change: 1 addition & 0 deletions changelog/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Each file should be named like ``<PULL REQUEST>.<TYPE>.rst``, where ``<PULL REQU
* ``breaking``: A change which requires users to change code and is not backwards compatible. (Not to be used for removal of deprecated features.)
* ``feature``: New user facing features and any new behavior.
* ``bugfix``: Fixes a reported bug.
* ``performance``: A performance improvement which does not change behaviour.
* ``doc``: Documentation addition or improvement, like rewording an entire section or adding missing docs.
* ``removal``: Feature deprecation and/or feature removal.
* ``trivial``: A change which has no user facing effect or is a tiny change.
Expand Down
14 changes: 14 additions & 0 deletions dkist/dataset/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,20 @@ def test_repr(dataset, dataset_3d):
r = repr(dataset_3d)
assert str(dataset_3d.data) in r

def test_repr_numpy(dataset):
    """
    Check the dataset repr when the data are held as an in-memory numpy array.

    The repr is expected to name the array type and to contain its shape and
    dtype.
    """
    # Assign to the private attribute rather than any public setter so that
    # this keeps working with old ndcube versions.
    computed = dataset.data.compute()
    dataset._data = computed

    text = repr(dataset)

    assert "numpy.ndarray" in text
    assert str(dataset.data.shape) in text
    assert str(dataset.data.dtype) in text


@pytest.mark.accept_cli_dataset
def test_flat_repr(large_tiled_dataset):
    """
    Check that the repr of the flattened tiled dataset reports a 1D array
    whose length is the product of the tiled dataset's shape.
    """
    flat_text = repr(large_tiled_dataset.flat)
    n_tiles = np.prod(large_tiled_dataset.shape)
    expected = f"is an array of ({n_tiles},) Dataset objects"
    assert expected in flat_text


@pytest.mark.accept_cli_dataset
def test_wcs_roundtrip(dataset):
Expand Down
2 changes: 1 addition & 1 deletion dkist/dataset/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_tileddataset_plot(share_zscale):
newtiles = []
for tile in ds.flat:
newtiles.append(tile.rebin((1, 8, 8), operation=np.sum))
ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=ds.inventory)
ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=newtiles[0].inventory)
fig = plt.figure(figsize=(600, 800))
ds.plot(0, share_zscale=share_zscale)
return plt.gcf()
Expand Down
23 changes: 17 additions & 6 deletions dkist/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,25 @@
__all__ = ["dataset_info_str"]


def get_array_repr(array):
    """
    Summarise an array as a short string which never includes its values.

    Numpy arrays are described only by their shape and dtype, giving them a
    compact repr in the style of a dask array. Any other object is handed
    straight to `repr`.
    """
    # Anything which is not a numpy array already knows how to describe itself.
    if not isinstance(array, np.ndarray):
        return repr(array)

    return f"numpy.ndarray<shape={array.shape}, dtype={array.dtype}>"


def dataset_info_str(ds_in):
# Check for an attribute that only appears on TiledDataset
# Not using isinstance to avoid circular import
is_tiled = hasattr(ds_in, "combined_headers")
# Import here to remove circular import
from dkist.dataset import TiledDataset
is_tiled = isinstance(ds_in, TiledDataset)
dstype = type(ds_in).__name__
if is_tiled:
tile_shape = ds_in.shape
ds = ds_in[0, 0]
ds = ds_in.flat[0]
else:
ds = ds_in
wcs = ds.wcs.low_level_wcs
Expand Down Expand Up @@ -48,7 +59,7 @@ def dataset_info_str(ds_in):
s += "\nThis "
s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n"

s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n"
s += f"The data are represented by a {type(ds.data)} object:\n{get_array_repr(ds.data)}\n\n"

array_shape = wcs.array_shape or (0,)
pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim
Expand Down Expand Up @@ -139,7 +150,7 @@ def _get_pp_matrix(wcs):
world.insert(0, "")
mstr = np.insert(mstr, 0, world, axis=1)
widths = [np.max([len(a) for a in col]) for col in mstr.T]
mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0)
mstr = np.insert(mstr, header.shape[0], ["-"*wid for wid in widths], axis=0)
for i, col in enumerate(mstr.T):
if i == 0:
mstr[:, i] = np.char.rjust(col, widths[i])
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,8 @@ write_to = "dkist/_version.py"
directory = "trivial"
name = "Trivial/Internal Changes"
showcontent = true

[[tool.towncrier.type]]
directory = "performance"
name = "Performance Improvements"
showcontent = true

0 comments on commit 1b49a64

Please sign in to comment.