From cd506515e2c533a5b676b55c8abd6369e9a27d5b Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 6 Jan 2025 11:24:28 +0000 Subject: [PATCH 1/3] Don't try to index a flat array with two indices (#475) * Don't try to index a flat array with two indices * Add a test to check that flat dataset repr doesn't bork * Check that the repr is outputting the right length of flat array * Changelog --------- Co-authored-by: Stuart Mumford --- changelog/475.trivial.rst | 1 + dkist/dataset/tests/test_dataset.py | 6 ++++++ dkist/dataset/utils.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 changelog/475.trivial.rst diff --git a/changelog/475.trivial.rst b/changelog/475.trivial.rst new file mode 100644 index 00000000..8aeee1fa --- /dev/null +++ b/changelog/475.trivial.rst @@ -0,0 +1 @@ +Fix small bug which caused `ds.flat` to break if not indexed. diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py index 90b8c577..941cee76 100644 --- a/dkist/dataset/tests/test_dataset.py +++ b/dkist/dataset/tests/test_dataset.py @@ -51,6 +51,12 @@ def test_repr(dataset, dataset_3d): assert str(dataset_3d.data) in r +@pytest.mark.accept_cli_dataset +def test_flat_repr(large_tiled_dataset): + r = repr(large_tiled_dataset.flat) + assert f"is an array of ({np.prod(large_tiled_dataset.shape)},) Dataset objects" in r + + @pytest.mark.accept_cli_dataset def test_wcs_roundtrip(dataset): p = [1*u.pixel] * dataset.wcs.pixel_n_dim diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index bab3e338..2b690c16 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -18,7 +18,7 @@ def dataset_info_str(ds_in): dstype = type(ds_in).__name__ if is_tiled: tile_shape = ds_in.shape - ds = ds_in[0, 0] + ds = ds_in.flat[0] else: ds = ds_in wcs = ds.wcs.low_level_wcs From 9c1705451e7c98e41254d36e6a5fa173d944a615 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Mon, 6 Jan 2025 11:33:14 +0000 Subject: [PATCH 2/3] Improve performance of 
TiledDataset repr (#467) * Change how we detect tiled dataset for speed * Add a couple of repr benchmarks * Add a performance improvement section to the changelog * Add changelog * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- changelog/467.performance.rst | 1 + changelog/README.rst | 1 + dkist/dataset/utils.py | 6 +++--- pyproject.toml | 5 +++++ 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 changelog/467.performance.rst diff --git a/changelog/467.performance.rst b/changelog/467.performance.rst new file mode 100644 index 00000000..cae5f1e0 --- /dev/null +++ b/changelog/467.performance.rst @@ -0,0 +1 @@ +Improve the performance of the ``TiledDataset`` ``repr`` and ``str``. diff --git a/changelog/README.rst b/changelog/README.rst index 766d10d9..14d82bfb 100644 --- a/changelog/README.rst +++ b/changelog/README.rst @@ -21,6 +21,7 @@ Each file should be named like ``..rst``, where `` Date: Mon, 6 Jan 2025 11:43:24 +0000 Subject: [PATCH 3/3] Some repr fixes (#479) * Some repr fixes * Changelog * Attempt to fix the tileddataset plot * Add a test for numpy repr --- changelog/479.bugfix.rst | 1 + dkist/dataset/tests/test_dataset.py | 8 ++++++++ dkist/dataset/tests/test_tiled_dataset.py | 2 +- dkist/dataset/utils.py | 15 +++++++++++++-- 4 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 changelog/479.bugfix.rst diff --git a/changelog/479.bugfix.rst b/changelog/479.bugfix.rst new file mode 100644 index 00000000..19d1eb1e --- /dev/null +++ b/changelog/479.bugfix.rst @@ -0,0 +1 @@ +Fix some small issues with `Dataset.__repr__`. 
diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py index 941cee76..85d0dc61 100644 --- a/dkist/dataset/tests/test_dataset.py +++ b/dkist/dataset/tests/test_dataset.py @@ -50,6 +50,14 @@ def test_repr(dataset, dataset_3d): r = repr(dataset_3d) assert str(dataset_3d.data) in r +def test_repr_numpy(dataset): + # Do it the old way to support old ndcube + dataset._data = dataset.data.compute() + r = repr(dataset) + assert "numpy.ndarray" in r + assert f"{dataset.data.shape}" in r + assert f"{dataset.data.dtype}" in r + @pytest.mark.accept_cli_dataset def test_flat_repr(large_tiled_dataset): diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py index e72b8117..c01abd36 100644 --- a/dkist/dataset/tests/test_tiled_dataset.py +++ b/dkist/dataset/tests/test_tiled_dataset.py @@ -81,7 +81,7 @@ def test_tileddataset_plot(share_zscale): newtiles = [] for tile in ds.flat: newtiles.append(tile.rebin((1, 8, 8), operation=np.sum)) - ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=ds.inventory) + ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=newtiles[0].inventory) fig = plt.figure(figsize=(600, 800)) ds.plot(0, share_zscale=share_zscale) return plt.gcf() diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 45eb20d9..3630030a 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -11,6 +11,17 @@ __all__ = ["dataset_info_str"] +def get_array_repr(array): + """ + Return a "repr-like" string for an array, without any values. + + The objective of this function is primarily to provide a dask array like repr for numpy arrays. 
+ """ + if isinstance(array, np.ndarray): + return f"numpy.ndarray<shape={array.shape}, dtype={array.dtype}>" + return repr(array) + + def dataset_info_str(ds_in): # Import here to remove circular import from dkist.dataset import TiledDataset @@ -48,7 +59,7 @@ def dataset_info_str(ds_in): s += "\nThis " s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n" - s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" + s += f"The data are represented by a {type(ds.data)} object:\n{get_array_repr(ds.data)}\n\n" array_shape = wcs.array_shape or (0,) pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim @@ -139,7 +150,7 @@ def _get_pp_matrix(wcs): world.insert(0, "") mstr = np.insert(mstr, 0, world, axis=1) widths = [np.max([len(a) for a in col]) for col in mstr.T] - mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0) + mstr = np.insert(mstr, header.shape[0], ["-"*wid for wid in widths], axis=0) for i, col in enumerate(mstr.T): if i == 0: mstr[:, i] = np.char.rjust(col, widths[i])