From cd506515e2c533a5b676b55c8abd6369e9a27d5b Mon Sep 17 00:00:00 2001
From: Drew Leonard <andy.j.leonard@gmail.com>
Date: Mon, 6 Jan 2025 11:24:28 +0000
Subject: [PATCH 1/3] Don't try to index a flat array with two indices (#475)

* Don't try to index a flat array with two indices

* Add a test to check that flat dataset repr doesn't bork

* Check that the repr is outputting the right length of flat array

* Changelog

---------

Co-authored-by: Stuart Mumford <stuart@cadair.com>
---
 changelog/475.trivial.rst           | 1 +
 dkist/dataset/tests/test_dataset.py | 6 ++++++
 dkist/dataset/utils.py              | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 changelog/475.trivial.rst
diff --git a/changelog/475.trivial.rst b/changelog/475.trivial.rst
new file mode 100644
index 00000000..8aeee1fa
--- /dev/null
+++ b/changelog/475.trivial.rst
@@ -0,0 +1 @@
+Fix small bug which caused the ``repr`` of ``ds.flat`` to fail because the flat array was indexed with two indices.
diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py
index 90b8c577..941cee76 100644
--- a/dkist/dataset/tests/test_dataset.py
+++ b/dkist/dataset/tests/test_dataset.py
@@ -51,6 +51,12 @@ def test_repr(dataset, dataset_3d):
     assert str(dataset_3d.data) in r
 
 
+@pytest.mark.accept_cli_dataset
+def test_flat_repr(large_tiled_dataset):
+    r = repr(large_tiled_dataset.flat)
+    assert f"is an array of ({np.prod(large_tiled_dataset.shape)},) Dataset objects" in r
+
+
 @pytest.mark.accept_cli_dataset
 def test_wcs_roundtrip(dataset):
     p = [1*u.pixel] * dataset.wcs.pixel_n_dim
diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py
index bab3e338..2b690c16 100644
--- a/dkist/dataset/utils.py
+++ b/dkist/dataset/utils.py
@@ -18,7 +18,7 @@ def dataset_info_str(ds_in):
     dstype = type(ds_in).__name__
     if is_tiled:
         tile_shape = ds_in.shape
-        ds = ds_in[0, 0]
+        ds = ds_in.flat[0]
     else:
         ds = ds_in
     wcs = ds.wcs.low_level_wcs

From 9c1705451e7c98e41254d36e6a5fa173d944a615 Mon Sep 17 00:00:00 2001
From: Stuart Mumford <stuart@cadair.com>
Date: Mon, 6 Jan 2025 11:33:14 +0000
Subject: [PATCH 2/3] Improve performance of TiledDataset repr (#467)

* Change how we detect tiled dataset for speed

* Add a couple of repr benchmarks

* Add a performance improvement section to the changelog

* Add changelog

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 changelog/467.performance.rst | 1 +
 changelog/README.rst          | 1 +
 dkist/dataset/utils.py        | 6 +++---
 pyproject.toml                | 5 +++++
 4 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 changelog/467.performance.rst

diff --git a/changelog/467.performance.rst b/changelog/467.performance.rst
new file mode 100644
index 00000000..cae5f1e0
--- /dev/null
+++ b/changelog/467.performance.rst
@@ -0,0 +1 @@
+Improve the performance of the ``TiledDataset`` ``repr`` and ``str``.
diff --git a/changelog/README.rst b/changelog/README.rst
index 766d10d9..14d82bfb 100644
--- a/changelog/README.rst
+++ b/changelog/README.rst
@@ -21,6 +21,7 @@ Each file should be named like ``<PULL REQUEST>.<TYPE>.rst``, where ``<PULL REQU
 * ``breaking``: A change which requires users to change code and is not backwards compatible. (Not to be used for removal of deprecated features.)
 * ``feature``: New user facing features and any new behavior.
 * ``bugfix``: Fixes a reported bug.
+* ``performance``: A performance improvement which does not change behaviour.
 * ``doc``: Documentation addition or improvement, like rewording an entire session or adding missing docs.
 * ``removal``: Feature deprecation and/or feature removal.
 * ``trivial``: A change which has no user facing effect or is tiny change.
diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py
index 2b690c16..45eb20d9 100644
--- a/dkist/dataset/utils.py
+++ b/dkist/dataset/utils.py
@@ -12,9 +12,9 @@
 
 
 def dataset_info_str(ds_in):
-    # Check for an attribute that only appears on TiledDataset
-    # Not using isinstance to avoid circular import
-    is_tiled = hasattr(ds_in, "combined_headers")
+    # Import here to remove circular import
+    from dkist.dataset import TiledDataset
+    is_tiled = isinstance(ds_in, TiledDataset)
     dstype = type(ds_in).__name__
     if is_tiled:
         tile_shape = ds_in.shape
diff --git a/pyproject.toml b/pyproject.toml
index c6a0812f..79a421da 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -173,3 +173,8 @@ write_to = "dkist/_version.py"
     directory = "trivial"
     name = "Trivial/Internal Changes"
     showcontent = true
+
+  [[tool.towncrier.type]]
+    directory = "performance"
+    name = "Performance Improvements"
+    showcontent = true

From f650849e8e01fa08740e15aecff18feb9db1ee84 Mon Sep 17 00:00:00 2001
From: Stuart Mumford <stuart@cadair.com>
Date: Mon, 6 Jan 2025 11:43:24 +0000
Subject: [PATCH 3/3] Some repr fixes (#479)

* Some repr fixes

* Changelog

* Attempt to fix the tileddataset plot

* Add a test for numpy repr
---
 changelog/479.bugfix.rst                  |  1 +
 dkist/dataset/tests/test_dataset.py       |  8 ++++++++
 dkist/dataset/tests/test_tiled_dataset.py |  2 +-
 dkist/dataset/utils.py                    | 15 +++++++++++++--
 4 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 changelog/479.bugfix.rst

diff --git a/changelog/479.bugfix.rst b/changelog/479.bugfix.rst
new file mode 100644
index 00000000..19d1eb1e
--- /dev/null
+++ b/changelog/479.bugfix.rst
@@ -0,0 +1 @@
+Fix some small issues with `Dataset.__repr__`.
diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py
index 941cee76..85d0dc61 100644
--- a/dkist/dataset/tests/test_dataset.py
+++ b/dkist/dataset/tests/test_dataset.py
@@ -50,6 +50,14 @@ def test_repr(dataset, dataset_3d):
     r = repr(dataset_3d)
     assert str(dataset_3d.data) in r
 
+def test_repr_numpy(dataset):
+    # Do it the old way to support old ndcube
+    dataset._data = dataset.data.compute()
+    r = repr(dataset)
+    assert "numpy.ndarray" in r
+    assert f"{dataset.data.shape}" in r
+    assert f"{dataset.data.dtype}" in r
+
 
 @pytest.mark.accept_cli_dataset
 def test_flat_repr(large_tiled_dataset):
diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py
index e72b8117..c01abd36 100644
--- a/dkist/dataset/tests/test_tiled_dataset.py
+++ b/dkist/dataset/tests/test_tiled_dataset.py
@@ -81,7 +81,7 @@ def test_tileddataset_plot(share_zscale):
     newtiles = []
     for tile in ds.flat:
         newtiles.append(tile.rebin((1, 8, 8), operation=np.sum))
-    ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=ds.inventory)
+    ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=newtiles[0].inventory)
     fig = plt.figure(figsize=(600, 800))
     ds.plot(0, share_zscale=share_zscale)
     return plt.gcf()
diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py
index 45eb20d9..3630030a 100644
--- a/dkist/dataset/utils.py
+++ b/dkist/dataset/utils.py
@@ -11,6 +11,17 @@
 __all__ = ["dataset_info_str"]
 
 
+def get_array_repr(array):
+    """
+    Return a "repr-like" string for an array, without any values.
+
+    The objective of this function is primarily to provide a dask array like repr for numpy arrays.
+    """
+    if isinstance(array, np.ndarray):
+        return f"numpy.ndarray<shape={array.shape}, dtype={array.dtype}>"
+    return repr(array)
+
+
 def dataset_info_str(ds_in):
     # Import here to remove circular import
     from dkist.dataset import TiledDataset
@@ -48,7 +59,7 @@ def dataset_info_str(ds_in):
         s += "\nThis "
     s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n"
 
-    s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n"
+    s += f"The data are represented by a {type(ds.data)} object:\n{get_array_repr(ds.data)}\n\n"
 
     array_shape = wcs.array_shape or (0,)
     pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim
@@ -139,7 +150,7 @@ def _get_pp_matrix(wcs):
         world.insert(0, "")
     mstr = np.insert(mstr, 0, world, axis=1)
     widths = [np.max([len(a) for a in col]) for col in mstr.T]
-    mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0)
+    mstr = np.insert(mstr, header.shape[0], ["-"*wid for wid in widths], axis=0)
     for i, col in enumerate(mstr.T):
         if i == 0:
             mstr[:, i] = np.char.rjust(col, widths[i])