Merge branch 'main' of github.com:DKISTDC/dkist into bugfix-192

DKISTDC · Jan 6, 2025 · 1b49a64 · 1b49a64
2 parents 5cb429b + f650849
commit 1b49a64
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 7 deletions.
diff --git a/changelog/467.performance.rst b/changelog/467.performance.rst
@@ -0,0 +1 @@
+Improve the performance of the ``TiledDataset`` ``repr`` and ``str``.
diff --git a/changelog/475.trivial.rst b/changelog/475.trivial.rst
@@ -0,0 +1 @@
+Fix a small bug which caused `ds.flat` to break if not indexed.
diff --git a/changelog/479.bugfix.rst b/changelog/479.bugfix.rst
@@ -0,0 +1 @@
+Fix some small issues with `Dataset.__repr__`.
diff --git a/changelog/README.rst b/changelog/README.rst
@@ -21,6 +21,7 @@ Each file should be named like ``<PULL REQUEST>.<TYPE>.rst``, where ``<PULL REQU
 * ``breaking``: A change which requires users to change code and is not backwards compatible. (Not to be used for removal of deprecated features.)
 * ``feature``: New user facing features and any new behavior.
 * ``bugfix``: Fixes a reported bug.
+* ``performance``: A performance improvement which does not change behaviour.
 * ``doc``: Documentation addition or improvement, like rewording an entire section or adding missing docs.
 * ``removal``: Feature deprecation and/or feature removal.
 * ``trivial``: A change which has no user facing effect or is a tiny change.

diff --git a/dkist/dataset/tests/test_dataset.py b/dkist/dataset/tests/test_dataset.py
@@ -50,6 +50,20 @@ def test_repr(dataset, dataset_3d):
     r = repr(dataset_3d)
     assert str(dataset_3d.data) in r
 
+def test_repr_numpy(dataset):
+    # Do it the old way to support old ndcube
+    dataset._data = dataset.data.compute()
+    r = repr(dataset)
+    assert "numpy.ndarray" in r
+    assert f"{dataset.data.shape}" in r
+    assert f"{dataset.data.dtype}" in r
+
+
+@pytest.mark.accept_cli_dataset
+def test_flat_repr(large_tiled_dataset):
+    r = repr(large_tiled_dataset.flat)
+    assert f"is an array of ({np.prod(large_tiled_dataset.shape)},) Dataset objects" in r
+
 
 @pytest.mark.accept_cli_dataset
 def test_wcs_roundtrip(dataset):

diff --git a/dkist/dataset/tests/test_tiled_dataset.py b/dkist/dataset/tests/test_tiled_dataset.py
@@ -81,7 +81,7 @@ def test_tileddataset_plot(share_zscale):
     newtiles = []
     for tile in ds.flat:
         newtiles.append(tile.rebin((1, 8, 8), operation=np.sum))
-    ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=ds.inventory)
+    ds = TiledDataset(np.array(newtiles).reshape(ds.shape), inventory=newtiles[0].inventory)
     fig = plt.figure(figsize=(600, 800))
     ds.plot(0, share_zscale=share_zscale)
     return plt.gcf()

diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py
@@ -11,14 +11,25 @@
 __all__ = ["dataset_info_str"]
 
 
+def get_array_repr(array):
+    """
+    Return a "repr-like" string for an array, without any values.
+
+    The objective of this function is primarily to provide a dask array like repr for numpy arrays.
+    """
+    if isinstance(array, np.ndarray):
+        return f"numpy.ndarray<shape={array.shape}, dtype={array.dtype}>"
+    return repr(array)
+
+
 def dataset_info_str(ds_in):
-    # Check for an attribute that only appears on TiledDataset
-    # Not using isinstance to avoid circular import
-    is_tiled = hasattr(ds_in, "combined_headers")
+    # Import here to remove circular import
+    from dkist.dataset import TiledDataset
+    is_tiled = isinstance(ds_in, TiledDataset)
     dstype = type(ds_in).__name__
     if is_tiled:
         tile_shape = ds_in.shape
-        ds = ds_in[0, 0]
+        ds = ds_in.flat[0]
     else:
         ds = ds_in
     wcs = ds.wcs.low_level_wcs
@@ -48,7 +59,7 @@ def dataset_info_str(ds_in):
         s += "\nThis "
     s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n"
 
-    s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n"
+    s += f"The data are represented by a {type(ds.data)} object:\n{get_array_repr(ds.data)}\n\n"
 
     array_shape = wcs.array_shape or (0,)
     pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim
@@ -139,7 +150,7 @@ def _get_pp_matrix(wcs):
         world.insert(0, "")
     mstr = np.insert(mstr, 0, world, axis=1)
     widths = [np.max([len(a) for a in col]) for col in mstr.T]
-    mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0)
+    mstr = np.insert(mstr, header.shape[0], ["-"*wid for wid in widths], axis=0)
     for i, col in enumerate(mstr.T):
         if i == 0:
             mstr[:, i] = np.char.rjust(col, widths[i])

diff --git a/pyproject.toml b/pyproject.toml
@@ -173,3 +173,8 @@ write_to = "dkist/_version.py"
     directory = "trivial"
     name = "Trivial/Internal Changes"
     showcontent = true
+
+  [[tool.towncrier.type]]
+    directory = "performance"
+    name = "Performance Improvements"
+    showcontent = true
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Improve the performance of the ``TiledDataset`` ``repr`` and ``str``.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix a small bug which caused `ds.flat` to break if not indexed.