From 52f7fd6464d1cedfa3eda56831a2469a92a8b94e Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 21 Aug 2024 15:27:15 +0100 Subject: [PATCH 01/22] Replace "pixel dims" for "array dims" in dataset repr, because that's what it actually shows --- dkist/dataset/utils.py | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index c2c6be6a..856f942b 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -19,7 +19,7 @@ def dataset_info_str(ds): ds = ds[0, 0] wcs = ds.wcs.low_level_wcs - # Pixel dimensions table + # Array dimensions table instr = ds.inventory.get("instrument", "") if instr: @@ -47,7 +47,7 @@ def dataset_info_str(ds): pixel_nam_width = max(9, max(len(x) for x in pixel_axis_names)) pixel_siz_width = max(9, len(str(max(array_shape)))) - s += (("{0:" + str(pixel_dim_width) + "s}").format("Pixel Dim") + " " + + s += (("{0:" + str(pixel_dim_width) + "s}").format("Array Dim") + " " + ("{0:" + str(pixel_nam_width) + "s}").format("Axis Name") + " " + ("{0:" + str(pixel_siz_width) + "s}").format("Data size") + " " + "Bounds\n") @@ -89,40 +89,15 @@ def dataset_info_str(ds): pixel_dim_width = max(3, len(str(wcs.world_n_dim))) - s += "Correlation between pixel and world axes:\n\n" + s += "Correlation between array and world axes:\n\n" - s += (" " * world_dim_width + " " + - ("{0:^" + str(wcs.pixel_n_dim * 5 - 2) + "s}").format("Pixel Dim") + - "\n") - - s += (("{0:" + str(world_dim_width) + "s}").format("World Dim") + - "".join([" " + ("{0:" + str(pixel_dim_width) + "d}").format(ipix) - for ipix in range(wcs.pixel_n_dim)]) + - "\n") - - matrix = wcs.axis_correlation_matrix[::-1, ::-1] - matrix_str = np.empty(matrix.shape, dtype="U3") - matrix_str[matrix] = "yes" - matrix_str[~matrix] = "no" - - for iwrl in range(wcs.world_n_dim): - s += (("{0:" + str(world_dim_width) + "d}").format(iwrl) + - "".join([" " + ("{0:>" + str(pixel_dim_width) + 
"s}").format(matrix_str[iwrl, ipix]) - for ipix in range(wcs.pixel_n_dim)]) + - "\n") + s += _get_pp_matrix(ds.wcs) # Make sure we get rid of the extra whitespace at the end of some lines return "\n".join([line.rstrip() for line in s.splitlines()]) -def pp_matrix(wcs): - """ - A small helper function to print a correlation matrix with labels - - Parameters - ---------- - wcs : `BaseHighLevelWCS` or `BaseLowLevelWCS` - """ +def _get_pp_matrix(wcs): slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) mstr = wcs.axis_correlation_matrix.astype(f" Date: Wed, 21 Aug 2024 15:28:48 +0100 Subject: [PATCH 02/22] Add a little more info to dataset repr --- dkist/dataset/utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 856f942b..113fb7cf 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -21,16 +21,13 @@ def dataset_info_str(ds): # Array dimensions table - instr = ds.inventory.get("instrument", "") + instr = ds.inventory.get("instrumentName", "") if instr: instr += " " + nframes = ds.inventory.get("frameCount", "") - if is_tiled: - s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\n" - s += f"Each {instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions\n\n" - else: - s = f"This {instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions\n\n" - s += f"{ds.data}\n\n" + s = f"This {instr}Dataset has {wcs.pixel_n_dim} array and {wcs.world_n_dim} world dimensions and consists of {nframes} frames stored in {ds.files.basepath}\n\n" + s += f"The data are represented by a Dask array: {ds.data}\n\n" array_shape = wcs.array_shape or (0,) pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim From 9efe70e4c59646d6bfaec3f9f50c3bc54ad4c323 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 21 Aug 2024 15:29:09 +0100 Subject: [PATCH 03/22] Use pretty correlation 
matrix instead of plain one --- dkist/dataset/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 113fb7cf..c13b52c8 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -103,7 +103,18 @@ def _get_pp_matrix(wcs): for i, col in enumerate(mstr.T): wid = np.max([len(a) for a in col]) mstr[:, i] = np.char.rjust(col, wid) - print(np.array_str(mstr, max_line_width=1000)) + return np.array_str(mstr, max_line_width=1000) + + +def pp_matrix(wcs): + """ + A small helper function to print a correlation matrix with labels + + Parameters + ---------- + wcs : `BaseHighLevelWCS` or `BaseLowLevelWCS` + """ + print(_get_pp_matrix(wcs)) def extract_pc_matrix(headers, naxes=None): From 9ddfce6089e6310656d3c97d12b1736df16b266c Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:01:35 +0100 Subject: [PATCH 04/22] Tweak some output a little --- dkist/dataset/utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index c13b52c8..146e3786 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,8 +26,15 @@ def dataset_info_str(ds): instr += " " nframes = ds.inventory.get("frameCount", "") - s = f"This {instr}Dataset has {wcs.pixel_n_dim} array and {wcs.world_n_dim} world dimensions and consists of {nframes} frames stored in {ds.files.basepath}\n\n" - s += f"The data are represented by a Dask array: {ds.data}\n\n" + if is_tiled: + s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\nEach " + else: + s = "This " + + s += f"{instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + if ds.files: + s +="Files are stored in {ds.files.basepath}\n\n" + s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) pixel_shape = wcs.pixel_shape or 
(None,) * wcs.pixel_n_dim From d203862c5a07491b0cfd27dce3383d941bb8a514 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:01:53 +0100 Subject: [PATCH 05/22] Flip the ordering of world array indices to be correct --- dkist/dataset/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 146e3786..a3d1c8eb 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -76,11 +76,11 @@ def dataset_info_str(ds): ("{0:" + str(world_typ_width) + "s}").format("Physical Type") + " " + "Units\n") - for iwrl in range(wcs.world_n_dim): + for iwrl in range(wcs.world_n_dim)[::-1]: - name = wcs.world_axis_names[::-1][iwrl] or "None" - typ = wcs.world_axis_physical_types[::-1][iwrl] or "None" - unit = wcs.world_axis_units[::-1][iwrl] or "unknown" + name = wcs.world_axis_names[iwrl] or "None" + typ = wcs.world_axis_physical_types[iwrl] or "None" + unit = wcs.world_axis_units[iwrl] or "unknown" s += (("{0:" + str(world_dim_width) + "d}").format(iwrl) + " " + ("{0:" + str(world_nam_width) + "s}").format(name) + " " + From cfc8043e92724f1b5e075a92dc28eb92ecdeefea Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:04:37 +0100 Subject: [PATCH 06/22] Improvements to pretty correlation matrix output --- dkist/dataset/utils.py | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index a3d1c8eb..b034b2f2 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -2,6 +2,8 @@ Helper functions for the Dataset class. 
""" +import textwrap + import numpy as np import gwcs @@ -93,7 +95,7 @@ def dataset_info_str(ds): pixel_dim_width = max(3, len(str(wcs.world_n_dim))) - s += "Correlation between array and world axes:\n\n" + s += "Correlation between pixel and world axes:\n\n" s += _get_pp_matrix(ds.wcs) @@ -103,14 +105,39 @@ def dataset_info_str(ds): def _get_pp_matrix(wcs): slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) - mstr = wcs.axis_correlation_matrix.astype(f" Date: Fri, 23 Aug 2024 13:50:18 +0100 Subject: [PATCH 07/22] Include dataset ID --- dkist/dataset/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index b034b2f2..bee70f48 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -29,13 +29,13 @@ def dataset_info_str(ds): nframes = ds.inventory.get("frameCount", "") if is_tiled: - s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {dstype} {ds.inventory['datasetId']} consists of an array of {tile_shape} Dataset objects\n\nEach " else: s = "This " - s += f"{instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"{instr}Dataset {ds.inventory['datasetId']} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: - s +="Files are stored in {ds.files.basepath}\n\n" + s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) From e6a8968fa4dfa9c0406dbdaa02e62e97d8748b89 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 13:53:25 +0100 Subject: [PATCH 08/22] Add changelog --- changelog/431.trivial.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/431.trivial.rst diff --git a/changelog/431.trivial.rst b/changelog/431.trivial.rst 
new file mode 100644 index 00000000..b9cd4d0a --- /dev/null +++ b/changelog/431.trivial.rst @@ -0,0 +1 @@ +Update Dataset representation for better readability. From 206ab93922319d2523175dfde64e1f163b6067a9 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 14:36:01 +0100 Subject: [PATCH 09/22] Slight generalisation to make the tests pass and hopefully catch any weird data --- dkist/dataset/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index bee70f48..5c997293 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,14 +26,15 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = ds.inventory.get("frameCount", "") + nframes = ds.inventory.get("frameCount", "an unknown number") + dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: - s = f"This {dstype} {ds.inventory['datasetId']} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach " else: s = "This " - s += f"{instr}Dataset {ds.inventory['datasetId']} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"{instr}Dataset {dsID} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" @@ -120,7 +121,10 @@ def _get_pp_matrix(wcs): header = np.vstack([[s.center(width) for s in wrapped[l]] for l, _ in enumerate(labels)]).T mstr = np.insert(mstr, 0, header, axis=0) - world = ["", "WORLD DIMENSIONS", *list(wcs.world_axis_names)] + world = ["WORLD DIMENSIONS", *list(wcs.world_axis_names)] + nrows = maxlines + len(wcs.world_axis_names) + while len(world) < nrows: + world.insert(0, "") mstr = 
np.insert(mstr, 0, world, axis=1) widths = [np.max([len(a) for a in col]) for col in mstr.T] mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0) From 9b4ac7dfd6b8c6a20bca40e00e556eaa8b941d9f Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 14:45:10 +0100 Subject: [PATCH 10/22] Slight tweaks --- dkist/dataset/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 5c997293..9c4a50e7 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -30,11 +30,11 @@ def dataset_info_str(ds): dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: - s = f"This {dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {instr}{dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach Dataset " else: - s = "This " + s = f"This {instr}Dataset {dsID} " - s += f"{instr}Dataset {dsID} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 8d46818f6f685854768faea7f4bd06e67857de28 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Thu, 29 Aug 2024 16:01:34 +0100 Subject: [PATCH 11/22] Minor tweaks --- dkist/dataset/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 9c4a50e7..808d9737 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,7 +26,7 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = ds.inventory.get("frameCount", "an unknown number") + nframes = len(ds.files) dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: @@ -34,8 
+34,10 @@ def dataset_info_str(ds): else: s = f"This {instr}Dataset {dsID} " - s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" + if ds.files: + s += f" and consists of {nframes} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 43db5f59e0df5fef09b098ef222f0a8ce72518d6 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 15:03:36 +0100 Subject: [PATCH 12/22] Tweaks to make doc tests pass (mostly) --- dkist/dataset/loader.py | 49 +++++++++++++++++++++-------------------- dkist/dataset/utils.py | 3 +-- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 46af93a5..71db188e 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -45,45 +45,46 @@ def load_dataset(target): Examples -------- + >>> import dkist + >>> dkist.load_dataset("/path/to/VISP_L1_ABCDE.asdf") # doctest: +SKIP >>> dkist.load_dataset("/path/to/ABCDE/") # doctest: +SKIP >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP - >>> from sunpy.net import Fido, attrs as a - >>> import dkist.net - >>> search_results = Fido.search(a.dkist.Dataset("AGLKO")) # doctest: +REMOTE_DATA - >>> files = Fido.fetch(search_results) # doctest: +REMOTE_DATA - >>> dkist.load_dataset(files) # doctest: +REMOTE_DATA - - This Dataset has 4 pixel and 5 world dimensions + >>> from dkist.data.sample import VISP_BKPLX + >>> print(dkist.load_dataset(VISP_BKPLX)) + This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames + Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX - dask.array + The data are represented by a object: + dask.array - Pixel Dim Axis Name Data size Bounds + Array Dim Axis Name Data size Bounds 0 polarization state 4 None - 1 raster scan step 
number 1000 None - 2 dispersion axis 976 None - 3 spatial along slit 2555 None + 1 raster scan step number 425 None + 2 dispersion axis 980 None + 3 spatial along slit 2554 None World Dim Axis Name Physical Type Units - 0 stokes phys.polarization.stokes unknown - 1 time time s + 4 stokes phys.polarization.stokes unknown + 3 time time s 2 helioprojective latitude custom:pos.helioprojective.lat arcsec - 3 wavelength em.wl nm - 4 helioprojective longitude custom:pos.helioprojective.lon arcsec + 1 wavelength em.wl nm + 0 helioprojective longitude custom:pos.helioprojective.lon arcsec Correlation between pixel and world axes: - Pixel Dim - World Dim 0 1 2 3 - 0 yes no no no - 1 no yes no no - 2 no yes no yes - 3 no no yes no - 4 no yes no yes - + | PIXEL DIMENSIONS + | spatial | dispersion | raster scan | polarization + WORLD DIMENSIONS | along slit | axis | step number | state + ------------------------- | ------------ | ------------ | ------------ | ------------ + helioprojective longitude | x | | x | + wavelength | | x | | + helioprojective latitude | x | | x | + time | | | x | + stokes | | | | x """ known_types = _known_types_docs().keys() raise TypeError(f"Input type {type(target).__name__} not recognised. 
It must be one of {', '.join(known_types)}.") diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 808d9737..00d350e6 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,7 +26,6 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = len(ds.files) dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: @@ -37,7 +36,7 @@ def dataset_info_str(ds): s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: - s += f" and consists of {nframes} frames\n" + s += f" and consists of {len(ds.files)} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From af5829d5d3ad0c62ae1a10f5d42cee2cbfd0eaef Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 15:15:49 +0100 Subject: [PATCH 13/22] Nope that still needed to be a remote test --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 71db188e..50526595 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -53,7 +53,7 @@ def load_dataset(target): >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP - >>> from dkist.data.sample import VISP_BKPLX + >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX From f55dfcc5ab5983cbedbe25b748d34ff040ad44d2 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 16:00:17 +0100 Subject: [PATCH 14/22] Calculate correct number of files for TiledDatasets --- dkist/dataset/utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 00d350e6..6bc47823 
100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -11,14 +11,16 @@ __all__ = ["dataset_info_str"] -def dataset_info_str(ds): +def dataset_info_str(ds_in): # Check for an attribute that only appears on TiledDataset # Not using isinstance to avoid circular import - is_tiled = hasattr(ds, "combined_headers") - dstype = type(ds).__name__ + is_tiled = hasattr(ds_in, "combined_headers") + dstype = type(ds_in).__name__ if is_tiled: - tile_shape = ds.shape - ds = ds[0, 0] + tile_shape = ds_in.shape + ds = ds_in[0, 0] + else: + ds = ds_in wcs = ds.wcs.low_level_wcs # Array dimensions table @@ -36,7 +38,8 @@ def dataset_info_str(ds): s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: - s += f" and consists of {len(ds.files)} frames\n" + nframes = len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) + s += f" and consists of {nframes} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 6960de82be37731b7846ff795779da6b71919dfe Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 16:01:36 +0100 Subject: [PATCH 15/22] Slicing TiledDatasets gives back a different kind of WCS --- dkist/dataset/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 6bc47823..8501a63d 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -109,6 +109,7 @@ def dataset_info_str(ds_in): def _get_pp_matrix(wcs): + wcs = wcs.low_level_wcs # Just in case the dataset has been sliced and returned the wrong kind of wcs slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) mstr = wcs.axis_correlation_matrix.astype(" Date: Mon, 2 Sep 2024 11:10:21 +0100 Subject: [PATCH 16/22] This needs to be REMOTE_DATA'd as well --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 50526595..cc100a12 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -54,7 +54,7 @@ def load_dataset(target): >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA - >>> print(dkist.load_dataset(VISP_BKPLX)) + >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX From d433c450e64095cb4c7e00909bc4b9bece4caa23 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:32:16 +0100 Subject: [PATCH 17/22] Correct/obfuscate sample data path in dataset repr test --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index cc100a12..c9c84278 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -56,7 +56,7 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames - Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX + Files are stored in /.../VISP_BKPLX The data are represented by a object: dask.array From caeb378f11a6bed589779e9436f6a1f597e57589 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:40:25 +0100 Subject: [PATCH 18/22] Don't need to run doctests on old releases --- docs/whatsnew/1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/whatsnew/1.0.rst b/docs/whatsnew/1.0.rst index 52356301..b64bab26 100644 --- a/docs/whatsnew/1.0.rst +++ b/docs/whatsnew/1.0.rst @@ -60,8 +60,8 @@ Any DKIST level one ASDF file can be loaded with the `dkist.load_dataset` functi >>> import dkist - >>> ds = 
dkist.load_dataset(asdf_files) # doctest: +REMOTE_DATA - >>> ds # doctest: +REMOTE_DATA + >>> ds = dkist.load_dataset(asdf_files) # doctest: +SKIP + >>> ds # doctest: +SKIP This Dataset has 4 pixel and 5 world dimensions From 08edf43ab1ee8ef0397e0626f53d8019de43fef3 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:53:48 +0100 Subject: [PATCH 19/22] Fine I'll just skip all of them if you're going to be like that, doctest --- docs/whatsnew/1.0.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/whatsnew/1.0.rst b/docs/whatsnew/1.0.rst index b64bab26..bcaf8a04 100644 --- a/docs/whatsnew/1.0.rst +++ b/docs/whatsnew/1.0.rst @@ -29,7 +29,7 @@ Here is a really quick demo of searching for all unembargoed VISP data and downl >>> from sunpy.net import Fido, attrs as a >>> import dkist.net - >>> res = Fido.search(a.Instrument.visp, a.dkist.Embargoed.false) # doctest: +REMOTE_DATA + >>> res = Fido.search(a.Instrument.visp, a.dkist.Embargoed.false) # doctest: +SKIP >>> res # doctest: +SKIP Results from 1 Provider: @@ -44,8 +44,8 @@ Here is a really quick demo of searching for all unembargoed VISP data and downl - >>> asdf_files = Fido.fetch(res[:, 0]) # doctest: +REMOTE_DATA - >>> asdf_files # doctest: +REMOTE_DATA + >>> asdf_files = Fido.fetch(res[:, 0]) # doctest: +SKIP + >>> asdf_files # doctest: +SKIP ['...VISP_L1_20220602T175042_BDWQK.asdf'] @@ -133,8 +133,8 @@ This means you can first slice out a smaller dataset, and then only download the .. 
code-block:: python - >>> stokes_I_ds = ds[0] # doctest: +REMOTE_DATA - >>> stokes_I_ds # doctest: +REMOTE_DATA + >>> stokes_I_ds = ds[0] # doctest: +SKIP + >>> stokes_I_ds # doctest: +SKIP This Dataset has 3 pixel and 4 world dimensions From b18e98e68ebfc006e8d0a2f6e823b7787efc514a Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Tue, 3 Sep 2024 16:01:20 +0100 Subject: [PATCH 20/22] Update dkist/dataset/loader.py --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index c9c84278..9964f480 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -56,7 +56,7 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames - Files are stored in /.../VISP_BKPLX + Files are stored in ...VISP_BKPLX The data are represented by a object: dask.array From 20037e95adc5150e314e2051d5e470d0f748c866 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Tue, 3 Sep 2024 17:03:52 +0100 Subject: [PATCH 21/22] Some reworking for tiled / not tiled englishing --- dkist/dataset/utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 8501a63d..05d1d102 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -30,17 +30,23 @@ def dataset_info_str(ds_in): instr += " " dsID = ds.inventory.get("datasetId", "(no DatasetID)") + s = f"This {instr}Dataset {dsID} " if is_tiled: - s = f"This {instr}{dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach Dataset " - else: - s = f"This {instr}Dataset {dsID} " + s += f"is an array of {tile_shape} Dataset objects " + if ds.files: + s += "and \n" - s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: nframes 
= len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) - s += f" and consists of {nframes} frames\n" - s += f"Files are stored in {ds.files.basepath}\n\n" + s += f"consists of {nframes} frames stored in {ds.files.basepath}\n" + + if is_tiled: + s += "\nEach " + else: + s += "\nThis " + s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n" + s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) From cd0efaab7203d9c172976203ae00b51d9af397c7 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 4 Sep 2024 11:46:38 +0100 Subject: [PATCH 22/22] Tweak repr again and update loader docstring --- dkist/dataset/loader.py | 4 +++- dkist/dataset/utils.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 9964f480..1b1f4ea0 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -55,9 +55,11 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA - This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames + This VISP Dataset BKPLX consists of 1700 frames. Files are stored in ...VISP_BKPLX + This Dataset has 4 pixel and 5 world dimensions. + The data are represented by a object: dask.array diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 05d1d102..6ab71090 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -39,7 +39,8 @@ def dataset_info_str(ds_in): if ds.files: nframes = len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) - s += f"consists of {nframes} frames stored in {ds.files.basepath}\n" + s += f"consists of {nframes} frames.\n" + s += f"Files are stored in {ds.files.basepath}\n" if is_tiled: s += "\nEach "