From 52f7fd6464d1cedfa3eda56831a2469a92a8b94e Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 21 Aug 2024 15:27:15 +0100 Subject: [PATCH 01/22] Replace "pixel dims" for "array dims" in dataset repr, because that's what it actually shows --- dkist/dataset/utils.py | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index c2c6be6a..856f942b 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -19,7 +19,7 @@ def dataset_info_str(ds): ds = ds[0, 0] wcs = ds.wcs.low_level_wcs - # Pixel dimensions table + # Array dimensions table instr = ds.inventory.get("instrument", "") if instr: @@ -47,7 +47,7 @@ def dataset_info_str(ds): pixel_nam_width = max(9, max(len(x) for x in pixel_axis_names)) pixel_siz_width = max(9, len(str(max(array_shape)))) - s += (("{0:" + str(pixel_dim_width) + "s}").format("Pixel Dim") + " " + + s += (("{0:" + str(pixel_dim_width) + "s}").format("Array Dim") + " " + ("{0:" + str(pixel_nam_width) + "s}").format("Axis Name") + " " + ("{0:" + str(pixel_siz_width) + "s}").format("Data size") + " " + "Bounds\n") @@ -89,40 +89,15 @@ def dataset_info_str(ds): pixel_dim_width = max(3, len(str(wcs.world_n_dim))) - s += "Correlation between pixel and world axes:\n\n" + s += "Correlation between array and world axes:\n\n" - s += (" " * world_dim_width + " " + - ("{0:^" + str(wcs.pixel_n_dim * 5 - 2) + "s}").format("Pixel Dim") + - "\n") - - s += (("{0:" + str(world_dim_width) + "s}").format("World Dim") + - "".join([" " + ("{0:" + str(pixel_dim_width) + "d}").format(ipix) - for ipix in range(wcs.pixel_n_dim)]) + - "\n") - - matrix = wcs.axis_correlation_matrix[::-1, ::-1] - matrix_str = np.empty(matrix.shape, dtype="U3") - matrix_str[matrix] = "yes" - matrix_str[~matrix] = "no" - - for iwrl in range(wcs.world_n_dim): - s += (("{0:" + str(world_dim_width) + "d}").format(iwrl) + - "".join([" " + ("{0:>" + str(pixel_dim_width) + 
"s}").format(matrix_str[iwrl, ipix]) - for ipix in range(wcs.pixel_n_dim)]) + - "\n") + s += _get_pp_matrix(ds.wcs) # Make sure we get rid of the extra whitespace at the end of some lines return "\n".join([line.rstrip() for line in s.splitlines()]) -def pp_matrix(wcs): - """ - A small helper function to print a correlation matrix with labels - - Parameters - ---------- - wcs : `BaseHighLevelWCS` or `BaseLowLevelWCS` - """ +def _get_pp_matrix(wcs): slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) mstr = wcs.axis_correlation_matrix.astype(f" Date: Wed, 21 Aug 2024 15:28:48 +0100 Subject: [PATCH 02/22] Add a little more info to dataset repr --- dkist/dataset/utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 856f942b..113fb7cf 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -21,16 +21,13 @@ def dataset_info_str(ds): # Array dimensions table - instr = ds.inventory.get("instrument", "") + instr = ds.inventory.get("instrumentName", "") if instr: instr += " " + nframes = ds.inventory.get("frameCount", "") - if is_tiled: - s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\n" - s += f"Each {instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions\n\n" - else: - s = f"This {instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions\n\n" - s += f"{ds.data}\n\n" + s = f"This {instr}Dataset has {wcs.pixel_n_dim} array and {wcs.world_n_dim} world dimensions and consists of {nframes} frames stored in {ds.files.basepath}\n\n" + s += f"The data are represented by a Dask array: {ds.data}\n\n" array_shape = wcs.array_shape or (0,) pixel_shape = wcs.pixel_shape or (None,) * wcs.pixel_n_dim From 9efe70e4c59646d6bfaec3f9f50c3bc54ad4c323 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 21 Aug 2024 15:29:09 +0100 Subject: [PATCH 03/22] Use pretty correlation 
matrix instead of plain one --- dkist/dataset/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 113fb7cf..c13b52c8 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -103,7 +103,18 @@ def _get_pp_matrix(wcs): for i, col in enumerate(mstr.T): wid = np.max([len(a) for a in col]) mstr[:, i] = np.char.rjust(col, wid) - print(np.array_str(mstr, max_line_width=1000)) + return np.array_str(mstr, max_line_width=1000) + + +def pp_matrix(wcs): + """ + A small helper function to print a correlation matrix with labels + + Parameters + ---------- + wcs : `BaseHighLevelWCS` or `BaseLowLevelWCS` + """ + print(_get_pp_matrix(wcs)) def extract_pc_matrix(headers, naxes=None): From 9ddfce6089e6310656d3c97d12b1736df16b266c Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:01:35 +0100 Subject: [PATCH 04/22] Tweak some output a little --- dkist/dataset/utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index c13b52c8..146e3786 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,8 +26,15 @@ def dataset_info_str(ds): instr += " " nframes = ds.inventory.get("frameCount", "") - s = f"This {instr}Dataset has {wcs.pixel_n_dim} array and {wcs.world_n_dim} world dimensions and consists of {nframes} frames stored in {ds.files.basepath}\n\n" - s += f"The data are represented by a Dask array: {ds.data}\n\n" + if is_tiled: + s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\nEach " + else: + s = "This " + + s += f"{instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + if ds.files: + s +="Files are stored in {ds.files.basepath}\n\n" + s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) pixel_shape = wcs.pixel_shape or 
(None,) * wcs.pixel_n_dim From d203862c5a07491b0cfd27dce3383d941bb8a514 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:01:53 +0100 Subject: [PATCH 05/22] Flip the ordering of world array indices to be correct --- dkist/dataset/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 146e3786..a3d1c8eb 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -76,11 +76,11 @@ def dataset_info_str(ds): ("{0:" + str(world_typ_width) + "s}").format("Physical Type") + " " + "Units\n") - for iwrl in range(wcs.world_n_dim): + for iwrl in range(wcs.world_n_dim)[::-1]: - name = wcs.world_axis_names[::-1][iwrl] or "None" - typ = wcs.world_axis_physical_types[::-1][iwrl] or "None" - unit = wcs.world_axis_units[::-1][iwrl] or "unknown" + name = wcs.world_axis_names[iwrl] or "None" + typ = wcs.world_axis_physical_types[iwrl] or "None" + unit = wcs.world_axis_units[iwrl] or "unknown" s += (("{0:" + str(world_dim_width) + "d}").format(iwrl) + " " + ("{0:" + str(world_nam_width) + "s}").format(name) + " " + From cfc8043e92724f1b5e075a92dc28eb92ecdeefea Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 11:04:37 +0100 Subject: [PATCH 06/22] Improvements to pretty correlation matrix output --- dkist/dataset/utils.py | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index a3d1c8eb..b034b2f2 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -2,6 +2,8 @@ Helper functions for the Dataset class. 
""" +import textwrap + import numpy as np import gwcs @@ -93,7 +95,7 @@ def dataset_info_str(ds): pixel_dim_width = max(3, len(str(wcs.world_n_dim))) - s += "Correlation between array and world axes:\n\n" + s += "Correlation between pixel and world axes:\n\n" s += _get_pp_matrix(ds.wcs) @@ -103,14 +105,39 @@ def dataset_info_str(ds): def _get_pp_matrix(wcs): slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) - mstr = wcs.axis_correlation_matrix.astype(f" Date: Fri, 23 Aug 2024 13:50:18 +0100 Subject: [PATCH 07/22] Include dataset ID --- dkist/dataset/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index b034b2f2..bee70f48 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -29,13 +29,13 @@ def dataset_info_str(ds): nframes = ds.inventory.get("frameCount", "") if is_tiled: - s = f"This {dstype} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {dstype} {ds.inventory['datasetId']} consists of an array of {tile_shape} Dataset objects\n\nEach " else: s = "This " - s += f"{instr}Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"{instr}Dataset {ds.inventory['datasetId']} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: - s +="Files are stored in {ds.files.basepath}\n\n" + s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) From e6a8968fa4dfa9c0406dbdaa02e62e97d8748b89 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 13:53:25 +0100 Subject: [PATCH 08/22] Add changelog --- changelog/431.trivial.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/431.trivial.rst diff --git a/changelog/431.trivial.rst b/changelog/431.trivial.rst 
new file mode 100644 index 00000000..b9cd4d0a --- /dev/null +++ b/changelog/431.trivial.rst @@ -0,0 +1 @@ +Update Dataset representation for better readability. From 206ab93922319d2523175dfde64e1f163b6067a9 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 14:36:01 +0100 Subject: [PATCH 09/22] Slight generalisation to make the tests pass and hopefully catch any weird data --- dkist/dataset/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index bee70f48..5c997293 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,14 +26,15 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = ds.inventory.get("frameCount", "") + nframes = ds.inventory.get("frameCount", "an unknown number") + dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: - s = f"This {dstype} {ds.inventory['datasetId']} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach " else: s = "This " - s += f"{instr}Dataset {ds.inventory['datasetId']} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"{instr}Dataset {dsID} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" @@ -120,7 +121,10 @@ def _get_pp_matrix(wcs): header = np.vstack([[s.center(width) for s in wrapped[l]] for l, _ in enumerate(labels)]).T mstr = np.insert(mstr, 0, header, axis=0) - world = ["", "WORLD DIMENSIONS", *list(wcs.world_axis_names)] + world = ["WORLD DIMENSIONS", *list(wcs.world_axis_names)] + nrows = maxlines + len(wcs.world_axis_names) + while len(world) < nrows: + world.insert(0, "") mstr = 
np.insert(mstr, 0, world, axis=1) widths = [np.max([len(a) for a in col]) for col in mstr.T] mstr = np.insert(mstr, 2, ["-"*wid for wid in widths], axis=0) From 9b4ac7dfd6b8c6a20bca40e00e556eaa8b941d9f Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 23 Aug 2024 14:45:10 +0100 Subject: [PATCH 10/22] Slight tweaks --- dkist/dataset/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 5c997293..9c4a50e7 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -30,11 +30,11 @@ def dataset_info_str(ds): dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: - s = f"This {dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach " + s = f"This {instr}{dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach Dataset " else: - s = "This " + s = f"This {instr}Dataset {dsID} " - s += f"{instr}Dataset {dsID} has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" if ds.files: s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 8d46818f6f685854768faea7f4bd06e67857de28 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Thu, 29 Aug 2024 16:01:34 +0100 Subject: [PATCH 11/22] Minor tweaks --- dkist/dataset/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 9c4a50e7..808d9737 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,7 +26,7 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = ds.inventory.get("frameCount", "an unknown number") + nframes = len(ds.files) dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: @@ -34,8 
+34,10 @@ def dataset_info_str(ds): else: s = f"This {instr}Dataset {dsID} " - s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions and consists of {nframes} frames\n" + s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" + if ds.files: + s += f" and consists of {nframes} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 43db5f59e0df5fef09b098ef222f0a8ce72518d6 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 15:03:36 +0100 Subject: [PATCH 12/22] Tweaks to make doc tests pass (mostly) --- dkist/dataset/loader.py | 49 +++++++++++++++++++++-------------------- dkist/dataset/utils.py | 3 +-- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 46af93a5..71db188e 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -45,45 +45,46 @@ def load_dataset(target): Examples -------- + >>> import dkist + >>> dkist.load_dataset("/path/to/VISP_L1_ABCDE.asdf") # doctest: +SKIP >>> dkist.load_dataset("/path/to/ABCDE/") # doctest: +SKIP >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP - >>> from sunpy.net import Fido, attrs as a - >>> import dkist.net - >>> search_results = Fido.search(a.dkist.Dataset("AGLKO")) # doctest: +REMOTE_DATA - >>> files = Fido.fetch(search_results) # doctest: +REMOTE_DATA - >>> dkist.load_dataset(files) # doctest: +REMOTE_DATA - - This Dataset has 4 pixel and 5 world dimensions + >>> from dkist.data.sample import VISP_BKPLX + >>> print(dkist.load_dataset(VISP_BKPLX)) + This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames + Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX - dask.array + The data are represented by a object: + dask.array - Pixel Dim Axis Name Data size Bounds + Array Dim Axis Name Data size Bounds 0 polarization state 4 None - 1 raster scan step 
number 1000 None - 2 dispersion axis 976 None - 3 spatial along slit 2555 None + 1 raster scan step number 425 None + 2 dispersion axis 980 None + 3 spatial along slit 2554 None World Dim Axis Name Physical Type Units - 0 stokes phys.polarization.stokes unknown - 1 time time s + 4 stokes phys.polarization.stokes unknown + 3 time time s 2 helioprojective latitude custom:pos.helioprojective.lat arcsec - 3 wavelength em.wl nm - 4 helioprojective longitude custom:pos.helioprojective.lon arcsec + 1 wavelength em.wl nm + 0 helioprojective longitude custom:pos.helioprojective.lon arcsec Correlation between pixel and world axes: - Pixel Dim - World Dim 0 1 2 3 - 0 yes no no no - 1 no yes no no - 2 no yes no yes - 3 no no yes no - 4 no yes no yes - + | PIXEL DIMENSIONS + | spatial | dispersion | raster scan | polarization + WORLD DIMENSIONS | along slit | axis | step number | state + ------------------------- | ------------ | ------------ | ------------ | ------------ + helioprojective longitude | x | | x | + wavelength | | x | | + helioprojective latitude | x | | x | + time | | | x | + stokes | | | | x """ known_types = _known_types_docs().keys() raise TypeError(f"Input type {type(target).__name__} not recognised. 
It must be one of {', '.join(known_types)}.") diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 808d9737..00d350e6 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -26,7 +26,6 @@ def dataset_info_str(ds): instr = ds.inventory.get("instrumentName", "") if instr: instr += " " - nframes = len(ds.files) dsID = ds.inventory.get("datasetId", "(no DatasetID)") if is_tiled: @@ -37,7 +36,7 @@ def dataset_info_str(ds): s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: - s += f" and consists of {nframes} frames\n" + s += f" and consists of {len(ds.files)} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From af5829d5d3ad0c62ae1a10f5d42cee2cbfd0eaef Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 15:15:49 +0100 Subject: [PATCH 13/22] Nope that still needed to be a remote test --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 71db188e..50526595 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -53,7 +53,7 @@ def load_dataset(target): >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP - >>> from dkist.data.sample import VISP_BKPLX + >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX From f55dfcc5ab5983cbedbe25b748d34ff040ad44d2 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 16:00:17 +0100 Subject: [PATCH 14/22] Calculate correct number of files for TiledDatasets --- dkist/dataset/utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 00d350e6..6bc47823 
100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -11,14 +11,16 @@ __all__ = ["dataset_info_str"] -def dataset_info_str(ds): +def dataset_info_str(ds_in): # Check for an attribute that only appears on TiledDataset # Not using isinstance to avoid circular import - is_tiled = hasattr(ds, "combined_headers") - dstype = type(ds).__name__ + is_tiled = hasattr(ds_in, "combined_headers") + dstype = type(ds_in).__name__ if is_tiled: - tile_shape = ds.shape - ds = ds[0, 0] + tile_shape = ds_in.shape + ds = ds_in[0, 0] + else: + ds = ds_in wcs = ds.wcs.low_level_wcs # Array dimensions table @@ -36,7 +38,8 @@ def dataset_info_str(ds): s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: - s += f" and consists of {len(ds.files)} frames\n" + nframes = len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) + s += f" and consists of {nframes} frames\n" s += f"Files are stored in {ds.files.basepath}\n\n" s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" From 6960de82be37731b7846ff795779da6b71919dfe Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Fri, 30 Aug 2024 16:01:36 +0100 Subject: [PATCH 15/22] Slicing TiledDatasets gives back a different kind of WCS --- dkist/dataset/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 6bc47823..8501a63d 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -109,6 +109,7 @@ def dataset_info_str(ds_in): def _get_pp_matrix(wcs): + wcs = wcs.low_level_wcs # Just in case the dataset has been sliced and returned the wrong kind of wcs slen = np.max([len(line) for line in list(wcs.world_axis_names) + list(wcs.pixel_axis_names)]) mstr = wcs.axis_correlation_matrix.astype(" Date: Mon, 2 Sep 2024 11:10:21 +0100 Subject: [PATCH 16/22] This needs to be REMOTE_DATA'd as well --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 50526595..cc100a12 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -54,7 +54,7 @@ def load_dataset(target): >>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA - >>> print(dkist.load_dataset(VISP_BKPLX)) + >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX From d433c450e64095cb4c7e00909bc4b9bece4caa23 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:32:16 +0100 Subject: [PATCH 17/22] Correct/obfuscate sample data path in dataset repr test --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index cc100a12..c9c84278 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -56,7 +56,7 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames - Files are stored in /home/drew/.local/share/dkist/VISP_BKPLX + Files are stored in /.../VISP_BKPLX The data are represented by a object: dask.array From caeb378f11a6bed589779e9436f6a1f597e57589 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:40:25 +0100 Subject: [PATCH 18/22] Don't need to run doctests on old releases --- docs/whatsnew/1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/whatsnew/1.0.rst b/docs/whatsnew/1.0.rst index 52356301..b64bab26 100644 --- a/docs/whatsnew/1.0.rst +++ b/docs/whatsnew/1.0.rst @@ -60,8 +60,8 @@ Any DKIST level one ASDF file can be loaded with the `dkist.load_dataset` functi >>> import dkist - >>> ds = 
dkist.load_dataset(asdf_files) # doctest: +REMOTE_DATA - >>> ds # doctest: +REMOTE_DATA + >>> ds = dkist.load_dataset(asdf_files) # doctest: +SKIP + >>> ds # doctest: +SKIP This Dataset has 4 pixel and 5 world dimensions From 08edf43ab1ee8ef0397e0626f53d8019de43fef3 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Mon, 2 Sep 2024 11:53:48 +0100 Subject: [PATCH 19/22] Fine I'll just skip all of them if you're going to be like that, doctest --- docs/whatsnew/1.0.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/whatsnew/1.0.rst b/docs/whatsnew/1.0.rst index b64bab26..bcaf8a04 100644 --- a/docs/whatsnew/1.0.rst +++ b/docs/whatsnew/1.0.rst @@ -29,7 +29,7 @@ Here is a really quick demo of searching for all unembargoed VISP data and downl >>> from sunpy.net import Fido, attrs as a >>> import dkist.net - >>> res = Fido.search(a.Instrument.visp, a.dkist.Embargoed.false) # doctest: +REMOTE_DATA + >>> res = Fido.search(a.Instrument.visp, a.dkist.Embargoed.false) # doctest: +SKIP >>> res # doctest: +SKIP Results from 1 Provider: @@ -44,8 +44,8 @@ Here is a really quick demo of searching for all unembargoed VISP data and downl - >>> asdf_files = Fido.fetch(res[:, 0]) # doctest: +REMOTE_DATA - >>> asdf_files # doctest: +REMOTE_DATA + >>> asdf_files = Fido.fetch(res[:, 0]) # doctest: +SKIP + >>> asdf_files # doctest: +SKIP ['...VISP_L1_20220602T175042_BDWQK.asdf'] @@ -133,8 +133,8 @@ This means you can first slice out a smaller dataset, and then only download the .. 
code-block:: python - >>> stokes_I_ds = ds[0] # doctest: +REMOTE_DATA - >>> stokes_I_ds # doctest: +REMOTE_DATA + >>> stokes_I_ds = ds[0] # doctest: +SKIP + >>> stokes_I_ds # doctest: +SKIP This Dataset has 3 pixel and 4 world dimensions From b18e98e68ebfc006e8d0a2f6e823b7787efc514a Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Tue, 3 Sep 2024 16:01:20 +0100 Subject: [PATCH 20/22] Update dkist/dataset/loader.py --- dkist/dataset/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index c9c84278..9964f480 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -56,7 +56,7 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames - Files are stored in /.../VISP_BKPLX + Files are stored in ...VISP_BKPLX The data are represented by a object: dask.array From 20037e95adc5150e314e2051d5e470d0f748c866 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Tue, 3 Sep 2024 17:03:52 +0100 Subject: [PATCH 21/22] Some reworking for tiled / not tiled englishing --- dkist/dataset/utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 8501a63d..05d1d102 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -30,17 +30,23 @@ def dataset_info_str(ds_in): instr += " " dsID = ds.inventory.get("datasetId", "(no DatasetID)") + s = f"This {instr}Dataset {dsID} " if is_tiled: - s = f"This {instr}{dstype} {dsID} consists of an array of {tile_shape} Dataset objects\n\nEach Dataset " - else: - s = f"This {instr}Dataset {dsID} " + s += f"is an array of {tile_shape} Dataset objects " + if ds.files: + s += "and \n" - s += f"has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions" if ds.files: nframes 
= len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) - s += f" and consists of {nframes} frames\n" - s += f"Files are stored in {ds.files.basepath}\n\n" + s += f"consists of {nframes} frames stored in {ds.files.basepath}\n" + + if is_tiled: + s += "\nEach " + else: + s += "\nThis " + s += f"Dataset has {wcs.pixel_n_dim} pixel and {wcs.world_n_dim} world dimensions.\n\n" + s += f"The data are represented by a {type(ds.data)} object:\n{ds.data}\n\n" array_shape = wcs.array_shape or (0,) From cd0efaab7203d9c172976203ae00b51d9af397c7 Mon Sep 17 00:00:00 2001 From: Drew Leonard Date: Wed, 4 Sep 2024 11:46:38 +0100 Subject: [PATCH 22/22] Tweak repr again and update loader docstring --- dkist/dataset/loader.py | 4 +++- dkist/dataset/utils.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dkist/dataset/loader.py b/dkist/dataset/loader.py index 9964f480..1b1f4ea0 100644 --- a/dkist/dataset/loader.py +++ b/dkist/dataset/loader.py @@ -55,9 +55,11 @@ def load_dataset(target): >>> from dkist.data.sample import VISP_BKPLX # doctest: +REMOTE_DATA >>> print(dkist.load_dataset(VISP_BKPLX)) # doctest: +REMOTE_DATA - This VISP Dataset BKPLX has 4 pixel and 5 world dimensions and consists of 1700 frames + This VISP Dataset BKPLX consists of 1700 frames. Files are stored in ...VISP_BKPLX + This Dataset has 4 pixel and 5 world dimensions. + The data are represented by a object: dask.array diff --git a/dkist/dataset/utils.py b/dkist/dataset/utils.py index 05d1d102..6ab71090 100644 --- a/dkist/dataset/utils.py +++ b/dkist/dataset/utils.py @@ -39,7 +39,8 @@ def dataset_info_str(ds_in): if ds.files: nframes = len(ds.files) if not is_tiled else sum([len(tile.files) for tile in ds_in.flat]) - s += f"consists of {nframes} frames stored in {ds.files.basepath}\n" + s += f"consists of {nframes} frames.\n" + s += f"Files are stored in {ds.files.basepath}\n" if is_tiled: s += "\nEach "