
Commit ce81964

Merge branch 'fix-publish-tests' of github.com:SolarDrew/dkist into fix-publish-tests

SolarDrew committed Sep 4, 2024
2 parents e2ca11e + 67c71ec commit ce81964

Showing 15 changed files with 199 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .cruft.json
@@ -1,6 +1,6 @@
{
"template": "https://github.com/sunpy/package-template",
"commit": "112d7d4adf0fa168bbb9ddb1886ad4f1e595b8be",
"commit": "f7458b35be5824d419efd6ce8c135ff67a00d1d5",
"checkout": null,
"context": {
"cookiecutter": {
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,7 +1,7 @@
repos:
# This should be before any formatting hooks like isort
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: "v0.5.2"
rev: "v0.6.2"
hooks:
- id: ruff
args: ["--fix"]
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -1,7 +1,7 @@
version: 2

build:
-  os: ubuntu-22.04
+  os: ubuntu-lts-latest
tools:
python: "mambaforge-latest"
jobs:
2 changes: 1 addition & 1 deletion .rtd-environment.yml
@@ -2,6 +2,6 @@ name: dkist
channels:
- conda-forge
dependencies:
-  - python=3.10
+  - python=3.12
- pip
- graphviz!=2.42.*,!=2.43.*
4 changes: 4 additions & 0 deletions changelog/402.feature.rst
@@ -0,0 +1,4 @@
Add various features for easier inspection of `TiledDataset`:
- `__repr__` method to output basic dataset info;
- `tiles_shape` property to access data array shape for each individual tile;
- `slice_tiles` property to apply the same slice to all tiles.
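
A hypothetical usage sketch of these additions (the file name and tile shapes are illustrative):

    >>> import dkist
    >>> ds = dkist.load_dataset("VBI_L1_XXXXX.asdf")  # doctest: +SKIP
    >>> ds                       # new __repr__: object info plus summary  # doctest: +SKIP
    >>> ds.tiles_shape           # e.g. [[(4096, 4096), (4096, 4096)]]  # doctest: +SKIP
    >>> ds.slice_tiles[0, :100]  # same slice applied to every tile  # doctest: +SKIP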
1 change: 1 addition & 0 deletions changelog/422.trivial.rst
@@ -0,0 +1 @@
Replace usages of ``copy_arrays`` with ``memmap`` for ``asdf>=3.1.0``.
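
A minimal sketch of the substitution this entry describes (``memmap`` is the logical inverse of ``copy_arrays``; the file name is illustrative):

    import asdf
    # asdf < 3.1.0 -- old keyword: copying arrays means no memory mapping
    af = asdf.open("file.asdf", copy_arrays=True)
    # asdf >= 3.1.0 -- renamed keyword with inverted sense, same behaviour
    af = asdf.open("file.asdf", memmap=False)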
1 change: 1 addition & 0 deletions changelog/431.trivial.rst
@@ -0,0 +1 @@
Update Dataset representation for better readability.
9 changes: 9 additions & 0 deletions dkist/conftest.py
@@ -282,6 +282,15 @@ def simple_tiled_dataset(dataset):
return TiledDataset(dataset_array, dataset.meta["inventory"])


@pytest.fixture
def large_tiled_dataset(tmp_path_factory):
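    """Decompress the bundled large VBI test ASDF into a temporary directory and load it."""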
vbidir = tmp_path_factory.mktemp("data")
with gzip.open(Path(rootdir) / "large_vbi.asdf.gz", mode="rb") as gfo:
with open(vbidir / "test_vbi.asdf", mode="wb") as afo:
afo.write(gfo.read())
return load_dataset(vbidir / "test_vbi.asdf")


@pytest.fixture
def small_visp_dataset():
"""
Binary file added dkist/data/test/large_vbi.asdf.gz
Binary file not shown.
59 changes: 34 additions & 25 deletions dkist/dataset/loader.py
@@ -14,6 +14,12 @@
from asdf import ValidationError


def asdf_open_memory_mapping_kwarg(memmap: bool) -> dict:
if asdf.__version__ > "3.1.0":
return {"memmap": memmap}
return {"copy_arrays": not memmap}


@singledispatch
def load_dataset(target):
"""
@@ -39,45 +45,48 @@ def load_dataset(target):
Examples
--------
>>> import dkist
>>> dkist.load_dataset("/path/to/VISP_L1_ABCDE.asdf") # doctest: +SKIP
>>> dkist.load_dataset("/path/to/ABCDE/") # doctest: +SKIP
>>> dkist.load_dataset(Path("/path/to/ABCDE")) # doctest: +SKIP
-    >>> from sunpy.net import Fido, attrs as a
-    >>> import dkist.net
-    >>> search_results = Fido.search(a.dkist.Dataset("AGLKO"))  # doctest: +REMOTE_DATA
-    >>> files = Fido.fetch(search_results)  # doctest: +REMOTE_DATA
-    >>> dkist.load_dataset(files)  # doctest: +REMOTE_DATA
-    <dkist.dataset.dataset.Dataset object at ...>
-    This Dataset has 4 pixel and 5 world dimensions
+    >>> from dkist.data.sample import VISP_BKPLX  # doctest: +REMOTE_DATA
+    >>> print(dkist.load_dataset(VISP_BKPLX))  # doctest: +REMOTE_DATA
+    This VISP Dataset BKPLX consists of 1700 frames.
+    Files are stored in ...VISP_BKPLX
     <BLANKLINE>
-    dask.array<reshape, shape=(4, 1000, 976, 2555), dtype=float64, chunksize=(1, 1, 976, 2555), chunktype=numpy.ndarray>
+    This Dataset has 4 pixel and 5 world dimensions.
     <BLANKLINE>
-    Pixel Dim  Axis Name                Data size  Bounds
+    The data are represented by a <class 'dask.array.core.Array'> object:
+    dask.array<reshape, shape=(4, 425, 980, 2554), dtype=float64, chunksize=(1, 1, 980, 2554), chunktype=numpy.ndarray>
+    <BLANKLINE>
+    Array Dim  Axis Name                Data size  Bounds
             0  polarization state               4  None
-            1  raster scan step number       1000  None
-            2  dispersion axis                976  None
-            3  spatial along slit            2555  None
+            1  raster scan step number        425  None
+            2  dispersion axis                980  None
+            3  spatial along slit            2554  None
     <BLANKLINE>
     World Dim  Axis Name                  Physical Type                   Units
-            0  stokes                     phys.polarization.stokes        unknown
-            1  time                       time                            s
+            4  stokes                     phys.polarization.stokes        unknown
+            3  time                       time                            s
             2  helioprojective latitude   custom:pos.helioprojective.lat  arcsec
-            3  wavelength                 em.wl                           nm
-            4  helioprojective longitude  custom:pos.helioprojective.lon  arcsec
+            1  wavelength                 em.wl                           nm
+            0  helioprojective longitude  custom:pos.helioprojective.lon  arcsec
     <BLANKLINE>
     Correlation between pixel and world axes:
     <BLANKLINE>
-                 Pixel Dim
-    World Dim    0    1    2    3
-            0  yes   no   no   no
-            1   no  yes   no   no
-            2   no  yes   no  yes
-            3   no   no  yes   no
-            4   no  yes   no  yes
+                              |                      PIXEL DIMENSIONS
+                              |   spatial    |  dispersion  | raster scan  | polarization
+             WORLD DIMENSIONS |  along slit  |     axis     | step number  |    state
+    ------------------------- | ------------ | ------------ | ------------ | ------------
+    helioprojective longitude |      x       |              |      x       |
+                   wavelength |              |      x       |              |
+     helioprojective latitude |      x       |              |      x       |
+                         time |              |              |      x       |
+                       stokes |              |              |              |      x
"""
known_types = _known_types_docs().keys()
raise TypeError(f"Input type {type(target).__name__} not recognised. It must be one of {', '.join(known_types)}.")
@@ -156,7 +165,7 @@ def _load_from_asdf(filepath):
try:
with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path:
with asdf.open(filepath, custom_schema=schema_path.as_posix(),
-                       lazy_load=False, copy_arrays=True) as ff:
+                       lazy_load=False, **asdf_open_memory_mapping_kwarg(memmap=False)) as ff:
ds = ff.tree["dataset"]
if isinstance(ds, TiledDataset):
for sub in ds.flat:
Expand Down
16 changes: 16 additions & 0 deletions dkist/dataset/tests/test_tiled_dataset.py
@@ -28,6 +28,14 @@ def test_tiled_dataset_slice(simple_tiled_dataset, aslice):
assert np.all(simple_tiled_dataset[aslice] == simple_tiled_dataset._data[aslice])


@pytest.mark.parametrize("aslice", [np.s_[0, :100, 100:200]])
def test_tiled_dataset_slice_tiles(large_tiled_dataset, aslice):
sliced = large_tiled_dataset.slice_tiles[aslice]
for i, tile in enumerate(sliced.flat):
# Accessing tile.shape here raises an AttributeError (cause unknown), so check tile.data.shape instead
assert tile.data.shape == (100, 100)


def test_tiled_dataset_headers(simple_tiled_dataset, dataset):
assert len(simple_tiled_dataset.combined_headers) == len(dataset.meta["headers"]) * 4
assert simple_tiled_dataset.combined_headers.colnames == dataset.meta["headers"].colnames
@@ -75,3 +83,11 @@ def test_tileddataset_plot(share_zscale):
fig = plt.figure(figsize=(600, 800))
ds.plot(0, share_zscale=share_zscale)
return plt.gcf()

def test_repr(simple_tiled_dataset):
r = repr(simple_tiled_dataset)
assert str(simple_tiled_dataset[0, 0].data) in r


def test_tiles_shape(simple_tiled_dataset):
assert simple_tiled_dataset.tiles_shape == [[tile.data.shape for tile in row] for row in simple_tiled_dataset]
38 changes: 38 additions & 0 deletions dkist/dataset/tiled_dataset.py
@@ -5,6 +5,7 @@
but not representable in a single NDCube derived object as the array data are
not contiguous in the spatial dimensions (due to overlaps and offsets).
"""
from textwrap import dedent
from collections.abc import Collection

import matplotlib.pyplot as plt
@@ -13,10 +14,26 @@
from astropy.table import vstack

from .dataset import Dataset
from .utils import dataset_info_str

__all__ = ["TiledDataset"]


class TiledDatasetSlicer:
"""
Basic class to provide the slicing
"""
def __init__(self, data, inventory):
self.data = data
self.inventory = inventory

def __getitem__(self, slice_):
new_data = []
for tile in self.data.flat:
new_data.append(tile[slice_])
return TiledDataset(np.array(new_data).reshape(self.data.shape), self.inventory)


class TiledDataset(Collection):
"""
Holds a grid of `.Dataset` objects.
@@ -125,6 +142,13 @@ def shape(self):
"""
return self._data.shape

@property
def tiles_shape(self):
"""
The shape of each individual tile in the TiledDataset.
"""
return [[tile.data.shape for tile in row] for row in self]

def plot(self, slice_index: int, share_zscale=False, **kwargs):
vmin, vmax = np.inf, 0
fig = plt.figure()
@@ -151,4 +175,18 @@ def plot(self, slice_index: int, share_zscale=False, **kwargs):
fig.suptitle(f"{self.inventory['instrumentName']} Dataset ({self.inventory['datasetId']}) at time {timestamp} (slice={slice_index})", y=0.95)
return fig

@property
def slice_tiles(self):
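        """
        Tile-wise slicing: ``ts.slice_tiles[aslice]`` applies ``aslice`` to every
        tile and returns a new `TiledDataset` of the sliced tiles.
        """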
return TiledDatasetSlicer(self._data, self.inventory)

# TODO: def regrid()

def __repr__(self):
"""
Overload the NDData repr because it does not play nice with the dask delayed io.
"""
prefix = object.__repr__(self)
return dedent(f"{prefix}\n{self.__str__()}")

def __str__(self):
return dataset_info_str(self)