Skip to content

Commit

Permalink
Add a regression test and docs for new path loader
Browse files Browse the repository at this point in the history
[ci skip]
  • Loading branch information
Cadair committed Jan 22, 2025
1 parent 041ee09 commit ce6e6fa
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 6 deletions.
33 changes: 27 additions & 6 deletions dkist/dataset/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,21 +138,42 @@ def _load_from_path(path: Path):

def _load_from_directory(directory):
"""
Construct a `~dkist.dataset.Dataset` from a directory containing one
asdf file and a collection of FITS files.
Construct a `~dkist.dataset.Dataset` from a directory containing one (or
more) ASDF files and a collection of FITS files.
ASDF files have the generic pattern:
``{instrument}_L1_{start_time:%Y%m%dT%H%M%S}_{dataset_id}[_{suffix}].asdf``
where the ``_{suffix}`` on the end may be absent or one of a few different
suffixes which have been used at different times. When searching a
directory for one or more ASDF file to load we should attempt to only load
one per dataset ID by selecting files in suffix order.
The order of suffixes are (from newest used to oldest):
- ``_metadata``
- ``_user_tools``
- None
The algorithm used to find ASDF files to load in a directory is therefore:
- Glob the directory for all ASDF files
- Group all results by the filename up to and including the dataset id in the filename
- Ignore any ASDF files with an old suffix if a new suffix is present
- Throw a warning to the user if any ASDF files with older suffixes are found
"""
base_path = Path(directory).expanduser()
asdf_files = tuple(base_path.glob("*.asdf"))

if not asdf_files:
raise ValueError(f"No asdf file found in directory {base_path}.")

if len(asdf_files) > 1:
return _load_from_iterable(asdf_files)
if len(asdf_files) == 1:
return _load_from_asdf(asdf_files[0])

asdf_file = asdf_files[0]
return _load_from_iterable(asdf_files)

return _load_from_asdf(asdf_file)


def _load_from_asdf(filepath):
Expand Down
52 changes: 52 additions & 0 deletions dkist/dataset/tests/test_load_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import shutil
import numbers

import pytest
from parfive import Results
Expand Down Expand Up @@ -114,3 +115,54 @@ def test_not_dkist_asdf(tmp_path):

with pytest.raises(TypeError, match="not a valid DKIST"):
load_dataset(tmp_path / "test.asdf")


def generate_asdf_folder(tmp_path, asdf_path, filenames):
    """
    Populate *tmp_path* with one copy of *asdf_path* per name in *filenames*.

    Returns the populated directory so it can be passed straight to
    ``load_dataset``.
    """
    for target_name in filenames:
        destination = tmp_path / target_name
        shutil.copy(asdf_path, destination)

    return tmp_path


@pytest.mark.parametrize(("filenames", "indices"), [
    # param[0] is the tuple of filenames to place in the directory
    # param[1] is the index (int) or indices (tuple) in that list that
    # should actually be loaded by the suffix-selection logic
    (("VBI_L1_20231016T184519_AJQWW.asdf",), 0),
    (("VBI_L1_20231016T184519_AJQWW_user_tools.asdf",), 0),
    (("VBI_L1_20231016T184519_AJQWW_metadata.asdf",), 0),
    (("VBI_L1_20231016T184519_AJQWW_unknown.asdf",), 0),
    (("VBI_L1_20231016T184519_AJQWW.asdf",
      "VBI_L1_20231016T184519_AJQWW_user_tools.asdf",), 1),
    (("VBI_L1_20231016T184519_AJQWW.asdf",
      "VBI_L1_20231016T184519_AJQWW_user_tools.asdf",
      "VBI_L1_20231016T184519_AJQWW_metadata.asdf",), 2),
    (("VBI_L1_20231016T184519_AJQWW.asdf",
      "VBI_L1_20231016T184519_AJQWW_user_tools.asdf",
      "VBI_L1_20231016T184519_AJQWW_metadata.asdf",
      "VBI_L1_20231016T184519_AJQWW_unknown.asdf"), (2, 3)),
])
def test_select_asdf(tmp_path, asdf_folder_path_fixtures_below, filenames, indices, mocker):
    """
    Regression test: loading a directory picks the newest-suffix ASDF file
    per dataset ID, and falls back to loading multiple files when more than
    one distinct candidate remains.
    """
    asdf_folder = generate_asdf_folder(tmp_path, asdf_path, filenames)

    asdf_file_paths = tuple(asdf_folder / fname for fname in filenames)

    # First we check that we load the correct amount of datasets and that the
    # loading completes correctly
    datasets = load_dataset(asdf_folder)

    # An int index means exactly one file should be selected, so a single
    # Dataset is returned; a tuple means one dataset per selected file.
    if isinstance(indices, numbers.Integral):
        assert isinstance(datasets, Dataset)
    else:
        assert len(datasets) == len(indices)

    # Now we check that the correct files are chosen
    load_from_asdf = mocker.patch("dkist.dataset.loader._load_from_asdf")
    load_from_iterable = mocker.patch("dkist.dataset.loader._load_from_iterable")

    datasets = load_dataset(asdf_folder)
    if isinstance(indices, numbers.Integral):
        load_from_asdf.assert_called_once_with(asdf_file_paths[indices])
    else:
        load_from_iterable.assert_called_once_with(tuple(asdf_file_paths[i] for i in indices))

0 comments on commit ce6e6fa

Please sign in to comment.