Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FileManager to TiledDataset #437

Merged
merged 10 commits into from
Sep 30, 2024
1 change: 1 addition & 0 deletions changelog/437.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a FileManager property to TiledDataset for tracking files more easily.
1 change: 0 additions & 1 deletion dkist/dataset/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def test_file_manager():
dataset.files = 10

assert len(dataset.files.filenames) == 11
assert len(dataset.files.filenames) == 11

assert isinstance(dataset[5]._file_manager, FileManager)
assert len(dataset[..., 5].files.filenames) == 11
Expand Down
20 changes: 20 additions & 0 deletions dkist/dataset/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,23 @@ def test_repr(simple_tiled_dataset):
@pytest.mark.accept_cli_tiled_dataset
def test_tiles_shape(simple_tiled_dataset):
    """Check that ``tiles_shape`` matches the per-tile data shapes, nested row by row."""
    reported_shapes = simple_tiled_dataset.tiles_shape

    # Rebuild the expected nesting by walking the rows of the tiled dataset.
    expected_shapes = []
    for row in simple_tiled_dataset:
        row_shapes = [tile.data.shape for tile in row]
        expected_shapes.append(row_shapes)

    assert reported_shapes == expected_shapes


def test_file_manager(large_tiled_dataset):
    """
    Exercise the ``files`` property of a tiled dataset.

    Covers: assignment to ``files`` is rejected, the combined file manager
    reports the expected file count and shapes, slicing narrows the file
    list, and a tile whose file manager disagrees with the others makes
    ``files`` raise.
    """
    tiled_ds = large_tiled_dataset

    # ``files`` cannot be assigned to.
    with pytest.raises(AttributeError):
        tiled_ds.files = 10

    all_filenames = tiled_ds.files.filenames
    assert len(all_filenames) == 27
    assert tiled_ds.files.shape == (1, 4096, 4096)
    assert tiled_ds.files.output_shape == (3, 3, 3, 4096, 4096)

    # Slicing the tiles should reduce the number of files tracked.
    first_slice_filenames = tiled_ds.slice_tiles[0].files.filenames
    assert len(first_slice_filenames) == 9
    corner_filenames = tiled_ds[:2, :2].files.filenames
    assert len(corner_filenames) == 12

    # TODO Also test that the other checks raise errors
    # Changing one tile's dtype is enough to show the consistency check works.
    mismatched_tile = tiled_ds[1, 1]
    mismatched_tile.files.fileuri_array.dtype = np.dtype("<i")
    with pytest.raises(AssertionError, match="must be the same across all tiles"):
        tiled_ds.files
39 changes: 39 additions & 0 deletions dkist/dataset/tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

from astropy.table import vstack

from dkist.io.file_manager import FileManager, StripedExternalArray
from dkist.io.loaders import AstropyFITSLoader

from .dataset import Dataset
from .utils import dataset_info_str

Expand Down Expand Up @@ -190,3 +193,39 @@ def __repr__(self):

def __str__(self):
return dataset_info_str(self)

@property
def files(self):
"""
A `~.FileManager` helper for interacting with the files backing the data in this ``TiledDataset``.
"""
return self._file_manager

@property
def _file_manager(self):
    """
    Build a single `~.FileManager` covering the files of every tile.

    The filenames of each tile are collected row by row. The dtype, shape,
    basepath and chunksize are read from the tile at ``self[0, 0]``, and
    every tile in ``self.flat`` must report equal values for them.

    Returns
    -------
    `~.FileManager`
        A new file manager wrapping a `StripedExternalArray` built from the
        collected filenames and the shared attributes.

    Raises
    ------
    AssertionError
        If any tile differs from the first tile in dtype, shape, basepath
        or chunksize.
    """
    fileuris = [[tile.files.filenames for tile in row] for row in self]

    # Index the reference tile once instead of once per attribute.
    reference = self[0, 0]
    dtype = reference.files.fileuri_array.dtype
    shape = reference.files.shape
    basepath = reference.files.basepath
    chunksize = reference._data.chunksize

    # Raise explicitly rather than relying on ``assert`` statements, which
    # are removed when Python runs with ``-O``; the exception type and
    # message are unchanged so existing callers and tests still match.
    for tile in self.flat:
        consistent = (
            dtype == tile.files.fileuri_array.dtype
            and shape == tile.files.shape
            and basepath == tile.files.basepath
            and chunksize == tile._data.chunksize
        )
        if not consistent:
            raise AssertionError("Attributes of TiledDataset.FileManager must be the same across all tiles.")

    return FileManager(
        StripedExternalArray(
            fileuris=fileuris,
            target=1,
            dtype=dtype,
            shape=shape,
            loader=AstropyFITSLoader,
            basepath=basepath,
            chunksize=chunksize,
        )
    )