Skip to content

Commit

Permalink
Add FileManager to TiledDataset (#437)
Browse files Browse the repository at this point in the history
* Add FileManager to TiledDataset

* Add test for TiledDataset filemanager

* Move file manager construction into property on TiledDataset

* Trim duplicate line

* Don't use sample data for that test because internet

* Add changelog

* Typo

* Don't need a thing

* Check that file manager values match all tiles

---------

Co-authored-by: Stuart Mumford <[email protected]>
  • Loading branch information
SolarDrew and Cadair authored Sep 30, 2024
1 parent fb3ca0d commit 6770e8e
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 1 deletion.
1 change: 1 addition & 0 deletions changelog/437.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a FileManager property to TiledDataset for tracking files more easily.
1 change: 0 additions & 1 deletion dkist/dataset/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ def test_file_manager():
dataset.files = 10

assert len(dataset.files.filenames) == 11
assert len(dataset.files.filenames) == 11

assert isinstance(dataset[5]._file_manager, FileManager)
assert len(dataset[..., 5].files.filenames) == 11
Expand Down
20 changes: 20 additions & 0 deletions dkist/dataset/tests/test_tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,23 @@ def test_repr(simple_tiled_dataset):
@pytest.mark.accept_cli_tiled_dataset
def test_tiles_shape(simple_tiled_dataset):
    """Check ``tiles_shape`` against the per-tile data shapes, row by row."""
    reported = simple_tiled_dataset.tiles_shape

    # Build the expected nested list by walking the tiles in row order.
    expected = []
    for row in simple_tiled_dataset:
        expected.append([tile.data.shape for tile in row])

    assert reported == expected


def test_file_manager(large_tiled_dataset):
    """Exercise ``TiledDataset.files``: assignment, shapes, slicing and tile validation."""
    tiled = large_tiled_dataset

    # Assigning to ``files`` is expected to be rejected.
    with pytest.raises(AttributeError):
        tiled.files = 10

    all_filenames = tiled.files.filenames
    assert len(all_filenames) == 27
    assert tiled.files.shape == (1, 4096, 4096)
    assert tiled.files.output_shape == (3, 3, 3, 4096, 4096)

    # Have some slicing tests here
    first_slice = tiled.slice_tiles[0]
    assert len(first_slice.files.filenames) == 9
    sub_grid = tiled[:2, :2]
    assert len(sub_grid.files.filenames) == 12

    # TODO Also test that the other checks raise errors
    # This at least demonstrates that the structure works
    mismatched_tile = tiled[1, 1]
    mismatched_tile.files.fileuri_array.dtype = np.dtype("<i")
    with pytest.raises(AssertionError, match="must be the same across all tiles"):
        # Accessing the property is what triggers the cross-tile check.
        tiled.files
39 changes: 39 additions & 0 deletions dkist/dataset/tiled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

from astropy.table import vstack

from dkist.io.file_manager import FileManager, StripedExternalArray
from dkist.io.loaders import AstropyFITSLoader

from .dataset import Dataset
from .utils import dataset_info_str

Expand Down Expand Up @@ -190,3 +193,39 @@ def __repr__(self):

def __str__(self):
"""Return the string produced by ``dataset_info_str`` for this object."""
return dataset_info_str(self)

@property
def files(self):
"""
A `~.FileManager` helper for interacting with the files backing the data in every tile of this ``TiledDataset``.
"""
return self._file_manager

@property
def _file_manager(self):
    """
    Build a `~.FileManager` covering the files of every tile.

    The file names of each tile are gathered into a nested list that follows
    the row layout of the tiles. The dtype, shape, basepath and chunksize are
    taken from the tile at ``[0, 0]`` and every tile is required to agree
    with them.

    A new `~.FileManager` is constructed on every access.

    Returns
    -------
    `~.FileManager`
        A manager wrapping a `~.StripedExternalArray` built from all tiles.

    Raises
    ------
    AssertionError
        If any tile's file dtype, file shape, basepath or data chunksize
        differs from that of the tile at ``[0, 0]``.
    """
    fileuris = [[tile.files.filenames for tile in row] for row in self]

    # The first tile provides the reference values for all the others.
    reference = self[0, 0]
    dtype = reference.files.fileuri_array.dtype
    shape = reference.files.shape
    basepath = reference.files.basepath
    chunksize = reference._data.chunksize

    for tile in self.flat:
        # Explicit comparisons rather than ``assert`` statements: asserts are
        # removed when Python runs with ``-O``, which would silently skip
        # this validation. The exception type and message are unchanged.
        consistent = (
            dtype == tile.files.fileuri_array.dtype
            and shape == tile.files.shape
            and basepath == tile.files.basepath
            and chunksize == tile._data.chunksize
        )
        if not consistent:
            raise AssertionError("Attributes of TiledDataset.FileManager must be the same across all tiles.")

    # NOTE(review): the loader is always AstropyFITSLoader and target is
    # always 1, regardless of what the individual tiles use -- confirm this
    # is intended.
    return FileManager(
        StripedExternalArray(
            fileuris=fileuris,
            target=1,
            dtype=dtype,
            shape=shape,
            loader=AstropyFITSLoader,
            basepath=basepath,
            chunksize=chunksize,
        )
    )

0 comments on commit 6770e8e

Please sign in to comment.