Skip to content

Commit

Permalink
Return dataclasses rather than tuple
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Dec 5, 2024
1 parent 970e68b commit 2cbb8f0
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 26 deletions.
43 changes: 27 additions & 16 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

__all__ = ["Butler"]

import dataclasses
import urllib.parse
import uuid
from abc import abstractmethod
Expand Down Expand Up @@ -74,6 +75,19 @@
_LOG = getLogger(__name__)


@dataclasses.dataclass
class ParsedButlerDatasetURI:
    """Components extracted from a butler dataset URI.

    This is the value returned by `Butler.parse_dataset_uri`.

    Attributes
    ----------
    label : `str`
        The label associated with the butler repository from which the
        dataset originates.
    dataset_id : `uuid.UUID`
        The ID of the dataset.
    uri : `str`
        The dataset URI that was parsed to produce this object.
    """

    label: str
    dataset_id: uuid.UUID
    uri: str


@dataclasses.dataclass
class SpecificButlerDataset:
    """A butler paired with the dataset looked up in it for a dataset URI.

    This is the value returned by `Butler.get_dataset_from_uri`.

    Attributes
    ----------
    butler : `Butler`
        The butler associated with the dataset URI.
    dataset : `DatasetRef` or `None`
        The dataset associated with the URI. Can be `None` if the UUID is
        valid but the dataset is not known to this butler.
    """

    # NOTE(review): ``Butler`` is defined further down in this module, so this
    # annotation is a forward reference; it presumably relies on postponed
    # annotation evaluation being enabled for the module -- confirm.
    butler: Butler
    dataset: DatasetRef | None


class Butler(LimitedButler): # numpydoc ignore=PR02
"""Interface for data butler and factory for Butler instances.
Expand Down Expand Up @@ -530,7 +544,7 @@ def get_known_repos(cls) -> set[str]:
return ButlerRepoIndex.get_known_repos()

@classmethod
def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
def parse_dataset_uri(cls, uri: str) -> ParsedButlerDatasetURI:
"""Extract the butler label and dataset ID from a dataset URI.
Parameters
Expand All @@ -540,11 +554,9 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
Returns
-------
label : `str`
parsed : `ParsedButlerDatasetURI`
The label associated with the butler repository from which this
dataset originates.
dataset_id : `DatasetId`
The ID of the dataset.
dataset originates and the ID of the dataset.
Notes
-----
Expand Down Expand Up @@ -594,12 +606,12 @@ def parse_dataset_uri(cls, uri: str) -> tuple[str, DatasetId]:
e.add_note(f"Error extracting dataset ID from uri {uri!r} with dataset ID string {id_!r}")
raise

return label, dataset_id
return ParsedButlerDatasetURI(label=label, dataset_id=dataset_id, uri=uri)

@classmethod
def get_dataset_from_uri(
cls, uri: str, factory: LabeledButlerFactoryProtocol | None = None
) -> tuple[Butler, DatasetRef | None]:
) -> SpecificButlerDataset:
"""Get the dataset associated with the given dataset URI.
Parameters
Expand All @@ -613,23 +625,22 @@ def get_dataset_from_uri(
Returns
-------
butler : `Butler`
Butler object associated with this URI.
ref : `DatasetRef` or `None`
The dataset associated with that URI, or `None` if the UUID
is valid but the dataset is not known to this butler.
result : `SpecificButlerDataset`
The butler associated with this URI and the dataset itself.
The dataset can be `None` if the UUID is valid but the dataset
is not known to this butler.
"""
label, dataset_id = cls.parse_dataset_uri(uri)
parsed = cls.parse_dataset_uri(uri)
butler: Butler | None = None
if factory is not None:
# If the label is not recognized, it might be a path.
try:
butler = factory(label)
butler = factory(parsed.label)
except KeyError:
pass
if butler is None:
butler = cls.from_config(label)
return butler, butler.get_dataset(dataset_id)
butler = cls.from_config(parsed.label)
return SpecificButlerDataset(butler=butler, dataset=butler.get_dataset(parsed.dataset_id))

@abstractmethod
def _caching_context(self) -> AbstractContextManager[None]:
Expand Down
20 changes: 10 additions & 10 deletions tests/test_simpleButler.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,24 +917,24 @@ def test_dataset_uris(self):
f"butler://{label}/{ref.id}",
f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
):
new_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri)
self.assertEqual(ref, ref2)
result = Butler.get_dataset_from_uri(dataset_uri)
self.assertEqual(result.dataset, ref)
# The returned butler needs to have the datastore mocked.
DatastoreMock.apply(new_butler)
dataset_id, _ = butler.get(ref2)
DatastoreMock.apply(result.butler)
dataset_id, _ = result.butler.get(result.dataset)
self.assertEqual(dataset_id, ref.id)

factory_butler, ref2 = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
self.assertEqual(ref, ref2)
factory_result = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
self.assertEqual(factory_result.dataset, ref)
# The returned butler needs to have the datastore mocked.
DatastoreMock.apply(factory_butler)
dataset_id, _ = factory_butler.get(ref2)
DatastoreMock.apply(factory_result.butler)
dataset_id, _ = factory_result.butler.get(factory_result.dataset)
self.assertEqual(dataset_id, ref.id)

# Non existent dataset.
missing_id = str(ref.id).replace("2", "3")
_, no_ref = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
self.assertIsNone(no_ref)
result = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
self.assertIsNone(result.dataset)

# Test some failure modes.
for dataset_uri in (
Expand Down

0 comments on commit 2cbb8f0

Please sign in to comment.