Skip to content

Commit

Permalink
Add Butler.get_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Oct 27, 2023
1 parent 2a50477 commit 07f5f39
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 4 deletions.
19 changes: 18 additions & 1 deletion python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from ._butler_repo_index import ButlerRepoIndex
from ._config import Config, ConfigSubset
from ._dataset_existence import DatasetExistence
from ._dataset_ref import DatasetIdGenEnum, DatasetRef
from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
from ._dataset_type import DatasetType
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._file_dataset import FileDataset
Expand Down Expand Up @@ -799,6 +799,23 @@ def get_dataset_type(self, name: str) -> DatasetType:
"""
raise NotImplementedError()

@abstractmethod
def get_dataset(self, id: DatasetId) -> DatasetRef | None:
    """Look up a single dataset by its unique identifier.

    Parameters
    ----------
    id : `DatasetId`
        Unique identifier of the dataset to look up.

    Returns
    -------
    ref : `DatasetRef` or `None`
        Reference to the matching dataset, or `None` when no dataset
        with this identifier was found.
    """
    # Abstract interface only: concrete butlers supply the lookup.
    raise NotImplementedError()

@abstractmethod
def find_dataset(
self,
Expand Down
5 changes: 4 additions & 1 deletion python/lsst/daf/butler/direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from ._butler_config import ButlerConfig
from ._config import Config
from ._dataset_existence import DatasetExistence
from ._dataset_ref import DatasetIdGenEnum, DatasetRef
from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
from ._dataset_type import DatasetType
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._exceptions import ValidationError
Expand Down Expand Up @@ -1322,6 +1322,9 @@ def getURI(
def get_dataset_type(self, name: str) -> DatasetType:
    """Return the result of the registry's ``getDatasetType`` for ``name``.

    Parameters
    ----------
    name : `str`
        Name passed through to the registry lookup.

    Returns
    -------
    dataset_type : `DatasetType`
        Whatever the registry returns for ``name``.
    """
    dataset_type = self._registry.getDatasetType(name)
    return dataset_type

def get_dataset(self, id: DatasetId) -> DatasetRef | None:
    """Look up a dataset by identifier through the registry.

    Parameters
    ----------
    id : `DatasetId`
        Unique identifier passed through to the registry's
        ``getDataset``.

    Returns
    -------
    ref : `DatasetRef` or `None`
        Whatever the registry returns for ``id``; the signature allows
        `None` for the case where nothing matches.
    """
    found = self._registry.getDataset(id)
    return found

def find_dataset(
self,
datasetType: DatasetType | str,
Expand Down
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef
from .._dataset_type import DatasetType, SerializedDatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
Expand Down Expand Up @@ -221,6 +221,11 @@ def get_dataset_type(self, name: str) -> DatasetType:
response.raise_for_status()
return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions)

def get_dataset(self, id: DatasetId) -> DatasetRef | None:
    """Retrieve a dataset entry from the remote server.

    Parameters
    ----------
    id : `DatasetId`
        The unique identifier for the dataset.

    Returns
    -------
    ref : `DatasetRef` or `None`
        A ref to the dataset, or `None` if the server reported that no
        matching dataset exists.
    """
    path = f"dataset/{id}"
    response = self._client.get(self._get_url(path))
    # Surface HTTP-level failures instead of trying to parse an error
    # payload as if it were a dataset reference.
    response.raise_for_status()
    payload = response.json()
    # A null body means the dataset is unknown; unpacking it with ``**``
    # would raise TypeError rather than returning None.
    if payload is None:
        return None
    return DatasetRef.from_simple(SerializedDatasetRef(**payload), universe=self.dimensions)

def find_dataset(
self,
datasetType: DatasetType | str,
Expand Down
20 changes: 20 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
__all__ = ("app", "factory_dependency")

import logging
import uuid
from functools import cache
from typing import Any

Expand Down Expand Up @@ -107,6 +108,25 @@ def get_dataset_type(
return datasetType.to_simple()


@app.get(
    "/butler/v1/dataset/{id}",
    summary="Retrieve this dataset definition.",
    response_model=SerializedDatasetRef | None,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def get_dataset(id: uuid.UUID, factory: Factory = Depends(factory_dependency)) -> SerializedDatasetRef | None:
    """Return a single dataset reference.

    Parameters
    ----------
    id : `uuid.UUID`
        Identifier of the dataset, taken from the request path.
    factory : `Factory`
        Factory used to create the butler that performs the lookup.

    Returns
    -------
    ref : `SerializedDatasetRef` or `None`
        Serialized form of the matching dataset ref, or `None` when the
        butler finds no dataset with this identifier.
    """
    found = factory.create_butler().get_dataset(id)
    # A missing dataset could arguably be a 404, but the butler's own
    # get_dataset returns None without raising, so mirror that here.
    return None if found is None else found.to_simple()


# Not yet supported: TimeSpan is not yet a pydantic model.
# collections parameter assumes client-side has resolved regexes.
@app.post(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Dir
with self.assertRaises(FileNotFoundError):
butler.get(ref)
# Registry shouldn't be able to find it by dataset_id anymore.
self.assertIsNone(butler.registry.getDataset(ref.id))
self.assertIsNone(butler.get_dataset(ref.id))

# Do explicit registry removal since we know they are
# empty
Expand Down
3 changes: 3 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def test_find_dataset(self):
self.assertIsInstance(ref, DatasetRef)
self.assertEqual(ref.id, uuid.UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))

ref2 = self.butler.get_dataset(ref.id)
self.assertEqual(ref2, ref)


if __name__ == "__main__":
unittest.main()

0 comments on commit 07f5f39

Please sign in to comment.