From 97c065a52241fdc114d6ae00a1cda816030feb25 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 09:49:30 -0700 Subject: [PATCH] Move part of RemoteRegistry to RemoteButler Extract dimensions and some httpx setup logic from RemoteRegistry and move them to RemoteButler. Delete RemoteRegistry and tests associated with it. --- python/lsst/daf/butler/registries/remote.py | 686 ------------------ .../butler/remote_butler/_remote_butler.py | 49 +- tests/test_server.py | 93 +-- 3 files changed, 65 insertions(+), 763 deletions(-) delete mode 100644 python/lsst/daf/butler/registries/remote.py diff --git a/python/lsst/daf/butler/registries/remote.py b/python/lsst/daf/butler/registries/remote.py deleted file mode 100644 index 94653119ef..0000000000 --- a/python/lsst/daf/butler/registries/remote.py +++ /dev/null @@ -1,686 +0,0 @@ -# This file is part of daf_butler. -# -# Developed for the LSST Data Management System. -# This product includes software developed by the LSST Project -# (http://www.lsst.org). -# See the COPYRIGHT file at the top-level directory of this distribution -# for details of code ownership. -# -# This software is dual licensed under the GNU General Public License and also -# under a 3-clause BSD license. Recipients may choose which of these licenses -# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, -# respectively. If you choose the GPL option then the following text applies -# (but note that there is still no warranty even if you opt for BSD instead): -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -from __future__ import annotations - -__all__ = ("RemoteRegistry",) - -import contextlib -from collections.abc import Iterable, Iterator, Mapping, Sequence -from typing import TYPE_CHECKING, Any - -import httpx -from lsst.daf.butler import __version__ -from lsst.resources import ResourcePath, ResourcePathExpression -from lsst.utils.introspection import get_full_type_name -from lsst.utils.iteration import ensure_iterable - -from .._config import Config -from .._dataset_association import DatasetAssociation -from .._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, SerializedDatasetRef -from .._dataset_type import DatasetType, SerializedDatasetType -from .._named import NameLookupMapping -from .._storage_class import StorageClassFactory -from .._timespan import Timespan -from ..dimensions import ( - DataCoordinate, - DataCoordinateSequence, - DataId, - Dimension, - DimensionConfig, - DimensionElement, - DimensionGraph, - DimensionRecord, - DimensionUniverse, - SerializedDataCoordinate, - SerializedDimensionRecord, -) -from ..registry import CollectionSummary, CollectionType, RegistryConfig, RegistryDefaults, _ButlerRegistry -from ..server_models import ( - DatasetsQueryParameter, - ExpressionQueryParameter, - QueryDataIdsModel, - QueryDatasetsModel, - QueryDimensionRecordsModel, -) - -if TYPE_CHECKING: - from .._butler_config import ButlerConfig - from ..datastore._datastore import DatastoreOpaqueTable - from ..registry._registry import CollectionArgType - from ..registry.interfaces import CollectionRecord, DatastoreRegistryBridgeManager - - -class RemoteRegistry(_ButlerRegistry): - """Registry that can talk to a remote Butler server. - - Parameters - ---------- - server_uri : `lsst.resources.ResourcePath` - URL of the remote Butler server. - defaults : `RegistryDefaults` - Default collection search path and/or output `~CollectionType.RUN` - collection. - """ - - @classmethod - def createFromConfig( - cls, - config: RegistryConfig | str | None = None, - dimensionConfig: DimensionConfig | str | None = None, - butlerRoot: ResourcePathExpression | None = None, - ) -> _ButlerRegistry: - """Create registry database and return `_ButlerRegistry` instance. - - A remote registry can not create a registry database. Calling this - method will raise an exception. - """ - raise NotImplementedError("A remote registry can not create a registry.") - - @classmethod - def fromConfig( - cls, - config: ButlerConfig | RegistryConfig | Config | str, - butlerRoot: ResourcePathExpression | None = None, - writeable: bool = True, - defaults: RegistryDefaults | None = None, - ) -> _ButlerRegistry: - # Docstring inherited from lsst.daf.butler.registry.Registry - config = cls.forceRegistryConfig(config) - config.replaceRoot(butlerRoot) - - if defaults is None: - defaults = RegistryDefaults() - - if isinstance(config["db"], httpx.Client): - client = config["db"] - server_uri = ResourcePath("/") - else: - client = None - server_uri = ResourcePath(config["db"]) - return cls(server_uri, defaults, writeable, client=client) - - def __init__( - self, - server_uri: ResourcePath, - defaults: RegistryDefaults, - writeable: bool, - client: httpx.Client | None = None, - ): - self._db = server_uri - self._defaults = defaults - - # All PUT calls should be short-circuited if not writeable. - self._writeable = writeable - - self._dimensions: DimensionUniverse | None = None - - if client is not None: - # We have injected a client explicitly in to the class. - # This is generally done for testing. - self._client = client - else: - headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"} - self._client = httpx.Client(headers=headers) - - # Does each API need to be sent the defaults so that the server - # can use specific defaults each time? - - # Storage class information should be pulled from server. - # Dimensions should be pulled from server. - - def __str__(self) -> str: - return str(self._db) - - def __repr__(self) -> str: - return f"RemoteRegistry({self._db!r}, {self.dimensions!r})" - - def isWriteable(self) -> bool: - # Docstring inherited from lsst.daf.butler.registry.Registry - # Can be used to prevent any PUTs to server - return self._writeable - - def copy(self, defaults: RegistryDefaults | None = None) -> _ButlerRegistry: - # Docstring inherited from lsst.daf.butler.registry._ButlerRegistry - if defaults is None: - # No need to copy, because `RegistryDefaults` is immutable; we - # effectively copy on write. - defaults = self.defaults - return type(self)(self._db, defaults, self.isWriteable()) - - def _get_url(self, path: str, version: str = "v1") -> str: - """Form the full URL to the server given the path on server. - - Parameters - ---------- - path : `str` - The path to the server endpoint. Should not include the "/butler" - prefix. - version : `str`, optional - Version string to prepend to path. Defaults to "v1". - - Returns - ------- - url : `str` - The full URL to the service. - """ - prefix = "butler" - if self._db.scheme == "file": - # Not a server, assume a test server and so prepend a /. - return f"/{prefix}/{version}/{path}" - return str(self._db.join(prefix).join(path)) - - @property - def dimensions(self) -> DimensionUniverse: - # Docstring inherited from lsst.daf.butler.registry.Registry - if self._dimensions is not None: - return self._dimensions - - # Access /dimensions.json on server and cache it locally. - response = self._client.get(self._get_url("universe")) - response.raise_for_status() - - config = DimensionConfig.fromString(response.text, format="json") - self._dimensions = DimensionUniverse(config) - return self._dimensions - - def refresh(self) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - - # Need to determine what to refresh. - # Might need a server method to return all the DatasetTypes up front. - # How do we know which server should be refreshed? - # Should there be caches in the client? - response = self._client.put(self._get_url("registry/refresh")) - response.raise_for_status() - - return - - @contextlib.contextmanager - def transaction(self, *, savepoint: bool = False) -> Iterator[None]: - # Transaction handling for client server is hard and will require - # some support in the server to store registry changes and defer - # committing them. This will likely require a change in transaction - # interface. For now raise. - raise NotImplementedError() - - # insertOpaqueData + fetchOpaqueData + deleteOpaqueData - # There are no managers for opaque data in client. This implies - # that the server would have to have specific implementations for - # use by Datastore. DatastoreBridgeManager also is not needed. - - def registerCollection( - self, name: str, type: CollectionType = CollectionType.TAGGED, doc: str | None = None - ) -> bool: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getCollectionType(self, name: str) -> CollectionType: - # Docstring inherited from lsst.daf.butler.registry.Registry - # This could use a local cache since collection types won't - # change. - path = f"registry/collection/type/{name}" - response = self._client.get(self._get_url(path)) - response.raise_for_status() - typeName = response.json() - return CollectionType.from_name(typeName) - - def _get_collection_record(self, name: str) -> CollectionRecord: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError - - def registerRun(self, name: str, doc: str | None = None) -> bool: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def removeCollection(self, name: str) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getCollectionChain(self, parent: str) -> tuple[str, ...]: - # Docstring inherited from lsst.daf.butler.registry.Registry - path = f"registry/collection/chain/{parent}" - response = self._client.get(self._get_url(path)) - response.raise_for_status() - chain = response.json() - return tuple(chain) - - def setCollectionChain(self, parent: str, children: Any, *, flatten: bool = False) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getCollectionParentChains(self, collection: str) -> set[str]: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getCollectionDocumentation(self, collection: str) -> str | None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def setCollectionDocumentation(self, collection: str, doc: str | None) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getCollectionSummary(self, collection: str) -> CollectionSummary: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def registerDatasetType(self, datasetType: DatasetType) -> bool: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def removeDatasetType(self, name: str | tuple[str, ...]) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getDatasetType(self, name: str) -> DatasetType: - # Docstring inherited from lsst.daf.butler.registry.Registry - path = f"registry/datasetType/{name}" - response = self._client.get(self._get_url(path)) - response.raise_for_status() - return DatasetType.from_simple(SerializedDatasetType(**response.json()), universe=self.dimensions) - - def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def _simplify_dataId(self, dataId: DataId | None) -> SerializedDataCoordinate | None: - """Take a generic Data ID and convert it to a serializable form. - - Parameters - ---------- - dataId : `dict`, `None`, `DataCoordinate` - The data ID to serialize. - - Returns - ------- - data_id : `SerializedDataCoordinate` or `None` - A serializable form. - """ - if dataId is None: - return None - if isinstance(dataId, DataCoordinate): - return dataId.to_simple() - # Assume we can treat it as a dict. - return SerializedDataCoordinate(dataId=dataId) - - def findDataset( - self, - datasetType: DatasetType | str, - dataId: DataId | None = None, - *, - collections: CollectionArgType | None = None, - timespan: Timespan | None = None, - **kwargs: Any, - ) -> DatasetRef | None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def insertDatasets( - self, - datasetType: DatasetType | str, - dataIds: Iterable[DataId], - run: str | None = None, - expand: bool = True, - idGenerationMode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE, - ) -> list[DatasetRef]: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def _importDatasets( - self, - datasets: Iterable[DatasetRef], - expand: bool = True, - ) -> list[DatasetRef]: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getDataset(self, id: DatasetId) -> DatasetRef | None: - # Docstring inherited from lsst.daf.butler.registry.Registry - path = f"registry/dataset/{id}" - response = self._client.get(self._get_url(path)) - response.raise_for_status() - return DatasetRef.from_simple(SerializedDatasetRef(**response.json()), universe=self.dimensions) - - def removeDatasets(self, refs: Iterable[DatasetRef]) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def associate(self, collection: str, refs: Iterable[DatasetRef]) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def disassociate(self, collection: str, refs: Iterable[DatasetRef]) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def certify(self, collection: str, refs: Iterable[DatasetRef], timespan: Timespan) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def decertify( - self, - collection: str, - datasetType: str | DatasetType, - timespan: Timespan, - *, - dataIds: Iterable[DataId] | None = None, - ) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def getDatastoreBridgeManager(self) -> DatastoreRegistryBridgeManager: - """Return an object that allows a new `Datastore` instance to - communicate with this `Registry`. - - Returns - ------- - manager : `DatastoreRegistryBridgeManager` - Object that mediates communication between this `Registry` and its - associated datastores. - """ - from ..tests._dummyRegistry import DummyDatastoreRegistryBridgeManager, DummyOpaqueTableStorageManager - - return DummyDatastoreRegistryBridgeManager(DummyOpaqueTableStorageManager(), self.dimensions, int) - - def getDatasetLocations(self, ref: DatasetRef) -> Iterable[str]: - # Docstring inherited from lsst.daf.butler.registry.Registry - path = f"registry/datasetLocations/{ref.id}" - response = self._client.get(self._get_url(path)) - response.raise_for_status() - return response.json() - - def expandDataId( - self, - dataId: DataId | None = None, - *, - graph: DimensionGraph | None = None, - records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None, - withDefaults: bool = True, - **kwargs: Any, - ) -> DataCoordinate: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def insertDimensionData( - self, - element: DimensionElement | str, - *data: Mapping[str, Any] | DimensionRecord, - conform: bool = True, - replace: bool = False, - skip_existing: bool = False, - ) -> None: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def syncDimensionData( - self, - element: DimensionElement | str, - row: Mapping[str, Any] | DimensionRecord, - conform: bool = True, - update: bool = False, - ) -> bool | dict[str, Any]: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def queryDatasetTypes( - self, - expression: Any = ..., - *, - components: bool | None = False, - missing: list[str] | None = None, - ) -> Iterable[DatasetType]: - # Docstring inherited from lsst.daf.butler.registry.Registry - # Note no caching implemented in client. - if missing is not None: - raise NotImplementedError("RemoteRegistry does not support the 'missing' parameter.") - - params: dict[str, Any] = {} - - expression = ExpressionQueryParameter.from_expression(expression) - if expression.regex is not None: - params["regex"] = expression.regex - if expression.glob is not None: - params["glob"] = expression.glob - - path = "registry/datasetTypes" - if params: - path += "/re" - - if components is not None: - params = {"components": components} - - response = self._client.get(self._get_url(path), params=params) - response.raise_for_status() - - # Really could do with a ListSerializedDatasetType model but for - # now do it explicitly. - datasetTypes = response.json() - return [ - DatasetType.from_simple(SerializedDatasetType(**d), universe=self.dimensions) - for d in datasetTypes - ] - - def queryCollections( - self, - expression: Any = ..., - datasetType: DatasetType | None = None, - collectionTypes: Iterable[CollectionType] | CollectionType = CollectionType.all(), - flattenChains: bool = False, - includeChains: bool | None = None, - ) -> Sequence[str]: - # Docstring inherited from lsst.daf.butler.registry.Registry - params: dict[str, Any] = {"flattenChains": flattenChains} - - expression = ExpressionQueryParameter.from_expression(expression) - if expression.regex is not None: - params["regex"] = expression.regex - if expression.glob is not None: - params["glob"] = expression.glob - if datasetType is not None: - params["datasetType"] = datasetType.name - if includeChains is not None: - params["includeChains"] = includeChains - - collection_types = [collectionType.name for collectionType in ensure_iterable(collectionTypes)] - params["collectionType"] = collection_types - - path = "registry/collections" - response = self._client.get(self._get_url(path), params=params) - response.raise_for_status() - - collections = response.json() - return list(collections) - - def queryDatasets( # type: ignore - self, - datasetType: Any, - *, - collections: CollectionArgType | None = None, - dimensions: Iterable[Dimension | str] | None = None, - dataId: DataId | None = None, - where: str = "", - findFirst: bool = False, - components: bool | None = False, - bind: Mapping[str, Any] | None = None, - check: bool = True, - **kwargs: Any, - ) -> Iterable[DatasetRef]: - # Docstring inherited from lsst.daf.butler.registry.Registry - if dimensions is not None: - dimensions = [str(d) for d in ensure_iterable(dimensions)] - - collections_param: ExpressionQueryParameter | None = None - if collections is not None: - collections_param = ExpressionQueryParameter.from_expression(collections) - - parameters = QueryDatasetsModel( - datasetType=ExpressionQueryParameter.from_expression(datasetType), - collections=collections_param, - dimensions=dimensions, - dataId=self._simplify_dataId(dataId), - where=where, - findFirst=findFirst, - components=components, - bind=bind, - check=check, - keyword_args=kwargs, - ) - - response = self._client.post( - self._get_url("registry/datasets"), - json=parameters.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), - timeout=20, - ) - response.raise_for_status() - - simple_refs = response.json() - return ( - DatasetRef.from_simple(SerializedDatasetRef(**r), universe=self.dimensions) for r in simple_refs - ) - - def queryDataIds( # type: ignore - self, - dimensions: Iterable[Dimension | str] | Dimension | str, - *, - dataId: DataId | None = None, - datasets: Any = None, - collections: CollectionArgType | None = None, - where: str = "", - components: bool | None = False, - bind: Mapping[str, Any] | None = None, - check: bool = True, - **kwargs: Any, - ) -> DataCoordinateSequence: - # Docstring inherited from lsst.daf.butler.registry.Registry - cleaned_dimensions = [str(d) for d in ensure_iterable(dimensions)] - - collections_param: ExpressionQueryParameter | None = None - if collections is not None: - collections_param = ExpressionQueryParameter.from_expression(collections) - if datasets is not None: - datasets = DatasetsQueryParameter.from_expression(datasets) - - parameters = QueryDataIdsModel( - dimensions=cleaned_dimensions, - dataId=self._simplify_dataId(dataId), - collections=collections_param, - datasets=datasets, - where=where, - components=components, - bind=bind, - check=check, - keyword_args=kwargs, - ) - - response = self._client.post( - self._get_url("registry/dataIds"), - json=parameters.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), - timeout=20, - ) - response.raise_for_status() - - simple = response.json() - dataIds = [ - DataCoordinate.from_simple(SerializedDataCoordinate(**d), universe=self.dimensions) - for d in simple - ] - return DataCoordinateSequence( - dataIds=dataIds, graph=DimensionGraph(self.dimensions, names=cleaned_dimensions) - ) - - def queryDimensionRecords( # type: ignore - self, - element: DimensionElement | str, - *, - dataId: DataId | None = None, - datasets: Any = None, - collections: CollectionArgType | None = None, - where: str = "", - components: bool | None = False, - bind: Mapping[str, Any] | None = None, - check: bool = True, - **kwargs: Any, - ) -> Iterator[DimensionRecord]: - # Docstring inherited from lsst.daf.butler.registry.Registry - collections_param: ExpressionQueryParameter | None = None - if collections is not None: - collections_param = ExpressionQueryParameter.from_expression(collections) - if datasets is not None: - datasets = DatasetsQueryParameter.from_expression(datasets) - - parameters = QueryDimensionRecordsModel( - dataId=self._simplify_dataId(dataId), - datasets=datasets, - collections=collections_param, - where=where, - components=components, - bind=bind, - check=check, - keyword_args=kwargs, - ) - response = self._client.post( - self._get_url(f"registry/dimensionRecords/{element}"), - json=parameters.model_dump(mode="json", exclude_unset=True, exclude_defaults=True), - timeout=20, - ) - response.raise_for_status() - - simple_records = response.json() - - return ( - DimensionRecord.from_simple(SerializedDimensionRecord(**r), universe=self.dimensions) - for r in simple_records - ) - - def queryDatasetAssociations( - self, - datasetType: str | DatasetType, - collections: CollectionArgType | None = ..., - *, - collectionTypes: Iterable[CollectionType] = CollectionType.all(), - flattenChains: bool = False, - ) -> Iterator[DatasetAssociation]: - # Docstring inherited from lsst.daf.butler.registry.Registry - raise NotImplementedError() - - def get_datastore_records(self, ref: DatasetRef) -> DatasetRef: - # Docstring inherited from base class. - # For now this does not do anything and just returns the ref. - return ref - - def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: - # Docstring inherited from base class. - return - - def make_datastore_tables(self, tables: Mapping[str, DatastoreOpaqueTable]) -> None: - # Docstring inherited from base class. - return - - storageClasses: StorageClassFactory - """All storage classes known to the registry (`StorageClassFactory`). - """ diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 30c16a4403..777db8d3b9 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -29,7 +29,10 @@ from contextlib import AbstractContextManager from typing import Any, TextIO +import httpx +from lsst.daf.butler import __version__ from lsst.resources import ResourcePath, ResourcePathExpression +from lsst.utils.introspection import get_full_type_name from .._butler import Butler from .._butler_config import ButlerConfig @@ -42,7 +45,7 @@ from .._limited_butler import LimitedButler from .._storage_class import StorageClass from ..datastore import DatasetRefURIs -from ..dimensions import DataId, DimensionUniverse +from ..dimensions import DataId, DimensionConfig, DimensionUniverse from ..registry import Registry from ..transfers import RepoExportContext from ._config import RemoteButlerConfigModel @@ -51,12 +54,25 @@ class RemoteButler(Butler): def __init__( self, + # These parameters are inherited from the Butler() constructor config: Config | ResourcePathExpression | None = None, + *, searchPaths: Sequence[ResourcePathExpression] | None = None, + # Parameters unique to RemoteButler + http_client: httpx.Client | None = None, **kwargs: Any, ): butler_config = ButlerConfig(config, searchPaths, without_datastore=True) self._config = RemoteButlerConfigModel.model_validate(butler_config) + self._dimensions: DimensionUniverse | None = None + + if http_client is not None: + # We have injected a client explicitly in to the class. + # This is generally done for testing. + self._client = http_client + else: + headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"} + self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url)) def isWriteable(self) -> bool: # Docstring inherited. @@ -64,6 +80,18 @@ def isWriteable(self) -> bool: @property def dimensions(self) -> DimensionUniverse: + # Docstring inherited. + if self._dimensions is not None: + return self._dimensions + + response = self._client.get(self._get_url("universe")) + response.raise_for_status() + + config = DimensionConfig.fromString(response.text, format="json") + self._dimensions = DimensionUniverse(config) + return self._dimensions + + def getDatasetType(self, name: str) -> DatasetType: # Docstring inherited. raise NotImplementedError() @@ -261,3 +289,22 @@ def pruneDatasets( ) -> None: # Docstring inherited. raise NotImplementedError() + + def _get_url(self, path: str, version: str = "v1") -> str: + """Form the complete path to an endpoint on the server + + Parameters + ---------- + path : `str` + The relative path to the server endpoint. Should not include the + "/butler" prefix. + version : `str`, optional + Version string to prepend to path. Defaults to "v1". + + Returns + ------- + path : `str` + The full path to the endpoint + """ + prefix = "butler" + return f"{prefix}/{version}/{path}" diff --git a/tests/test_server.py b/tests/test_server.py index 0dea1afdfd..724db51144 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -32,18 +32,31 @@ # Failing to import any of these should disable the tests. import lsst.daf.butler.server from fastapi.testclient import TestClient + from lsst.daf.butler.remote_butler import RemoteButler from lsst.daf.butler.server import app except ImportError: TestClient = None app = None -from lsst.daf.butler import Butler, CollectionType, Config, DataCoordinate, DatasetRef -from lsst.daf.butler.tests import addDatasetType +from lsst.daf.butler import CollectionType from lsst.daf.butler.tests.utils import MetricTestRepo, makeTestTempDir, removeTestTempDir TESTDIR = os.path.abspath(os.path.dirname(__file__)) +def _make_remote_butler(http_client): + return RemoteButler( + config={ + "remote_butler": { + # This URL is ignored because we override the HTTP client, but + # must be valid to satisfy the config validation + "url": "https://test.example" + } + }, + http_client=http_client, + ) + + @unittest.skipIf(TestClient is None or app is None, "FastAPI not installed.") class ButlerClientServerTestCase(unittest.TestCase): """Test for Butler client/server.""" @@ -64,16 +77,7 @@ def setUpClass(cls): lsst.daf.butler.server.BUTLER_ROOT = cls.root cls.client = TestClient(app) - # Create a client butler. We need to modify the contents of the - # server configuration to reflect the use of the test client. - response = cls.client.get("/butler/butler.json") - config = Config(response.json()) - config["registry", "db"] = cls.client - - # Since there is no client datastore we also need to specify - # the datastore root. - config["datastore", "root"] = cls.root - cls.butler = Butler(config) + cls.butler = _make_remote_butler(cls.client) @classmethod def tearDownClass(cls): @@ -92,73 +96,10 @@ def test_simple(self): self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) - def test_registry(self): + def test_remote_butler(self): universe = self.butler.dimensions self.assertEqual(universe.namespace, "daf_butler") - dataset_type = self.butler.registry.getDatasetType("test_metric_comp") - self.assertEqual(dataset_type.name, "test_metric_comp") - - dataset_types = list(self.butler.registry.queryDatasetTypes(...)) - self.assertIn("test_metric_comp", [ds.name for ds in dataset_types]) - dataset_types = list(self.butler.registry.queryDatasetTypes("test_*")) - self.assertEqual(len(dataset_types), 1) - - collections = self.butler.registry.queryCollections( - ..., collectionTypes={CollectionType.RUN, CollectionType.TAGGED} - ) - self.assertEqual(len(collections), 2, collections) - - collection_type = self.butler.registry.getCollectionType("ingest") - self.assertEqual(collection_type.name, "TAGGED") - - chain = self.butler.registry.getCollectionChain("chain") - self.assertEqual(list(chain), ["ingest"]) - - datasets = list(self.butler.registry.queryDatasets("test_metric_comp", collections=...)) - self.assertEqual(len(datasets), 2) - - ref = self.butler.registry.getDataset(datasets[0].id) - self.assertEqual(ref, datasets[0]) - - locations = self.butler.registry.getDatasetLocations(ref) - self.assertEqual(locations[0], "FileDatastore@") - - fake_ref = DatasetRef( - dataset_type, - dataId={"instrument": "DummyCamComp", "physical_filter": "d-r", "visit": 424}, - run="missing", - ) - locations = self.butler.registry.getDatasetLocations(fake_ref) - self.assertEqual(locations, []) - - dataIds = list(self.butler.registry.queryDataIds("visit", dataId={"instrument": "DummyCamComp"})) - self.assertEqual(len(dataIds), 2) - - # Create a DataCoordinate to test the alternate path for specifying - # a data ID. - data_id = DataCoordinate.standardize({"instrument": "DummyCamComp"}, universe=self.butler.dimensions) - records = list(self.butler.registry.queryDimensionRecords("physical_filter", dataId=data_id)) - self.assertEqual(len(records), 1) - - def test_experimental(self): - """Experimental interfaces.""" - # Got URI testing we can not yet support disassembly so must - # add a dataset with a different dataset type. - datasetType = addDatasetType( - self.repo.butler, "metric", {"instrument", "visit"}, "StructuredCompositeReadCompNoDisassembly" - ) - - self.repo.addDataset({"instrument": "DummyCamComp", "visit": 424}, datasetType=datasetType) - self.butler.registry.refresh() - - # Need a DatasetRef. - datasets = list(self.butler.registry.queryDatasets("metric", collections=...)) - - response = self.client.get(f"/butler/v1/uri/{datasets[0].id}") - self.assertEqual(response.status_code, 200) - self.assertIn("file://", response.json()) - if __name__ == "__main__": unittest.main()