From 48ecfc83e012eb318748ca29c4b86fb4c629de85 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Mon, 23 Oct 2023 12:39:14 -0700 Subject: [PATCH 1/8] Add stub RemoteButler Add a no-op RemoteButler that can be instantiated via the Butler() constructor. --- .../lsst/daf/butler/remote_butler/__init__.py | 28 ++ .../lsst/daf/butler/remote_butler/_config.py | 38 +++ .../butler/remote_butler/_remote_butler.py | 263 ++++++++++++++++++ tests/test_remote_butler.py | 60 ++++ 4 files changed, 389 insertions(+) create mode 100644 python/lsst/daf/butler/remote_butler/__init__.py create mode 100644 python/lsst/daf/butler/remote_butler/_config.py create mode 100644 python/lsst/daf/butler/remote_butler/_remote_butler.py create mode 100644 tests/test_remote_butler.py diff --git a/python/lsst/daf/butler/remote_butler/__init__.py b/python/lsst/daf/butler/remote_butler/__init__.py new file mode 100644 index 0000000000..98bb440c81 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/__init__.py @@ -0,0 +1,28 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from ._remote_butler import RemoteButler diff --git a/python/lsst/daf/butler/remote_butler/_config.py b/python/lsst/daf/butler/remote_butler/_config.py new file mode 100644 index 0000000000..a825c69045 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/_config.py @@ -0,0 +1,38 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from pydantic import AnyHttpUrl + +from .._compat import _BaseModelCompat + + +class RemoteButlerOptionsModel(_BaseModelCompat): + url: AnyHttpUrl + + +class RemoteButlerConfigModel(_BaseModelCompat): + remote_butler: RemoteButlerOptionsModel diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py new file mode 100644 index 0000000000..30c16a4403 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -0,0 +1,263 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from collections.abc import Collection, Iterable, Sequence +from contextlib import AbstractContextManager +from typing import Any, TextIO + +from lsst.resources import ResourcePath, ResourcePathExpression + +from .._butler import Butler +from .._butler_config import ButlerConfig +from .._config import Config +from .._dataset_existence import DatasetExistence +from .._dataset_ref import DatasetIdGenEnum, DatasetRef +from .._dataset_type import DatasetType +from .._deferredDatasetHandle import DeferredDatasetHandle +from .._file_dataset import FileDataset +from .._limited_butler import LimitedButler +from .._storage_class import StorageClass +from ..datastore import DatasetRefURIs +from ..dimensions import DataId, DimensionUniverse +from ..registry import Registry +from ..transfers import RepoExportContext +from ._config import RemoteButlerConfigModel + + +class RemoteButler(Butler): + def __init__( + self, + config: Config | ResourcePathExpression | None = None, + searchPaths: Sequence[ResourcePathExpression] | None = None, + **kwargs: Any, + ): + butler_config = ButlerConfig(config, searchPaths, without_datastore=True) + self._config = RemoteButlerConfigModel.model_validate(butler_config) + + def isWriteable(self) -> bool: + # Docstring inherited. + return False + + @property + def dimensions(self) -> DimensionUniverse: + # Docstring inherited. + raise NotImplementedError() + + def transaction(self) -> AbstractContextManager[None]: + """Will always raise NotImplementedError. + Transactions are not supported by RemoteButler. + """ + raise NotImplementedError() + + def put( + self, + obj: Any, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + run: str | None = None, + **kwargs: Any, + ) -> DatasetRef: + # Docstring inherited. + raise NotImplementedError() + + def getDeferred( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + parameters: dict | None = None, + collections: Any = None, + storageClass: str | StorageClass | None = None, + **kwargs: Any, + ) -> DeferredDatasetHandle: + # Docstring inherited. + raise NotImplementedError() + + def get( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + parameters: dict[str, Any] | None = None, + collections: Any = None, + storageClass: StorageClass | str | None = None, + **kwargs: Any, + ) -> Any: + # Docstring inherited. + raise NotImplementedError() + + def getURIs( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + predict: bool = False, + collections: Any = None, + run: str | None = None, + **kwargs: Any, + ) -> DatasetRefURIs: + # Docstring inherited. + raise NotImplementedError() + + def getURI( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + predict: bool = False, + collections: Any = None, + run: str | None = None, + **kwargs: Any, + ) -> ResourcePath: + # Docstring inherited. + raise NotImplementedError() + + def retrieveArtifacts( + self, + refs: Iterable[DatasetRef], + destination: ResourcePathExpression, + transfer: str = "auto", + preserve_path: bool = True, + overwrite: bool = False, + ) -> list[ResourcePath]: + # Docstring inherited. + raise NotImplementedError() + + def exists( + self, + dataset_ref_or_type: DatasetRef | DatasetType | str, + /, + data_id: DataId | None = None, + *, + full_check: bool = True, + collections: Any = None, + **kwargs: Any, + ) -> DatasetExistence: + # Docstring inherited. + raise NotImplementedError() + + def _exists_many( + self, + refs: Iterable[DatasetRef], + /, + *, + full_check: bool = True, + ) -> dict[DatasetRef, DatasetExistence]: + # Docstring inherited. + raise NotImplementedError() + + def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: + # Docstring inherited. + raise NotImplementedError() + + def ingest( + self, + *datasets: FileDataset, + transfer: str | None = "auto", + run: str | None = None, + idGenerationMode: DatasetIdGenEnum | None = None, + record_validation_info: bool = True, + ) -> None: + # Docstring inherited. + raise NotImplementedError() + + def export( + self, + *, + directory: str | None = None, + filename: str | None = None, + format: str | None = None, + transfer: str | None = None, + ) -> AbstractContextManager[RepoExportContext]: + # Docstring inherited. + raise NotImplementedError() + + def import_( + self, + *, + directory: ResourcePathExpression | None = None, + filename: ResourcePathExpression | TextIO | None = None, + format: str | None = None, + transfer: str | None = None, + skip_dimensions: set | None = None, + ) -> None: + # Docstring inherited. + raise NotImplementedError() + + def transfer_from( + self, + source_butler: LimitedButler, + source_refs: Iterable[DatasetRef], + transfer: str = "auto", + skip_missing: bool = True, + register_dataset_types: bool = False, + transfer_dimensions: bool = False, + ) -> Collection[DatasetRef]: + # Docstring inherited. + raise NotImplementedError() + + def validateConfiguration( + self, + logFailures: bool = False, + datasetTypeNames: Iterable[str] | None = None, + ignore: Iterable[str] | None = None, + ) -> None: + # Docstring inherited. + raise NotImplementedError() + + @property + def collections(self) -> Sequence[str]: + # Docstring inherited. + raise NotImplementedError() + + @property + def run(self) -> str | None: + # Docstring inherited. + raise NotImplementedError() + + @property + def registry(self) -> Registry: + # Docstring inherited. + raise NotImplementedError() + + def pruneDatasets( + self, + refs: Iterable[DatasetRef], + *, + disassociate: bool = True, + unstore: bool = False, + tags: Iterable[str] = (), + purge: bool = False, + ) -> None: + # Docstring inherited. + raise NotImplementedError() diff --git a/tests/test_remote_butler.py b/tests/test_remote_butler.py new file mode 100644 index 0000000000..836c56fd2d --- /dev/null +++ b/tests/test_remote_butler.py @@ -0,0 +1,60 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import unittest + +from lsst.daf.butler import Butler +from pydantic import ValidationError + +try: + from lsst.daf.butler.remote_butler import RemoteButler +except ImportError: + # httpx is not available in rubin-env yet, so skip these tests if it's not + # available + RemoteButler = None + + +@unittest.skipIf(RemoteButler is None, "httpx is not installed") +class RemoteButlerConfigTests(unittest.TestCase): + """Test construction of RemoteButler via Butler()""" + + def test_instantiate_via_butler(self): + butler = Butler( + { + "cls": "lsst.daf.butler.remote_butler.RemoteButler", + "remote_butler": {"url": "https://validurl.example"}, + } + ) + assert isinstance(butler, RemoteButler) + + def test_bad_config(self): + with self.assertRaises(ValidationError): + Butler({"cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": "!"}}) + + +if __name__ == "__main__": + unittest.main() From 9b608ce9e3c8cb444dc81e07dad5ff5290f011c4 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 09:49:30 -0700 Subject: [PATCH 2/8] Move part of RemoteRegistry to RemoteButler Extract dimensions and some httpx setup logic from RemoteRegistry and move them to RemoteButler. Delete RemoteRegistry and tests associated with it. --- .../butler/remote_butler/_remote_butler.py | 49 +++++++++- tests/test_server.py | 94 ++++--------------- 2 files changed, 65 insertions(+), 78 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 30c16a4403..777db8d3b9 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -29,7 +29,10 @@ from contextlib import AbstractContextManager from typing import Any, TextIO +import httpx +from lsst.daf.butler import __version__ from lsst.resources import ResourcePath, ResourcePathExpression +from lsst.utils.introspection import get_full_type_name from .._butler import Butler from .._butler_config import ButlerConfig @@ -42,7 +45,7 @@ from .._limited_butler import LimitedButler from .._storage_class import StorageClass from ..datastore import DatasetRefURIs -from ..dimensions import DataId, DimensionUniverse +from ..dimensions import DataId, DimensionConfig, DimensionUniverse from ..registry import Registry from ..transfers import RepoExportContext from ._config import RemoteButlerConfigModel @@ -51,12 +54,25 @@ class RemoteButler(Butler): def __init__( self, + # These parameters are inherited from the Butler() constructor config: Config | ResourcePathExpression | None = None, + *, searchPaths: Sequence[ResourcePathExpression] | None = None, + # Parameters unique to RemoteButler + http_client: httpx.Client | None = None, **kwargs: Any, ): butler_config = ButlerConfig(config, searchPaths, without_datastore=True) self._config = RemoteButlerConfigModel.model_validate(butler_config) + self._dimensions: DimensionUniverse | None = None + + if http_client is not None: + # We have injected a client explicitly in to the class. + # This is generally done for testing. + self._client = http_client + else: + headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"} + self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url)) def isWriteable(self) -> bool: # Docstring inherited. @@ -64,6 +80,18 @@ def isWriteable(self) -> bool: @property def dimensions(self) -> DimensionUniverse: + # Docstring inherited. + if self._dimensions is not None: + return self._dimensions + + response = self._client.get(self._get_url("universe")) + response.raise_for_status() + + config = DimensionConfig.fromString(response.text, format="json") + self._dimensions = DimensionUniverse(config) + return self._dimensions + + def getDatasetType(self, name: str) -> DatasetType: # Docstring inherited. raise NotImplementedError() @@ -261,3 +289,22 @@ def pruneDatasets( ) -> None: # Docstring inherited. raise NotImplementedError() + + def _get_url(self, path: str, version: str = "v1") -> str: + """Form the complete path to an endpoint on the server + + Parameters + ---------- + path : `str` + The relative path to the server endpoint. Should not include the + "/butler" prefix. + version : `str`, optional + Version string to prepend to path. Defaults to "v1". + + Returns + ------- + path : `str` + The full path to the endpoint + """ + prefix = "butler" + return f"{prefix}/{version}/{path}" diff --git a/tests/test_server.py b/tests/test_server.py index a52ddf1b66..724db51144 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -32,19 +32,31 @@ # Failing to import any of these should disable the tests. import lsst.daf.butler.server from fastapi.testclient import TestClient + from lsst.daf.butler.remote_butler import RemoteButler from lsst.daf.butler.server import app except ImportError: TestClient = None app = None -from lsst.daf.butler import Butler, CollectionType, Config, DataCoordinate, DatasetRef -from lsst.daf.butler.tests import addDatasetType +from lsst.daf.butler import CollectionType from lsst.daf.butler.tests.utils import MetricTestRepo, makeTestTempDir, removeTestTempDir TESTDIR = os.path.abspath(os.path.dirname(__file__)) -@unittest.skip("Test does not work after RemoteRegistry removal, to be fixed later.") +def _make_remote_butler(http_client): + return RemoteButler( + config={ + "remote_butler": { + # This URL is ignored because we override the HTTP client, but + # must be valid to satisfy the config validation + "url": "https://test.example" + } + }, + http_client=http_client, + ) + + @unittest.skipIf(TestClient is None or app is None, "FastAPI not installed.") class ButlerClientServerTestCase(unittest.TestCase): """Test for Butler client/server.""" @@ -65,16 +77,7 @@ def setUpClass(cls): lsst.daf.butler.server.BUTLER_ROOT = cls.root cls.client = TestClient(app) - # Create a client butler. We need to modify the contents of the - # server configuration to reflect the use of the test client. - response = cls.client.get("/butler/butler.json") - config = Config(response.json()) - config["registry", "db"] = cls.client - - # Since there is no client datastore we also need to specify - # the datastore root. - config["datastore", "root"] = cls.root - cls.butler = Butler(config) + cls.butler = _make_remote_butler(cls.client) @classmethod def tearDownClass(cls): @@ -93,73 +96,10 @@ def test_simple(self): self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) - def test_registry(self): + def test_remote_butler(self): universe = self.butler.dimensions self.assertEqual(universe.namespace, "daf_butler") - dataset_type = self.butler.registry.getDatasetType("test_metric_comp") - self.assertEqual(dataset_type.name, "test_metric_comp") - - dataset_types = list(self.butler.registry.queryDatasetTypes(...)) - self.assertIn("test_metric_comp", [ds.name for ds in dataset_types]) - dataset_types = list(self.butler.registry.queryDatasetTypes("test_*")) - self.assertEqual(len(dataset_types), 1) - - collections = self.butler.registry.queryCollections( - ..., collectionTypes={CollectionType.RUN, CollectionType.TAGGED} - ) - self.assertEqual(len(collections), 2, collections) - - collection_type = self.butler.registry.getCollectionType("ingest") - self.assertEqual(collection_type.name, "TAGGED") - - chain = self.butler.registry.getCollectionChain("chain") - self.assertEqual(list(chain), ["ingest"]) - - datasets = list(self.butler.registry.queryDatasets("test_metric_comp", collections=...)) - self.assertEqual(len(datasets), 2) - - ref = self.butler.registry.getDataset(datasets[0].id) - self.assertEqual(ref, datasets[0]) - - locations = self.butler.registry.getDatasetLocations(ref) - self.assertEqual(locations[0], "FileDatastore@") - - fake_ref = DatasetRef( - dataset_type, - dataId={"instrument": "DummyCamComp", "physical_filter": "d-r", "visit": 424}, - run="missing", - ) - locations = self.butler.registry.getDatasetLocations(fake_ref) - self.assertEqual(locations, []) - - dataIds = list(self.butler.registry.queryDataIds("visit", dataId={"instrument": "DummyCamComp"})) - self.assertEqual(len(dataIds), 2) - - # Create a DataCoordinate to test the alternate path for specifying - # a data ID. - data_id = DataCoordinate.standardize({"instrument": "DummyCamComp"}, universe=self.butler.dimensions) - records = list(self.butler.registry.queryDimensionRecords("physical_filter", dataId=data_id)) - self.assertEqual(len(records), 1) - - def test_experimental(self): - """Experimental interfaces.""" - # Got URI testing we can not yet support disassembly so must - # add a dataset with a different dataset type. - datasetType = addDatasetType( - self.repo.butler, "metric", {"instrument", "visit"}, "StructuredCompositeReadCompNoDisassembly" - ) - - self.repo.addDataset({"instrument": "DummyCamComp", "visit": 424}, datasetType=datasetType) - self.butler.registry.refresh() - - # Need a DatasetRef. - datasets = list(self.butler.registry.queryDatasets("metric", collections=...)) - - response = self.client.get(f"/butler/v1/uri/{datasets[0].id}") - self.assertEqual(response.status_code, 200) - self.assertIn("file://", response.json()) - if __name__ == "__main__": unittest.main() From b6b297504eb2021143596f059efbd25786cfe59e Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 10:14:23 -0700 Subject: [PATCH 3/8] Remove unused server code --- python/lsst/daf/butler/server.py | 375 +----------------------- python/lsst/daf/butler/server_models.py | 284 ------------------ tests/test_server.py | 4 - 3 files changed, 2 insertions(+), 661 deletions(-) diff --git a/python/lsst/daf/butler/server.py b/python/lsst/daf/butler/server.py index 1839838954..8170612085 100644 --- a/python/lsst/daf/butler/server.py +++ b/python/lsst/daf/butler/server.py @@ -30,49 +30,16 @@ __all__ = () import logging -from collections.abc import Mapping -from enum import Enum, auto from typing import Any -from fastapi import Depends, FastAPI, HTTPException, Query +from fastapi import Depends, FastAPI from fastapi.middleware.gzip import GZipMiddleware -from lsst.daf.butler import ( - Butler, - Config, - DataCoordinate, - DatasetId, - DatasetRef, - SerializedDataCoordinate, - SerializedDatasetRef, - SerializedDatasetType, - SerializedDimensionRecord, -) -from lsst.daf.butler.registry import CollectionType -from lsst.daf.butler.server_models import ( - ExpressionQueryParameter, - QueryDataIdsModel, - QueryDatasetsModel, - QueryDimensionRecordsModel, -) +from lsst.daf.butler import Butler BUTLER_ROOT = "ci_hsc_gen3/DATA" log = logging.getLogger("excalibur") - -class CollectionTypeNames(str, Enum): - """Collection type names supported by the interface.""" - - def _generate_next_value_(name, start, count, last_values) -> str: # type: ignore # noqa: N805 - # Use the name directly as the value - return name - - RUN = auto() - CALIBRATION = auto() - CHAINED = auto() - TAGGED = auto() - - app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) @@ -101,351 +68,13 @@ def butler_readwrite_dependency() -> Butler: return Butler.from_config(butler=GLOBAL_READWRITE_BUTLER) -def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: - """Convert the serialized dataId back to full DataCoordinate. - - Parameters - ---------- - butler : `lsst.daf.butler.Butler` - The butler to use for registry and universe. - data_id : `SerializedDataCoordinate` or `None` - The serialized form. - - Returns - ------- - dataId : `DataCoordinate` or `None` - The DataId usable by registry. - """ - if data_id is None: - return None - return DataCoordinate.from_simple(data_id, registry=butler.registry) - - @app.get("/butler/") def read_root() -> str: """Return message when accessing the root URL.""" return "Welcome to Excalibur... aka your Butler Server" -@app.get("/butler/butler.json", response_model=dict[str, Any]) -def read_server_config() -> Mapping: - """Return the butler configuration that the client should use.""" - config_str = f""" -datastore: - root: {BUTLER_ROOT} -registry: - cls: lsst.daf.butler.registries.remote.RemoteRegistry - db: -""" - config = Config.fromString(config_str, format="yaml") - return config.toDict() - - @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" return butler.dimensions.dimensionConfig.toDict() - - -@app.get("/butler/v1/uri/{id}", response_model=str) -def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str: - """Return a single URI of non-disassembled dataset.""" - ref = butler.registry.getDataset(id) - if not ref: - raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.") - - uri = butler.getURI(ref) - - # In reality would have to convert this to a signed URL - return str(uri) - - -@app.put("/butler/v1/registry/refresh") -def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None: - """Refresh the registry cache.""" - # Unclear whether this should exist. Which butler is really being - # refreshed? How do we know the server we are refreshing is used later? - # For testing at the moment it is important if a test adds a dataset type - # directly in the server since the test client will not see it. - butler.registry.refresh() - - -@app.get( - "/butler/v1/registry/datasetType/{datasetTypeName}", - summary="Retrieve this dataset type definition.", - response_model=SerializedDatasetType, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset_type( - datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency) -) -> SerializedDatasetType: - """Return the dataset type.""" - datasetType = butler.registry.getDatasetType(datasetTypeName) - return datasetType.to_simple() - - -@app.get( - "/butler/v1/registry/datasetTypes", - summary="Retrieve all dataset type definitions.", - response_model=list[SerializedDatasetType], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_all_dataset_types( - components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDatasetType]: - """Return all dataset types.""" - datasetTypes = butler.registry.queryDatasetTypes(..., components=components) - return [d.to_simple() for d in datasetTypes] - - -@app.get( - "/butler/v1/registry/datasetTypes/re", - summary="Retrieve dataset type definitions matching expressions", - response_model=list[SerializedDatasetType], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_dataset_types_re( - regex: list[str] | None = Query(None), - glob: list[str] | None = Query(None), - components: bool | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> list[SerializedDatasetType]: - """Return all dataset types matching a regular expression.""" - expression_params = ExpressionQueryParameter(regex=regex, glob=glob) - - datasetTypes = butler.registry.queryDatasetTypes(expression_params.expression(), components=components) - return [d.to_simple() for d in datasetTypes] - - -@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str]) -def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]: - """Return the collection chain members.""" - chain = butler.registry.getCollectionChain(parent) - return list(chain) - - -@app.get("/butler/v1/registry/collections", response_model=list[str]) -def query_collections( - regex: list[str] | None = Query(None), - glob: list[str] | None = Query(None), - datasetType: str | None = Query(None), - flattenChains: bool = Query(False), - collectionType: list[CollectionTypeNames] | None = Query(None), - includeChains: bool | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> list[str]: - """Return collections matching query.""" - expression_params = ExpressionQueryParameter(regex=regex, glob=glob) - collectionTypes = CollectionType.from_names(collectionType) - dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None - - collections = butler.registry.queryCollections( - expression=expression_params.expression(), - datasetType=dataset_type, - collectionTypes=collectionTypes, - flattenChains=flattenChains, - includeChains=includeChains, - ) - return list(collections) - - -@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str) -def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str: - """Return type for named collection.""" - collectionType = butler.registry.getCollectionType(name) - return collectionType.name - - -@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str) -def register_collection( - name: str, - collectionTypeName: CollectionTypeNames, - doc: str | None = Query(None), - butler: Butler = Depends(butler_readwrite_dependency), -) -> str: - """Register a collection.""" - collectionType = CollectionType.from_name(collectionTypeName) - butler.registry.registerCollection(name, collectionType, doc) - - # Need to refresh the global read only butler otherwise other clients - # may not see this change. - if GLOBAL_READONLY_BUTLER is not None: # for mypy - GLOBAL_READONLY_BUTLER.registry.refresh() - - return name - - -@app.get( - "/butler/v1/registry/dataset/{id}", - summary="Retrieve this dataset definition.", - response_model=SerializedDatasetRef | None, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset( - id: DatasetId, butler: Butler = Depends(butler_readonly_dependency) -) -> SerializedDatasetRef | None: - """Return a single dataset reference.""" - ref = butler.registry.getDataset(id) - if ref is not None: - return ref.to_simple() - # This could raise a 404 since id is not found. The standard regsitry - # getDataset method returns without error so follow that example here. - return ref - - -@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str]) -def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]: - """Return locations of datasets.""" - # Takes an ID so need to convert to a real DatasetRef - fake_ref = SerializedDatasetRef(id=id) - - try: - # Converting this to a real DatasetRef takes time and is not - # needed internally since only the ID is used. - ref = DatasetRef.from_simple(fake_ref, registry=butler.registry) - except Exception: - # SQL getDatasetLocations looks at ID in datastore and does not - # check it is in registry. Follow that example and return without - # error. - return [] - - return list(butler.registry.getDatasetLocations(ref)) - - -# TimeSpan not yet a pydantic model -@app.post( - "/butler/v1/registry/findDataset/{datasetType}", - summary="Retrieve this dataset definition from collection, dataset type, and dataId", - response_model=SerializedDatasetRef, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def find_dataset( - datasetType: str, - dataId: SerializedDataCoordinate | None = None, - collections: list[str] | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> SerializedDatasetRef | None: - """Return a single dataset reference matching query.""" - collection_query = collections if collections else None - - ref = butler.registry.findDataset( - datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query - ) - return ref.to_simple() if ref else None - - -# POST is used for the complex dict data structures -@app.post( - "/butler/v1/registry/datasets", - summary="Query all dataset holdings.", - response_model=list[SerializedDatasetRef], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_datasets( - query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDatasetRef]: - """Return datasets matching query.""" - # This method might return a lot of results - - if query.collections: - collections = query.collections.expression() - else: - collections = None - - datasets = butler.registry.queryDatasets( - query.datasetType.expression(), - collections=collections, - dimensions=query.dimensions, - dataId=unpack_dataId(butler, query.dataId), - where=query.where, - findFirst=query.findFirst, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [ref.to_simple() for ref in datasets] - - -# POST is used for the complex dict data structures -@app.post( - "/butler/v1/registry/dataIds", - summary="Query all data IDs.", - response_model=list[SerializedDataCoordinate], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_data_ids( - query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDataCoordinate]: - """Return data IDs matching query.""" - if query.datasets: - datasets = query.datasets.expression() - else: - datasets = None - if query.collections: - collections = query.collections.expression() - else: - collections = None - - dataIds = butler.registry.queryDataIds( - query.dimensions, - collections=collections, - datasets=datasets, - dataId=unpack_dataId(butler, query.dataId), - where=query.where, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [coord.to_simple() for coord in dataIds] - - -# Uses POST to handle the DataId -@app.post( - "/butler/v1/registry/dimensionRecords/{element}", - summary="Retrieve dimension records matching query", - response_model=list[SerializedDimensionRecord], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_dimension_records( - element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDimensionRecord]: - """Return dimension records matching query.""" - if query.datasets: - datasets = query.datasets.expression() - else: - datasets = None - if query.collections: - collections = query.collections.expression() - else: - collections = None - - records = butler.registry.queryDimensionRecords( - element, - dataId=unpack_dataId(butler, query.dataId), - collections=collections, - where=query.where, - datasets=datasets, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [r.to_simple() for r in records] diff --git a/python/lsst/daf/butler/server_models.py b/python/lsst/daf/butler/server_models.py index 4cb4c5e929..1c34747e33 100644 --- a/python/lsst/daf/butler/server_models.py +++ b/python/lsst/daf/butler/server_models.py @@ -26,287 +26,3 @@ # along with this program. If not, see . """Models used for client/server communication.""" - -__all__ = ( - "QueryDatasetsModel", - "QueryDataIdsModel", - "QueryDimensionRecordsModel", - "ExpressionQueryParameter", - "DatasetsQueryParameter", -) - -import re -from collections.abc import Mapping -from typing import Any, ClassVar - -import pydantic -from lsst.utils.iteration import ensure_iterable -from pydantic import Field - -from ._compat import PYDANTIC_V2, _BaseModelCompat -from .dimensions import DataIdValue, SerializedDataCoordinate -from .utils import globToRegex - -# Simple scalar python types. -ScalarType = int | bool | float | str - -# Bind parameters can have any scalar type. -BindType = dict[str, ScalarType] - -# For serialization purposes a data ID key must be a str. -SimpleDataId = Mapping[str, DataIdValue] - - -# While supporting pydantic v1 and v2 keep this outside the model. -_expression_query_schema_extra = { - "examples": [ - { - "regex": ["^cal.*"], - "glob": ["cal*", "raw"], - } - ] -} - - -class ExpressionQueryParameter(_BaseModelCompat): - """Represents a specification for an expression query. - - Generally used for collection or dataset type expressions. This - implementation returns ``...`` by default. - """ - - _allow_ellipsis: ClassVar[bool] = True - """Control whether expression can match everything.""" - - regex: list[str] | None = Field( - None, - title="List of regular expression strings.", - examples=["^cal.*"], - ) - - glob: list[str] | None = Field( - None, - title="List of globs or explicit strings to use in expression.", - examples=["cal*"], - ) - - if PYDANTIC_V2: - model_config = { - "json_schema_extra": _expression_query_schema_extra, # type: ignore[typeddict-item] - } - else: - - class Config: - """Local configuration overrides for model.""" - - schema_extra = _expression_query_schema_extra - - def expression(self) -> Any: - """Combine regex and glob lists into single expression.""" - if self.glob is None and self.regex is None: - if self._allow_ellipsis: - return ... - # Rather than matching all, interpret this as no expression - # at all. - return None - - expression: list[str | re.Pattern] = [] - if self.regex is not None: - for r in self.regex: - expression.append(re.compile(r)) - if self.glob is not None: - regexes = globToRegex(self.glob) - if isinstance(regexes, list): - expression.extend(regexes) - else: - if self._allow_ellipsis: - return ... - raise ValueError("Expression matches everything but that is not allowed.") - return expression - - @classmethod - def from_expression(cls, expression: Any) -> "ExpressionQueryParameter": - """Convert a standard dataset type expression to wire form.""" - if expression is ...: - return cls() - - expressions = ensure_iterable(expression) - params: dict[str, list[str]] = {"glob": [], "regex": []} - for expression in expressions: - if expression is ...: - # This matches everything - return cls() - - if isinstance(expression, re.Pattern): - params["regex"].append(expression.pattern) - elif isinstance(expression, str): - params["glob"].append(expression) - elif hasattr(expression, "name"): - params["glob"].append(expression.name) - else: - raise ValueError(f"Unrecognized type given to expression: {expression!r}") - - # Clean out empty dicts. - for k in list(params): - if not params[k]: - del params[k] - - return cls(**params) - - -class DatasetsQueryParameter(ExpressionQueryParameter): - """Represents a specification for a dataset expression query. - - This differs from the standard expression query in that an empty - expression will return `None` rather than ``...``. - """ - - _allow_ellipsis: ClassVar[bool] = False - - -# Shared field definitions -Where = Field( - "", - title="String expression similar to a SQL WHERE clause.", - examples=["detector = 5 AND instrument = 'HSC'"], -) -Collections = Field( - None, - title="An expression that identifies the collections to search.", -) -Datasets = Field( - None, - title="An expression that identifies dataset types to search (must not match all datasets).", -) -OptionalDimensions = Field( - None, - title="Relevant dimensions to include.", - examples=["detector", "physical_filter"], -) -Dimensions = Field( - ..., - title="Relevant dimensions to include.", - examples=["detector", "physical_filter"], -) -DataId = Field( - None, - title="Data ID to constrain the query.", -) -FindFirst = Field( - False, - title="Control whether only first matching dataset ref or type is returned.", -) -Components = Field( - None, - title="Control how expressions apply to components.", -) -Bind = Field( - None, - title="Mapping to use to inject values into the WHERE parameter clause.", -) -Check = Field( - True, - title="Control whether to check the query for consistency.", -) -KeywordArgs = Field( - None, - title="Additional parameters to use when standardizing the supplied data ID.", -) - - -class QueryBaseModel(_BaseModelCompat): - """Base model for all query models.""" - - if PYDANTIC_V2: - - @pydantic.field_validator("keyword_args", check_fields=False) # type: ignore[attr-defined] - @classmethod - def _check_keyword_args(cls, v: SimpleDataId) -> SimpleDataId | None: - """Convert kwargs into None if empty. - - This retains the property at its default value and can therefore - remove it from serialization. - - The validator will be ignored if the subclass does not have this - property in its model. - """ - if not v: - return None - return v - - else: - - @pydantic.validator("keyword_args", check_fields=False) - def _check_keyword_args(cls, v, values) -> SimpleDataId | None: # type: ignore # noqa: N805 - """Convert kwargs into None if empty. - - This retains the property at its default value and can therefore - remove it from serialization. - - The validator will be ignored if the subclass does not have this - property in its model. - """ - if not v: - return None - return v - - def kwargs(self) -> SimpleDataId: - """Return keyword args, converting None to a `dict`. - - Returns - ------- - **kwargs - The keword arguments stored in the model. `None` is converted - to an empty dict. Returns empty dict if the ``keyword_args`` - property is not defined. - """ - try: - # mypy does not know about the except - kwargs = self.keyword_args # type: ignore - except AttributeError: - kwargs = {} - if kwargs is None: - return {} - return kwargs - - -class QueryDatasetsModel(QueryBaseModel): - """Information needed for a registry dataset query.""" - - datasetType: ExpressionQueryParameter = Field(..., title="Dataset types to query. Can match all.") - collections: ExpressionQueryParameter | None = Collections - dimensions: list[str] | None = OptionalDimensions - dataId: SerializedDataCoordinate | None = DataId - where: str = Where - findFirst: bool = FindFirst - components: bool | None = Components - bind: BindType | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model - - -class QueryDataIdsModel(QueryBaseModel): - """Information needed to query data IDs.""" - - dimensions: list[str] = Dimensions - dataId: SerializedDataCoordinate | None = DataId - datasets: DatasetsQueryParameter | None = Datasets - collections: ExpressionQueryParameter | None = Collections - where: str = Where - components: bool | None = Components - bind: BindType | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model - - -class QueryDimensionRecordsModel(QueryBaseModel): - """Information needed to query the dimension records.""" - - dataId: SerializedDataCoordinate | None = DataId - datasets: DatasetsQueryParameter | None = Datasets - collections: ExpressionQueryParameter | None = Collections - where: str = Where - components: bool | None = Components - bind: SimpleDataId | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model diff --git a/tests/test_server.py b/tests/test_server.py index 724db51144..de0bc682dd 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -88,10 +88,6 @@ def test_simple(self): self.assertEqual(response.status_code, 200) self.assertIn("Butler Server", response.json()) - response = self.client.get("/butler/butler.json") - self.assertEqual(response.status_code, 200) - self.assertIn("registry", response.json()) - response = self.client.get("/butler/v1/universe") self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) From 81c29c539c33fdc7ccb9180657b4dc8cdd172d1e Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 13:13:15 -0700 Subject: [PATCH 4/8] Move server code into a subpackage --- .../butler/remote_butler/server/__init__.py | 28 +++++++++++++++++++ .../server/_server.py} | 0 .../server/_server_models.py} | 0 tests/test_server.py | 7 +++-- 4 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 python/lsst/daf/butler/remote_butler/server/__init__.py rename python/lsst/daf/butler/{server.py => remote_butler/server/_server.py} (100%) rename python/lsst/daf/butler/{server_models.py => remote_butler/server/_server_models.py} (100%) diff --git a/python/lsst/daf/butler/remote_butler/server/__init__.py b/python/lsst/daf/butler/remote_butler/server/__init__.py new file mode 100644 index 0000000000..0377dc8a5d --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/__init__.py @@ -0,0 +1,28 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from ._server import app diff --git a/python/lsst/daf/butler/server.py b/python/lsst/daf/butler/remote_butler/server/_server.py similarity index 100% rename from python/lsst/daf/butler/server.py rename to python/lsst/daf/butler/remote_butler/server/_server.py diff --git a/python/lsst/daf/butler/server_models.py b/python/lsst/daf/butler/remote_butler/server/_server_models.py similarity index 100% rename from python/lsst/daf/butler/server_models.py rename to python/lsst/daf/butler/remote_butler/server/_server_models.py diff --git a/tests/test_server.py b/tests/test_server.py index de0bc682dd..f18a6a81c8 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -29,11 +29,12 @@ import unittest try: + import lsst.daf.butler.remote_butler.server._server + # Failing to import any of these should disable the tests. - import lsst.daf.butler.server from fastapi.testclient import TestClient from lsst.daf.butler.remote_butler import RemoteButler - from lsst.daf.butler.server import app + from lsst.daf.butler.remote_butler.server._server import app except ImportError: TestClient = None app = None @@ -74,7 +75,7 @@ def setUpClass(cls): # Globally change where the server thinks its butler repository # is located. This will prevent any other server tests and is # not a long term fix. - lsst.daf.butler.server.BUTLER_ROOT = cls.root + lsst.daf.butler.remote_butler.server._server.BUTLER_ROOT = cls.root cls.client = TestClient(app) cls.butler = _make_remote_butler(cls.client) From f1fbc18b1dce8bc985cb3d679d4d954fcb79fcaa Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 13:43:58 -0700 Subject: [PATCH 5/8] Use FastAPI dependency overrides Instead of manually setting a global variable to configure the test server's Butler, use FastAPI's dependency injection framework. --- .../butler/remote_butler/server/__init__.py | 3 +- .../butler/remote_butler/server/_factory.py | 36 +++++++++++++++++++ .../butler/remote_butler/server/_server.py | 31 ++++++---------- tests/test_server.py | 22 ++++++------ 4 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 python/lsst/daf/butler/remote_butler/server/_factory.py diff --git a/python/lsst/daf/butler/remote_butler/server/__init__.py b/python/lsst/daf/butler/remote_butler/server/__init__.py index 0377dc8a5d..a3fea4f9c3 100644 --- a/python/lsst/daf/butler/remote_butler/server/__init__.py +++ b/python/lsst/daf/butler/remote_butler/server/__init__.py @@ -25,4 +25,5 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from ._server import app +from ._factory import Factory +from ._server import app, factory_dependency diff --git a/python/lsst/daf/butler/remote_butler/server/_factory.py b/python/lsst/daf/butler/remote_butler/server/_factory.py new file mode 100644 index 0000000000..da5ff1b1c9 --- /dev/null +++ b/python/lsst/daf/butler/remote_butler/server/_factory.py @@ -0,0 +1,36 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from lsst.daf.butler import Butler + + +class Factory: + def __init__(self, *, butler: Butler): + self._butler = butler + + def create_butler(self) -> Butler: + return self._butler diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 8170612085..8f7ad4b1d7 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -30,12 +30,15 @@ __all__ = () import logging +from functools import cache from typing import Any from fastapi import Depends, FastAPI from fastapi.middleware.gzip import GZipMiddleware from lsst.daf.butler import Butler +from ._factory import Factory + BUTLER_ROOT = "ci_hsc_gen3/DATA" log = logging.getLogger("excalibur") @@ -44,28 +47,13 @@ app.add_middleware(GZipMiddleware, minimum_size=1000) -GLOBAL_READWRITE_BUTLER: Butler | None = None -GLOBAL_READONLY_BUTLER: Butler | None = None - - -def _make_global_butler() -> None: - global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER - if GLOBAL_READONLY_BUTLER is None: - GLOBAL_READONLY_BUTLER = Butler.from_config(BUTLER_ROOT, writeable=False) - if GLOBAL_READWRITE_BUTLER is None: - GLOBAL_READWRITE_BUTLER = Butler.from_config(BUTLER_ROOT, writeable=True) - - -def butler_readonly_dependency() -> Butler: - """Return global read-only butler.""" - _make_global_butler() - return Butler.from_config(butler=GLOBAL_READONLY_BUTLER) +@cache +def _make_global_butler() -> Butler: + return Butler.from_config(BUTLER_ROOT) -def butler_readwrite_dependency() -> Butler: - """Return read-write butler.""" - _make_global_butler() - return Butler.from_config(butler=GLOBAL_READWRITE_BUTLER) +def factory_dependency() -> Factory: + return Factory(butler=_make_global_butler()) @app.get("/butler/") @@ -75,6 +63,7 @@ def read_root() -> str: @app.get("/butler/v1/universe", response_model=dict[str, Any]) -def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]: +def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" + butler = factory.create_butler() return butler.dimensions.dimensionConfig.toDict() diff --git a/tests/test_server.py b/tests/test_server.py index f18a6a81c8..f2e157cb23 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -29,17 +29,15 @@ import unittest try: - import lsst.daf.butler.remote_butler.server._server - # Failing to import any of these should disable the tests. from fastapi.testclient import TestClient from lsst.daf.butler.remote_butler import RemoteButler - from lsst.daf.butler.remote_butler.server._server import app + from lsst.daf.butler.remote_butler.server import Factory, app, factory_dependency except ImportError: TestClient = None app = None -from lsst.daf.butler import CollectionType +from lsst.daf.butler import Butler from lsst.daf.butler.tests.utils import MetricTestRepo, makeTestTempDir, removeTestTempDir TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -67,21 +65,21 @@ def setUpClass(cls): # First create a butler and populate it. cls.root = makeTestTempDir(TESTDIR) cls.repo = MetricTestRepo(root=cls.root, configFile=os.path.join(TESTDIR, "config/basic/butler.yaml")) + # Override the server's Butler initialization to point at our test repo + server_butler = Butler.from_config(cls.root) - # Add a collection chain. - cls.repo.butler.registry.registerCollection("chain", CollectionType.CHAINED) - cls.repo.butler.registry.setCollectionChain("chain", ["ingest"]) + def create_factory_dependency(): + return Factory(butler=server_butler) - # Globally change where the server thinks its butler repository - # is located. This will prevent any other server tests and is - # not a long term fix. - lsst.daf.butler.remote_butler.server._server.BUTLER_ROOT = cls.root - cls.client = TestClient(app) + app.dependency_overrides[factory_dependency] = create_factory_dependency + # Set up the RemoteButler that will connect to the server + cls.client = TestClient(app) cls.butler = _make_remote_butler(cls.client) @classmethod def tearDownClass(cls): + del app.dependency_overrides[factory_dependency] removeTestTempDir(cls.root) def test_simple(self): From 3ddba4846a8658870cf4c1491c429131260d837d Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Wed, 25 Oct 2023 10:14:39 -0700 Subject: [PATCH 6/8] Remove Excalibur branding --- python/lsst/daf/butler/remote_butler/server/_server.py | 8 +------- tests/test_server.py | 4 ---- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index 8f7ad4b1d7..c27b394c3d 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -41,7 +41,7 @@ BUTLER_ROOT = "ci_hsc_gen3/DATA" -log = logging.getLogger("excalibur") +log = logging.getLogger(__name__) app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) @@ -56,12 +56,6 @@ def factory_dependency() -> Factory: return Factory(butler=_make_global_butler()) -@app.get("/butler/") -def read_root() -> str: - """Return message when accessing the root URL.""" - return "Welcome to Excalibur... aka your Butler Server" - - @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" diff --git a/tests/test_server.py b/tests/test_server.py index f2e157cb23..401e0126dd 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -83,10 +83,6 @@ def tearDownClass(cls): removeTestTempDir(cls.root) def test_simple(self): - response = self.client.get("/butler/") - self.assertEqual(response.status_code, 200) - self.assertIn("Butler Server", response.json()) - response = self.client.get("/butler/v1/universe") self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json()) From d3446392770f5d434c59835e9e1175d539b5fdd6 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Wed, 25 Oct 2023 10:47:24 -0700 Subject: [PATCH 7/8] Implement collections and run in RemoteButler --- .../lsst/daf/butler/remote_butler/_remote_butler.py | 13 ++++++++++--- tests/test_remote_butler.py | 6 +++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 777db8d3b9..5f21417a21 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -46,7 +46,7 @@ from .._storage_class import StorageClass from ..datastore import DatasetRefURIs from ..dimensions import DataId, DimensionConfig, DimensionUniverse -from ..registry import Registry +from ..registry import Registry, RegistryDefaults from ..transfers import RepoExportContext from ._config import RemoteButlerConfigModel @@ -57,7 +57,11 @@ def __init__( # These parameters are inherited from the Butler() constructor config: Config | ResourcePathExpression | None = None, *, + collections: Any = None, + run: str | None = None, searchPaths: Sequence[ResourcePathExpression] | None = None, + writeable: bool | None = None, + inferDefaults: bool = True, # Parameters unique to RemoteButler http_client: httpx.Client | None = None, **kwargs: Any, @@ -65,6 +69,9 @@ def __init__( butler_config = ButlerConfig(config, searchPaths, without_datastore=True) self._config = RemoteButlerConfigModel.model_validate(butler_config) self._dimensions: DimensionUniverse | None = None + # TODO: RegistryDefaults should have finish() called on it, but this + # requires getCollectionSummary() which is not yet implemented + self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs) if http_client is not None: # We have injected a client explicitly in to the class. @@ -266,12 +273,12 @@ def validateConfiguration( @property def collections(self) -> Sequence[str]: # Docstring inherited. - raise NotImplementedError() + return self._registry_defaults.collections @property def run(self) -> str | None: # Docstring inherited. - raise NotImplementedError() + return self._registry_defaults.run @property def registry(self) -> Registry: diff --git a/tests/test_remote_butler.py b/tests/test_remote_butler.py index 836c56fd2d..3a671311b9 100644 --- a/tests/test_remote_butler.py +++ b/tests/test_remote_butler.py @@ -47,9 +47,13 @@ def test_instantiate_via_butler(self): { "cls": "lsst.daf.butler.remote_butler.RemoteButler", "remote_butler": {"url": "https://validurl.example"}, - } + }, + collections=["collection1", "collection2"], + run="collection2", ) assert isinstance(butler, RemoteButler) + self.assertEqual(butler.collections, ("collection1", "collection2")) + self.assertEqual(butler.run, "collection2") def test_bad_config(self): with self.assertRaises(ValidationError): From 6f6001b65a08ba9a75e1aee67014ee3d4e3bf8c1 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 26 Oct 2023 10:52:48 -0700 Subject: [PATCH 8/8] Use __all__ instead of import by name --- python/lsst/daf/butler/remote_butler/__init__.py | 2 +- python/lsst/daf/butler/remote_butler/_remote_butler.py | 2 ++ python/lsst/daf/butler/remote_butler/server/__init__.py | 4 ++-- python/lsst/daf/butler/remote_butler/server/_factory.py | 2 ++ python/lsst/daf/butler/remote_butler/server/_server.py | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/lsst/daf/butler/remote_butler/__init__.py b/python/lsst/daf/butler/remote_butler/__init__.py index 98bb440c81..56a73ebef3 100644 --- a/python/lsst/daf/butler/remote_butler/__init__.py +++ b/python/lsst/daf/butler/remote_butler/__init__.py @@ -25,4 +25,4 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from ._remote_butler import RemoteButler +from ._remote_butler import * diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 5f21417a21..a9a0273618 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -25,6 +25,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +__all__ = ("RemoteButler",) + from collections.abc import Collection, Iterable, Sequence from contextlib import AbstractContextManager from typing import Any, TextIO diff --git a/python/lsst/daf/butler/remote_butler/server/__init__.py b/python/lsst/daf/butler/remote_butler/server/__init__.py index a3fea4f9c3..d63badaf11 100644 --- a/python/lsst/daf/butler/remote_butler/server/__init__.py +++ b/python/lsst/daf/butler/remote_butler/server/__init__.py @@ -25,5 +25,5 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from ._factory import Factory -from ._server import app, factory_dependency +from ._factory import * +from ._server import * diff --git a/python/lsst/daf/butler/remote_butler/server/_factory.py b/python/lsst/daf/butler/remote_butler/server/_factory.py index da5ff1b1c9..7d57c9c246 100644 --- a/python/lsst/daf/butler/remote_butler/server/_factory.py +++ b/python/lsst/daf/butler/remote_butler/server/_factory.py @@ -27,6 +27,8 @@ from lsst.daf.butler import Butler +__all__ = ("Factory",) + class Factory: def __init__(self, *, butler: Butler): diff --git a/python/lsst/daf/butler/remote_butler/server/_server.py b/python/lsst/daf/butler/remote_butler/server/_server.py index c27b394c3d..3be9348223 100644 --- a/python/lsst/daf/butler/remote_butler/server/_server.py +++ b/python/lsst/daf/butler/remote_butler/server/_server.py @@ -27,7 +27,7 @@ from __future__ import annotations -__all__ = () +__all__ = ("app", "factory_dependency") import logging from functools import cache