From 02737b6a2577db44f314a7083bcc6d314efca68a Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 24 Oct 2023 10:14:23 -0700 Subject: [PATCH] Remove unused server code --- python/lsst/daf/butler/server.py | 375 +----------------------- python/lsst/daf/butler/server_models.py | 284 ------------------ tests/test_server.py | 4 - 3 files changed, 2 insertions(+), 661 deletions(-) diff --git a/python/lsst/daf/butler/server.py b/python/lsst/daf/butler/server.py index 1839838954..8170612085 100644 --- a/python/lsst/daf/butler/server.py +++ b/python/lsst/daf/butler/server.py @@ -30,49 +30,16 @@ __all__ = () import logging -from collections.abc import Mapping -from enum import Enum, auto from typing import Any -from fastapi import Depends, FastAPI, HTTPException, Query +from fastapi import Depends, FastAPI from fastapi.middleware.gzip import GZipMiddleware -from lsst.daf.butler import ( - Butler, - Config, - DataCoordinate, - DatasetId, - DatasetRef, - SerializedDataCoordinate, - SerializedDatasetRef, - SerializedDatasetType, - SerializedDimensionRecord, -) -from lsst.daf.butler.registry import CollectionType -from lsst.daf.butler.server_models import ( - ExpressionQueryParameter, - QueryDataIdsModel, - QueryDatasetsModel, - QueryDimensionRecordsModel, -) +from lsst.daf.butler import Butler BUTLER_ROOT = "ci_hsc_gen3/DATA" log = logging.getLogger("excalibur") - -class CollectionTypeNames(str, Enum): - """Collection type names supported by the interface.""" - - def _generate_next_value_(name, start, count, last_values) -> str: # type: ignore # noqa: N805 - # Use the name directly as the value - return name - - RUN = auto() - CALIBRATION = auto() - CHAINED = auto() - TAGGED = auto() - - app = FastAPI() app.add_middleware(GZipMiddleware, minimum_size=1000) @@ -101,351 +68,13 @@ def butler_readwrite_dependency() -> Butler: return Butler.from_config(butler=GLOBAL_READWRITE_BUTLER) -def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: - """Convert the serialized dataId back to full DataCoordinate. - - Parameters - ---------- - butler : `lsst.daf.butler.Butler` - The butler to use for registry and universe. - data_id : `SerializedDataCoordinate` or `None` - The serialized form. - - Returns - ------- - dataId : `DataCoordinate` or `None` - The DataId usable by registry. - """ - if data_id is None: - return None - return DataCoordinate.from_simple(data_id, registry=butler.registry) - - @app.get("/butler/") def read_root() -> str: """Return message when accessing the root URL.""" return "Welcome to Excalibur... aka your Butler Server" -@app.get("/butler/butler.json", response_model=dict[str, Any]) -def read_server_config() -> Mapping: - """Return the butler configuration that the client should use.""" - config_str = f""" -datastore: - root: {BUTLER_ROOT} -registry: - cls: lsst.daf.butler.registries.remote.RemoteRegistry - db: -""" - config = Config.fromString(config_str, format="yaml") - return config.toDict() - - @app.get("/butler/v1/universe", response_model=dict[str, Any]) def get_dimension_universe(butler: Butler = Depends(butler_readonly_dependency)) -> dict[str, Any]: """Allow remote client to get dimensions definition.""" return butler.dimensions.dimensionConfig.toDict() - - -@app.get("/butler/v1/uri/{id}", response_model=str) -def get_uri(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> str: - """Return a single URI of non-disassembled dataset.""" - ref = butler.registry.getDataset(id) - if not ref: - raise HTTPException(status_code=404, detail=f"Dataset with id {id} does not exist.") - - uri = butler.getURI(ref) - - # In reality would have to convert this to a signed URL - return str(uri) - - -@app.put("/butler/v1/registry/refresh") -def refresh(butler: Butler = Depends(butler_readonly_dependency)) -> None: - """Refresh the registry cache.""" - # Unclear whether this should exist. Which butler is really being - # refreshed? How do we know the server we are refreshing is used later? - # For testing at the moment it is important if a test adds a dataset type - # directly in the server since the test client will not see it. - butler.registry.refresh() - - -@app.get( - "/butler/v1/registry/datasetType/{datasetTypeName}", - summary="Retrieve this dataset type definition.", - response_model=SerializedDatasetType, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset_type( - datasetTypeName: str, butler: Butler = Depends(butler_readonly_dependency) -) -> SerializedDatasetType: - """Return the dataset type.""" - datasetType = butler.registry.getDatasetType(datasetTypeName) - return datasetType.to_simple() - - -@app.get( - "/butler/v1/registry/datasetTypes", - summary="Retrieve all dataset type definitions.", - response_model=list[SerializedDatasetType], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_all_dataset_types( - components: bool | None = Query(None), butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDatasetType]: - """Return all dataset types.""" - datasetTypes = butler.registry.queryDatasetTypes(..., components=components) - return [d.to_simple() for d in datasetTypes] - - -@app.get( - "/butler/v1/registry/datasetTypes/re", - summary="Retrieve dataset type definitions matching expressions", - response_model=list[SerializedDatasetType], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_dataset_types_re( - regex: list[str] | None = Query(None), - glob: list[str] | None = Query(None), - components: bool | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> list[SerializedDatasetType]: - """Return all dataset types matching a regular expression.""" - expression_params = ExpressionQueryParameter(regex=regex, glob=glob) - - datasetTypes = butler.registry.queryDatasetTypes(expression_params.expression(), components=components) - return [d.to_simple() for d in datasetTypes] - - -@app.get("/butler/v1/registry/collection/chain/{parent:path}", response_model=list[str]) -def get_collection_chain(parent: str, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]: - """Return the collection chain members.""" - chain = butler.registry.getCollectionChain(parent) - return list(chain) - - -@app.get("/butler/v1/registry/collections", response_model=list[str]) -def query_collections( - regex: list[str] | None = Query(None), - glob: list[str] | None = Query(None), - datasetType: str | None = Query(None), - flattenChains: bool = Query(False), - collectionType: list[CollectionTypeNames] | None = Query(None), - includeChains: bool | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> list[str]: - """Return collections matching query.""" - expression_params = ExpressionQueryParameter(regex=regex, glob=glob) - collectionTypes = CollectionType.from_names(collectionType) - dataset_type = butler.registry.getDatasetType(datasetType) if datasetType else None - - collections = butler.registry.queryCollections( - expression=expression_params.expression(), - datasetType=dataset_type, - collectionTypes=collectionTypes, - flattenChains=flattenChains, - includeChains=includeChains, - ) - return list(collections) - - -@app.get("/butler/v1/registry/collection/type/{name:path}", response_model=str) -def get_collection_type(name: str, butler: Butler = Depends(butler_readonly_dependency)) -> str: - """Return type for named collection.""" - collectionType = butler.registry.getCollectionType(name) - return collectionType.name - - -@app.put("/butler/v1/registry/collection/{name:path}/{type_}", response_model=str) -def register_collection( - name: str, - collectionTypeName: CollectionTypeNames, - doc: str | None = Query(None), - butler: Butler = Depends(butler_readwrite_dependency), -) -> str: - """Register a collection.""" - collectionType = CollectionType.from_name(collectionTypeName) - butler.registry.registerCollection(name, collectionType, doc) - - # Need to refresh the global read only butler otherwise other clients - # may not see this change. - if GLOBAL_READONLY_BUTLER is not None: # for mypy - GLOBAL_READONLY_BUTLER.registry.refresh() - - return name - - -@app.get( - "/butler/v1/registry/dataset/{id}", - summary="Retrieve this dataset definition.", - response_model=SerializedDatasetRef | None, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def get_dataset( - id: DatasetId, butler: Butler = Depends(butler_readonly_dependency) -) -> SerializedDatasetRef | None: - """Return a single dataset reference.""" - ref = butler.registry.getDataset(id) - if ref is not None: - return ref.to_simple() - # This could raise a 404 since id is not found. The standard regsitry - # getDataset method returns without error so follow that example here. - return ref - - -@app.get("/butler/v1/registry/datasetLocations/{id}", response_model=list[str]) -def get_dataset_locations(id: DatasetId, butler: Butler = Depends(butler_readonly_dependency)) -> list[str]: - """Return locations of datasets.""" - # Takes an ID so need to convert to a real DatasetRef - fake_ref = SerializedDatasetRef(id=id) - - try: - # Converting this to a real DatasetRef takes time and is not - # needed internally since only the ID is used. - ref = DatasetRef.from_simple(fake_ref, registry=butler.registry) - except Exception: - # SQL getDatasetLocations looks at ID in datastore and does not - # check it is in registry. Follow that example and return without - # error. - return [] - - return list(butler.registry.getDatasetLocations(ref)) - - -# TimeSpan not yet a pydantic model -@app.post( - "/butler/v1/registry/findDataset/{datasetType}", - summary="Retrieve this dataset definition from collection, dataset type, and dataId", - response_model=SerializedDatasetRef, - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def find_dataset( - datasetType: str, - dataId: SerializedDataCoordinate | None = None, - collections: list[str] | None = Query(None), - butler: Butler = Depends(butler_readonly_dependency), -) -> SerializedDatasetRef | None: - """Return a single dataset reference matching query.""" - collection_query = collections if collections else None - - ref = butler.registry.findDataset( - datasetType, dataId=unpack_dataId(butler, dataId), collections=collection_query - ) - return ref.to_simple() if ref else None - - -# POST is used for the complex dict data structures -@app.post( - "/butler/v1/registry/datasets", - summary="Query all dataset holdings.", - response_model=list[SerializedDatasetRef], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_datasets( - query: QueryDatasetsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDatasetRef]: - """Return datasets matching query.""" - # This method might return a lot of results - - if query.collections: - collections = query.collections.expression() - else: - collections = None - - datasets = butler.registry.queryDatasets( - query.datasetType.expression(), - collections=collections, - dimensions=query.dimensions, - dataId=unpack_dataId(butler, query.dataId), - where=query.where, - findFirst=query.findFirst, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [ref.to_simple() for ref in datasets] - - -# POST is used for the complex dict data structures -@app.post( - "/butler/v1/registry/dataIds", - summary="Query all data IDs.", - response_model=list[SerializedDataCoordinate], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_data_ids( - query: QueryDataIdsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDataCoordinate]: - """Return data IDs matching query.""" - if query.datasets: - datasets = query.datasets.expression() - else: - datasets = None - if query.collections: - collections = query.collections.expression() - else: - collections = None - - dataIds = butler.registry.queryDataIds( - query.dimensions, - collections=collections, - datasets=datasets, - dataId=unpack_dataId(butler, query.dataId), - where=query.where, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [coord.to_simple() for coord in dataIds] - - -# Uses POST to handle the DataId -@app.post( - "/butler/v1/registry/dimensionRecords/{element}", - summary="Retrieve dimension records matching query", - response_model=list[SerializedDimensionRecord], - response_model_exclude_unset=True, - response_model_exclude_defaults=True, - response_model_exclude_none=True, -) -def query_dimension_records( - element: str, query: QueryDimensionRecordsModel, butler: Butler = Depends(butler_readonly_dependency) -) -> list[SerializedDimensionRecord]: - """Return dimension records matching query.""" - if query.datasets: - datasets = query.datasets.expression() - else: - datasets = None - if query.collections: - collections = query.collections.expression() - else: - collections = None - - records = butler.registry.queryDimensionRecords( - element, - dataId=unpack_dataId(butler, query.dataId), - collections=collections, - where=query.where, - datasets=datasets, - components=query.components, - bind=query.bind, - check=query.check, - **query.kwargs(), - ) - return [r.to_simple() for r in records] diff --git a/python/lsst/daf/butler/server_models.py b/python/lsst/daf/butler/server_models.py index 4cb4c5e929..1c34747e33 100644 --- a/python/lsst/daf/butler/server_models.py +++ b/python/lsst/daf/butler/server_models.py @@ -26,287 +26,3 @@ # along with this program. If not, see . """Models used for client/server communication.""" - -__all__ = ( - "QueryDatasetsModel", - "QueryDataIdsModel", - "QueryDimensionRecordsModel", - "ExpressionQueryParameter", - "DatasetsQueryParameter", -) - -import re -from collections.abc import Mapping -from typing import Any, ClassVar - -import pydantic -from lsst.utils.iteration import ensure_iterable -from pydantic import Field - -from ._compat import PYDANTIC_V2, _BaseModelCompat -from .dimensions import DataIdValue, SerializedDataCoordinate -from .utils import globToRegex - -# Simple scalar python types. -ScalarType = int | bool | float | str - -# Bind parameters can have any scalar type. -BindType = dict[str, ScalarType] - -# For serialization purposes a data ID key must be a str. -SimpleDataId = Mapping[str, DataIdValue] - - -# While supporting pydantic v1 and v2 keep this outside the model. -_expression_query_schema_extra = { - "examples": [ - { - "regex": ["^cal.*"], - "glob": ["cal*", "raw"], - } - ] -} - - -class ExpressionQueryParameter(_BaseModelCompat): - """Represents a specification for an expression query. - - Generally used for collection or dataset type expressions. This - implementation returns ``...`` by default. - """ - - _allow_ellipsis: ClassVar[bool] = True - """Control whether expression can match everything.""" - - regex: list[str] | None = Field( - None, - title="List of regular expression strings.", - examples=["^cal.*"], - ) - - glob: list[str] | None = Field( - None, - title="List of globs or explicit strings to use in expression.", - examples=["cal*"], - ) - - if PYDANTIC_V2: - model_config = { - "json_schema_extra": _expression_query_schema_extra, # type: ignore[typeddict-item] - } - else: - - class Config: - """Local configuration overrides for model.""" - - schema_extra = _expression_query_schema_extra - - def expression(self) -> Any: - """Combine regex and glob lists into single expression.""" - if self.glob is None and self.regex is None: - if self._allow_ellipsis: - return ... - # Rather than matching all, interpret this as no expression - # at all. - return None - - expression: list[str | re.Pattern] = [] - if self.regex is not None: - for r in self.regex: - expression.append(re.compile(r)) - if self.glob is not None: - regexes = globToRegex(self.glob) - if isinstance(regexes, list): - expression.extend(regexes) - else: - if self._allow_ellipsis: - return ... - raise ValueError("Expression matches everything but that is not allowed.") - return expression - - @classmethod - def from_expression(cls, expression: Any) -> "ExpressionQueryParameter": - """Convert a standard dataset type expression to wire form.""" - if expression is ...: - return cls() - - expressions = ensure_iterable(expression) - params: dict[str, list[str]] = {"glob": [], "regex": []} - for expression in expressions: - if expression is ...: - # This matches everything - return cls() - - if isinstance(expression, re.Pattern): - params["regex"].append(expression.pattern) - elif isinstance(expression, str): - params["glob"].append(expression) - elif hasattr(expression, "name"): - params["glob"].append(expression.name) - else: - raise ValueError(f"Unrecognized type given to expression: {expression!r}") - - # Clean out empty dicts. - for k in list(params): - if not params[k]: - del params[k] - - return cls(**params) - - -class DatasetsQueryParameter(ExpressionQueryParameter): - """Represents a specification for a dataset expression query. - - This differs from the standard expression query in that an empty - expression will return `None` rather than ``...``. - """ - - _allow_ellipsis: ClassVar[bool] = False - - -# Shared field definitions -Where = Field( - "", - title="String expression similar to a SQL WHERE clause.", - examples=["detector = 5 AND instrument = 'HSC'"], -) -Collections = Field( - None, - title="An expression that identifies the collections to search.", -) -Datasets = Field( - None, - title="An expression that identifies dataset types to search (must not match all datasets).", -) -OptionalDimensions = Field( - None, - title="Relevant dimensions to include.", - examples=["detector", "physical_filter"], -) -Dimensions = Field( - ..., - title="Relevant dimensions to include.", - examples=["detector", "physical_filter"], -) -DataId = Field( - None, - title="Data ID to constrain the query.", -) -FindFirst = Field( - False, - title="Control whether only first matching dataset ref or type is returned.", -) -Components = Field( - None, - title="Control how expressions apply to components.", -) -Bind = Field( - None, - title="Mapping to use to inject values into the WHERE parameter clause.", -) -Check = Field( - True, - title="Control whether to check the query for consistency.", -) -KeywordArgs = Field( - None, - title="Additional parameters to use when standardizing the supplied data ID.", -) - - -class QueryBaseModel(_BaseModelCompat): - """Base model for all query models.""" - - if PYDANTIC_V2: - - @pydantic.field_validator("keyword_args", check_fields=False) # type: ignore[attr-defined] - @classmethod - def _check_keyword_args(cls, v: SimpleDataId) -> SimpleDataId | None: - """Convert kwargs into None if empty. - - This retains the property at its default value and can therefore - remove it from serialization. - - The validator will be ignored if the subclass does not have this - property in its model. - """ - if not v: - return None - return v - - else: - - @pydantic.validator("keyword_args", check_fields=False) - def _check_keyword_args(cls, v, values) -> SimpleDataId | None: # type: ignore # noqa: N805 - """Convert kwargs into None if empty. - - This retains the property at its default value and can therefore - remove it from serialization. - - The validator will be ignored if the subclass does not have this - property in its model. - """ - if not v: - return None - return v - - def kwargs(self) -> SimpleDataId: - """Return keyword args, converting None to a `dict`. - - Returns - ------- - **kwargs - The keword arguments stored in the model. `None` is converted - to an empty dict. Returns empty dict if the ``keyword_args`` - property is not defined. - """ - try: - # mypy does not know about the except - kwargs = self.keyword_args # type: ignore - except AttributeError: - kwargs = {} - if kwargs is None: - return {} - return kwargs - - -class QueryDatasetsModel(QueryBaseModel): - """Information needed for a registry dataset query.""" - - datasetType: ExpressionQueryParameter = Field(..., title="Dataset types to query. Can match all.") - collections: ExpressionQueryParameter | None = Collections - dimensions: list[str] | None = OptionalDimensions - dataId: SerializedDataCoordinate | None = DataId - where: str = Where - findFirst: bool = FindFirst - components: bool | None = Components - bind: BindType | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model - - -class QueryDataIdsModel(QueryBaseModel): - """Information needed to query data IDs.""" - - dimensions: list[str] = Dimensions - dataId: SerializedDataCoordinate | None = DataId - datasets: DatasetsQueryParameter | None = Datasets - collections: ExpressionQueryParameter | None = Collections - where: str = Where - components: bool | None = Components - bind: BindType | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model - - -class QueryDimensionRecordsModel(QueryBaseModel): - """Information needed to query the dimension records.""" - - dataId: SerializedDataCoordinate | None = DataId - datasets: DatasetsQueryParameter | None = Datasets - collections: ExpressionQueryParameter | None = Collections - where: str = Where - components: bool | None = Components - bind: SimpleDataId | None = Bind - check: bool = Check - keyword_args: SimpleDataId | None = KeywordArgs # mypy refuses to allow kwargs in model diff --git a/tests/test_server.py b/tests/test_server.py index 724db51144..de0bc682dd 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -88,10 +88,6 @@ def test_simple(self): self.assertEqual(response.status_code, 200) self.assertIn("Butler Server", response.json()) - response = self.client.get("/butler/butler.json") - self.assertEqual(response.status_code, 200) - self.assertIn("registry", response.json()) - response = self.client.get("/butler/v1/universe") self.assertEqual(response.status_code, 200) self.assertIn("namespace", response.json())