diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b7ee4d1..740a21b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -61,7 +61,7 @@ jobs: strategy: matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.gitignore b/.gitignore index b6e4761..5f87790 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,5 @@ dmypy.json # Pyre type checker .pyre/ + +_version.py diff --git a/astropylibrarian/algolia/client.py b/astropylibrarian/algolia/client.py index 7e10e52..477b656 100644 --- a/astropylibrarian/algolia/client.py +++ b/astropylibrarian/algolia/client.py @@ -3,32 +3,19 @@ dry-run operations. """ -from __future__ import annotations - import logging import uuid from copy import deepcopy -from typing import ( - TYPE_CHECKING, - Any, - AsyncIterator, - Dict, - Iterator, - List, - Optional, - Type, - Union, -) - -from algoliasearch.search_client import SearchClient - -if TYPE_CHECKING: - from types import TracebackType +from types import TracebackType +from typing import Any, AsyncIterator, Iterator, Type, Union - from algoliasearch.search_index_async import SearchIndexAsync +from algoliasearch.search.client import SearchClient +from algoliasearch.search.models.batch_response import BatchResponse +from algoliasearch.search.models.browse_params_object import BrowseParamsObject +from algoliasearch.search.models.browse_response import BrowseResponse +from algoliasearch.search.models.deleted_at_response import DeletedAtResponse - -AlgoliaIndexType = Union["SearchIndexAsync", "MockAlgoliaIndex"] +AlgoliaIndexType = Union["AlgoliaIndex", "MockAlgoliaIndex"] """Type annotation alias supporting the return types of the `AlgoliaIndex` and `MockAlgoliaIndex` context managers. """ @@ -80,23 +67,36 @@ class AlgoliaIndex(BaseAlgoliaIndex): Name of the Algolia index. """ - async def __aenter__(self) -> SearchIndexAsync: + async def __aenter__(self) -> SearchClient: self._logger.debug("Opening algolia client") - self.algolia_client = SearchClient.create(self.app_id, self._key) - self._logger.debug("Initializing algolia index") - self.index = self.algolia_client.init_index(self.name) - return self.index + self.algolia_client = SearchClient(self.app_id, self._key) + return self.algolia_client async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc: Optional[Exception], - tb: Optional[TracebackType], + exc_type: Type[BaseException] | None, + exc: Exception | None, + tb: TracebackType | None, ) -> None: self._logger.debug("Closing algolia client") - await self.algolia_client.close_async() + await self.algolia_client.close() self._logger.debug("Finished closing algolia client") + async def browse_objects_async( + self, browse_params: BrowseParamsObject + ) -> BrowseResponse: + return await self.algolia_client.browse_objects( + index_name=self.name, aggregator=None, browse_params=browse_params + ) + + async def save_objects_async( + self, objects: list[dict[str, Any]] + ) -> list[BatchResponse]: + return self.algolia_client.save_objects(self.name, objects) + + async def delete_objects_async(self, objectids: list[str]) -> list[BatchResponse]: + return self.algolia_client.delete_objects(self.name, objectids) + class MockAlgoliaIndex(BaseAlgoliaIndex): """A mock Algolia index client. @@ -117,30 +117,30 @@ class MockAlgoliaIndex(BaseAlgoliaIndex): async def __aenter__(self) -> "MockAlgoliaIndex": self._logger.debug("Creating mock Algolia index") - self._saved_objects: List[Dict] = [] + self._saved_objects: list[dict] = [] return self async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc: Optional[Exception], - tb: Optional[TracebackType], + exc_type: Type[BaseException] | None, + exc: Exception | None, + tb: TracebackType | None, ) -> None: self._logger.debug("Closing MockAlgoliaIndex") async def save_objects_async( self, - objects: Union[List[Dict], Iterator[Dict]], - request_options: Optional[Dict[str, Any]] = None, - ) -> MockMultiResponse: + objects: list[dict] | Iterator[dict], + request_options: dict[str, Any] | None = None, + ) -> "MockMultiResponse": """Mock implementation of save_objects_async.""" for obj in objects: self._saved_objects.append(deepcopy(obj)) return MockMultiResponse() async def browse_objects_async( - self, search_settings: Dict[str, Any] - ) -> AsyncIterator[Dict[str, Any]]: + self, search_settings: dict[str, Any] + ) -> AsyncIterator[dict[str, Any]]: self._logger.debug("Got search settings %s", search_settings) # FIXME need to flesh out this mock: # - provide a way to seed data @@ -148,8 +148,10 @@ async def browse_objects_async( for _ in range(5): yield {} - async def delete_objects_async(self, objectids: List[str]) -> List[str]: - return objectids + async def delete_objects_async( + self, objectids: list[str] + ) -> list[DeletedAtResponse]: + return [DeletedAtResponse(task_id=0, deleted_at="") for _ in objectids] class MockMultiResponse: diff --git a/astropylibrarian/workflows/deleterooturl.py b/astropylibrarian/workflows/deleterooturl.py index 7db3f67..3df7e1d 100644 --- a/astropylibrarian/workflows/deleterooturl.py +++ b/astropylibrarian/workflows/deleterooturl.py @@ -1,28 +1,23 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst """Workflow for deleting all Algolia records associated with a root URL.""" -from __future__ import annotations - import logging -from typing import TYPE_CHECKING - -from astropylibrarian.algolia.client import escape_facet_value +from typing import Any, AsyncIterator -if TYPE_CHECKING: - from typing import Any, AsyncIterator, Dict, List +from algoliasearch.search.models.browse_params_object import BrowseParamsObject - from astropylibrarian.algolia.client import AlgoliaIndexType +from astropylibrarian.algolia.client import AlgoliaIndexType, escape_facet_value logger = logging.getLogger(__name__) async def delete_root_url( *, root_url: str, algolia_index: AlgoliaIndexType -) -> List[str]: +) -> list[str]: """Delete all Algolia records associated with a ``root_url``.""" - object_ids: List[str] = [] + object_ids: list[str] = [] async for record in search_for_records( - index=algolia_index, root_url=root_url + algolia_index=algolia_index, root_url=root_url ): if record["root_url"] != root_url: logger.warning( @@ -35,8 +30,8 @@ async def delete_root_url( logger.debug("Found %d objects for deletion", len(object_ids)) - response = await algolia_index.delete_objects_async(object_ids) - logger.debug("Algolia response:\n%s", response.raw_responses) + responses = await algolia_index.delete_objects_async(object_ids) + logger.debug("Algolia response:\n%s", responses) logger.info("Deleted %d objects", len(object_ids)) @@ -44,16 +39,13 @@ async def delete_root_url( async def search_for_records( - *, index: AlgoliaIndexType, root_url: str -) -> AsyncIterator[Dict[str, Any]]: + *, algolia_index: AlgoliaIndexType, root_url: str +) -> AsyncIterator[dict[str, Any]]: filters = f"root_url:{escape_facet_value(root_url)}" logger.debug("Filter:\n%s", filters) - async for result in index.browse_objects_async( - { - "filters": filters, - "attributesToRetrieve": ["root_url"], - "attributesToHighlight": [], - } - ): + obj = BrowseParamsObject( + filters=filters, attributes_to_retrieve=["root_url"], attributes_to_highlight=[] + ) + async for result in algolia_index.browse_objects_async(obj): yield result diff --git a/astropylibrarian/workflows/expirerecords.py b/astropylibrarian/workflows/expirerecords.py index 19877c9..a264a7e 100644 --- a/astropylibrarian/workflows/expirerecords.py +++ b/astropylibrarian/workflows/expirerecords.py @@ -6,6 +6,8 @@ import logging from typing import TYPE_CHECKING +from algoliasearch.search.models.browse_params_object import BrowseParamsObject + from astropylibrarian.algolia.client import escape_facet_value if TYPE_CHECKING: @@ -27,21 +29,20 @@ async def expire_old_records( " AND NOT " f"root_url:{escape_facet_value(root_url)}" ) - search_settings = { - "filters": filters, - "attributesToRetrieve": ["root_url", "index_epoch"], - "attributesToHighlight": [], - } + + obj = BrowseParamsObject( + filters=filters, + attributes_to_retrieve=["root_url", "index_epoch"], + attributes_to_highlight=[], + ) old_object_ids: List[str] = [] - async for r in algolia_index.browse_objects_async(search_settings): + async for r in algolia_index.browse_objects_async(obj): # Double check that we're deleting the right things. if r["root_url"] != root_url: logger.warning("root_url does not match: %s", r["baseUrl"]) continue if r["surrogateKey"] == index_epoch: - logger.warning( - "index_epoch matches current epoch: %s", r["index_epoch"] - ) + logger.warning("index_epoch matches current epoch: %s", r["index_epoch"]) continue old_object_ids.append(r["objectID"]) diff --git a/astropylibrarian/workflows/indextutorial.py b/astropylibrarian/workflows/indextutorial.py index c8a066b..977d11f 100644 --- a/astropylibrarian/workflows/indextutorial.py +++ b/astropylibrarian/workflows/indextutorial.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import TYPE_CHECKING, List -import algoliasearch.exceptions +from algoliasearch.http.exceptions import RequestException from astropylibrarian.algolia.client import generate_index_epoch from astropylibrarian.reducers.tutorial import get_tutorial_reducer @@ -171,7 +171,7 @@ async def index_tutorial( saved_object_ids: List[str] = [] try: response = await algolia_index.save_objects_async(records) - except algoliasearch.exceptions.RequestException as e: + except RequestException as e: logger.error( "Error saving objects for tutorial %s:\n%s", tutorial_html.url, diff --git a/pyproject.toml b/pyproject.toml index a2bbcaf..8efd5af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,46 @@ +[project] +name = "astropy-librarian" +authors = [ + {name = "J.Sick Codes Inc.", email = "hi@jsick.codes"}, + {name = "Jeff Jennings", email = "jeffjennings@users.noreply.github.com"}, + {name = "Adrian Price-Whelan", email = "adrian.prw@gmail.com"}, +] +license = {text = "BSD 3-Clause License"} +description = "The content crawler that supplies Astropy's web search." +readme = {file = "README.rst", content-type = "text/x-rst"} +requires-python = ">=3.11" +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: BSD License", +] +dependencies = [ + "lxml", + "cssselect", + "algoliasearch>=4,<5", + "aiohttp", + "async_timeout", + "PyYAML", + "pydantic", + "typer", + "more-itertools", +] +dynamic = ["version"] + + +[project.optional-dependencies] +dev = [ + "pytest>=6.1", + "pytest-doctestplus", + "types-setuptools", + "types-PyYAML", +] + +[project.urls] +Homepage = "https://github.com/jonathansick/astropy-librarian" + +[project.scripts] +astropylibrarian = "astropylibrarian.cli.app:app" + [build-system] requires = [ "setuptools>=64", @@ -9,19 +52,35 @@ build-backend = "setuptools.build_meta" [tool.setuptools_scm] version_file = "astropylibrarian/_version.py" +[tool.pytest.ini_options] +doctest_plus = "enabled" + +[tool.flake8] +max-line-length = 79 + +[tool.mypy] +disallow_untyped_defs = true +disallow_incomplete_defs = true +ignore_missing_imports = true +show_error_codes = true +strict_equality = true +warn_redundant_casts = true +warn_unreachable = true +warn_unused_ignores = true + [tool.black] line-length = 79 target-version = ["py311"] exclude = ''' /( - \.eggs - | \.git - | \.mypy_cache - | \.tox - | \.venv - | _build - | build - | dist + \.eggs + | \.git + | \.mypy_cache + | \.tox + | \.venv + | _build + | build + | dist )/ ''' diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 087a05e..0000000 --- a/setup.cfg +++ /dev/null @@ -1,61 +0,0 @@ -[metadata] -name = astropy-librarian -provides = astropylibrarian -author = J.Sick Codes Inc. -author_email = hi@jsick.codes -license = BSD 3-Clause License -license_file = LICENSE.rst -url = https://github.com/jonathansick/astropy-librarian -description = The content crawler that supplies Astropy's web search. -long_description = file: README.rst, file: LICENSE.rst -long_description_content_type = text/x-rst -classifiers = - Development Status :: 3 - Alpha - License :: OSI Approved :: BSD License - -[options] -zip_safe = False -include_package_data = True -python_requires = >=3.7 -packages = find: -setup_requires = - setuptools_scm # legacy backup for pyproject.toml usage -install_requires = - lxml - cssselect - algoliasearch>=2.1.0,<3.0.0 - # Pinning next two to match Algolia docs - # https://www.algolia.com/doc/api-client/advanced/asynchronous-environments/python/language=python - aiohttp>=2.0,<4.0 - async_timeout>=4.0.3 - PyYAML - pydantic - typer - more-itertools - -[options.extras_require] -dev = - pytest>=6.1 - pytest-doctestplus - types-setuptools - types-PyYAML - -[options.entry_points] -console_scripts = - astropylibrarian = astropylibrarian.cli.app:app - -[tool:pytest] -doctest_plus = enabled - -[flake8] -max-line-length = 79 - -[mypy] -disallow_untyped_defs = True -disallow_incomplete_defs = True -ignore_missing_imports = True -show_error_codes = True -strict_equality = True -warn_redundant_casts = True -warn_unreachable = True -warn_unused_ignores = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 4ad1361..0000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup(use_scm_version=True) # backup for pyproject.toml usage