From 3f519de092419b43e6f059eb892d870791b1bf2c Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 10:27:49 -0400 Subject: [PATCH 1/7] switch to pyproject from setuptools --- .gitignore | 2 ++ pyproject.toml | 76 ++++++++++++++++++++++++++++++++++++++++++++------ setup.cfg | 61 ---------------------------------------- setup.py | 3 -- 4 files changed, 70 insertions(+), 72 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.gitignore b/.gitignore index b6e4761..5f87790 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,5 @@ dmypy.json # Pyre type checker .pyre/ + +_version.py diff --git a/pyproject.toml b/pyproject.toml index a2bbcaf..3863001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,47 @@ +[project] +name = "astropy-librarian" +provides = ["astropylibrarian"] +authors = [ + {name = "J.Sick Codes Inc.", email = "hi@jsick.codes"}, + {name = "Jeff Jennings", email = "jeffjennings@users.noreply.github.com"}, + {name = "Adrian Price-Whelan", email = "adrian.prw@gmail.com"}, +] +license = {text = "BSD 3-Clause License"} +description = "The content crawler that supplies Astropy's web search." +readme = {file = ["README.rst", "LICENSE.rst"], content-type = "text/x-rst"} +requires-python = ">=3.7" +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: BSD License", +] +dependencies = [ + "lxml", + "cssselect", + "algoliasearch>=2.1.0,<3.0.0", + "aiohttp>=2.0,<4.0", + "async_timeout>=4.0.3", + "PyYAML", + "pydantic", + "typer", + "more-itertools", +] +dynamic = ["version"] + + +[project.optional-dependencies] +dev = [ + "pytest>=6.1", + "pytest-doctestplus", + "types-setuptools", + "types-PyYAML", +] + +[project.urls] +Homepage = "https://github.com/jonathansick/astropy-librarian" + +[project.scripts] +astropylibrarian = "astropylibrarian.cli.app:app" + [build-system] requires = [ "setuptools>=64", @@ -9,19 +53,35 @@ build-backend = "setuptools.build_meta" [tool.setuptools_scm] version_file = "astropylibrarian/_version.py" +[tool.pytest.ini_options] +doctest_plus = "enabled" + +[tool.flake8] +max-line-length = 79 + +[tool.mypy] +disallow_untyped_defs = true +disallow_incomplete_defs = true +ignore_missing_imports = true +show_error_codes = true +strict_equality = true +warn_redundant_casts = true +warn_unreachable = true +warn_unused_ignores = true + [tool.black] line-length = 79 target-version = ["py311"] exclude = ''' /( - \.eggs - | \.git - | \.mypy_cache - | \.tox - | \.venv - | _build - | build - | dist + \.eggs + | \.git + | \.mypy_cache + | \.tox + | \.venv + | _build + | build + | dist )/ ''' diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 087a05e..0000000 --- a/setup.cfg +++ /dev/null @@ -1,61 +0,0 @@ -[metadata] -name = astropy-librarian -provides = astropylibrarian -author = J.Sick Codes Inc. -author_email = hi@jsick.codes -license = BSD 3-Clause License -license_file = LICENSE.rst -url = https://github.com/jonathansick/astropy-librarian -description = The content crawler that supplies Astropy's web search. -long_description = file: README.rst, file: LICENSE.rst -long_description_content_type = text/x-rst -classifiers = - Development Status :: 3 - Alpha - License :: OSI Approved :: BSD License - -[options] -zip_safe = False -include_package_data = True -python_requires = >=3.7 -packages = find: -setup_requires = - setuptools_scm # legacy backup for pyproject.toml usage -install_requires = - lxml - cssselect - algoliasearch>=2.1.0,<3.0.0 - # Pinning next two to match Algolia docs - # https://www.algolia.com/doc/api-client/advanced/asynchronous-environments/python/language=python - aiohttp>=2.0,<4.0 - async_timeout>=4.0.3 - PyYAML - pydantic - typer - more-itertools - -[options.extras_require] -dev = - pytest>=6.1 - pytest-doctestplus - types-setuptools - types-PyYAML - -[options.entry_points] -console_scripts = - astropylibrarian = astropylibrarian.cli.app:app - -[tool:pytest] -doctest_plus = enabled - -[flake8] -max-line-length = 79 - -[mypy] -disallow_untyped_defs = True -disallow_incomplete_defs = True -ignore_missing_imports = True -show_error_codes = True -strict_equality = True -warn_redundant_casts = True -warn_unreachable = True -warn_unused_ignores = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 4ad1361..0000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup(use_scm_version=True) # backup for pyproject.toml usage From e4b870017539b369fa48ee8c54df52569fd4308a Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 10:31:26 -0400 Subject: [PATCH 2/7] fix readme def in pyproject --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3863001..ac960d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [project] name = "astropy-librarian" -provides = ["astropylibrarian"] authors = [ {name = "J.Sick Codes Inc.", email = "hi@jsick.codes"}, {name = "Jeff Jennings", email = "jeffjennings@users.noreply.github.com"}, @@ -8,7 +7,7 @@ authors = [ ] license = {text = "BSD 3-Clause License"} description = "The content crawler that supplies Astropy's web search." -readme = {file = ["README.rst", "LICENSE.rst"], content-type = "text/x-rst"} +readme = {file = "README.rst", content-type = "text/x-rst"} requires-python = ">=3.7" classifiers = [ "Development Status :: 3 - Alpha", From d2ea6fb3a40a5ce2e5bb8e9cf28ed5064a718462 Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 10:32:03 -0400 Subject: [PATCH 3/7] unpin algolia and update min python --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ac960d1..e9c173a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ authors = [ license = {text = "BSD 3-Clause License"} description = "The content crawler that supplies Astropy's web search." readme = {file = "README.rst", content-type = "text/x-rst"} -requires-python = ">=3.7" +requires-python = ">=3.11" classifiers = [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: BSD License", @@ -16,9 +16,9 @@ classifiers = [ dependencies = [ "lxml", "cssselect", - "algoliasearch>=2.1.0,<3.0.0", - "aiohttp>=2.0,<4.0", - "async_timeout>=4.0.3", + "algoliasearch", + "aiohttp", + "async_timeout", "PyYAML", "pydantic", "typer", From 9f8a692e9556e0383d672913b6c60fe1e8c96677 Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 10:40:44 -0400 Subject: [PATCH 4/7] update typing --- astropylibrarian/algolia/client.py | 35 ++++++++++++------------------ 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/astropylibrarian/algolia/client.py b/astropylibrarian/algolia/client.py index 7e10e52..9624475 100644 --- a/astropylibrarian/algolia/client.py +++ b/astropylibrarian/algolia/client.py @@ -3,8 +3,6 @@ dry-run operations. """ -from __future__ import annotations - import logging import uuid from copy import deepcopy @@ -12,12 +10,8 @@ TYPE_CHECKING, Any, AsyncIterator, - Dict, Iterator, - List, - Optional, Type, - Union, ) from algoliasearch.search_client import SearchClient @@ -27,8 +21,7 @@ from algoliasearch.search_index_async import SearchIndexAsync - -AlgoliaIndexType = Union["SearchIndexAsync", "MockAlgoliaIndex"] +AlgoliaIndexType = "SearchIndexAsync" | "MockAlgoliaIndex" """Type annotation alias supporting the return types of the `AlgoliaIndex` and `MockAlgoliaIndex` context managers. """ @@ -89,9 +82,9 @@ async def __aenter__(self) -> SearchIndexAsync: async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc: Optional[Exception], - tb: Optional[TracebackType], + exc_type: Type[BaseException] | None, + exc: Exception | None, + tb: TracebackType | None, ) -> None: self._logger.debug("Closing algolia client") await self.algolia_client.close_async() @@ -117,30 +110,30 @@ class MockAlgoliaIndex(BaseAlgoliaIndex): async def __aenter__(self) -> "MockAlgoliaIndex": self._logger.debug("Creating mock Algolia index") - self._saved_objects: List[Dict] = [] + self._saved_objects: list[dict] = [] return self async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc: Optional[Exception], - tb: Optional[TracebackType], + exc_type: Type[BaseException] | None, + exc: Exception | None, + tb: TracebackType | None, ) -> None: self._logger.debug("Closing MockAlgoliaIndex") async def save_objects_async( self, - objects: Union[List[Dict], Iterator[Dict]], - request_options: Optional[Dict[str, Any]] = None, - ) -> MockMultiResponse: + objects: list[dict] | Iterator[dict], + request_options: dict[str, Any] | None = None, + ) -> "MockMultiResponse": """Mock implementation of save_objects_async.""" for obj in objects: self._saved_objects.append(deepcopy(obj)) return MockMultiResponse() async def browse_objects_async( - self, search_settings: Dict[str, Any] - ) -> AsyncIterator[Dict[str, Any]]: + self, search_settings: dict[str, Any] + ) -> AsyncIterator[dict[str, Any]]: self._logger.debug("Got search settings %s", search_settings) # FIXME need to flesh out this mock: # - provide a way to seed data @@ -148,7 +141,7 @@ async def browse_objects_async( for _ in range(5): yield {} - async def delete_objects_async(self, objectids: List[str]) -> List[str]: + async def delete_objects_async(self, objectids: list[str]) -> list[str]: return objectids From 63a4fa0049fc8b9548d5d7c45a94c8c376fce2e9 Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 10:42:34 -0400 Subject: [PATCH 5/7] typo --- astropylibrarian/algolia/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astropylibrarian/algolia/client.py b/astropylibrarian/algolia/client.py index 9624475..d5b0791 100644 --- a/astropylibrarian/algolia/client.py +++ b/astropylibrarian/algolia/client.py @@ -21,7 +21,7 @@ from algoliasearch.search_index_async import SearchIndexAsync -AlgoliaIndexType = "SearchIndexAsync" | "MockAlgoliaIndex" +AlgoliaIndexType = SearchIndexAsync | "MockAlgoliaIndex" """Type annotation alias supporting the return types of the `AlgoliaIndex` and `MockAlgoliaIndex` context managers. """ From 3dad69e6e4de3065e008646d724939a59debef7b Mon Sep 17 00:00:00 2001 From: Adrian Price-Whelan Date: Fri, 11 Oct 2024 17:44:29 -0400 Subject: [PATCH 6/7] updates to be compatible with algolia v4 --- astropylibrarian/algolia/client.py | 53 ++++++++++++--------- astropylibrarian/workflows/deleterooturl.py | 36 ++++++-------- astropylibrarian/workflows/expirerecords.py | 19 ++++---- astropylibrarian/workflows/indextutorial.py | 4 +- pyproject.toml | 2 +- 5 files changed, 58 insertions(+), 56 deletions(-) diff --git a/astropylibrarian/algolia/client.py b/astropylibrarian/algolia/client.py index d5b0791..477b656 100644 --- a/astropylibrarian/algolia/client.py +++ b/astropylibrarian/algolia/client.py @@ -6,22 +6,16 @@ import logging import uuid from copy import deepcopy -from typing import ( - TYPE_CHECKING, - Any, - AsyncIterator, - Iterator, - Type, -) +from types import TracebackType +from typing import Any, AsyncIterator, Iterator, Type, Union -from algoliasearch.search_client import SearchClient +from algoliasearch.search.client import SearchClient +from algoliasearch.search.models.batch_response import BatchResponse +from algoliasearch.search.models.browse_params_object import BrowseParamsObject +from algoliasearch.search.models.browse_response import BrowseResponse +from algoliasearch.search.models.deleted_at_response import DeletedAtResponse -if TYPE_CHECKING: - from types import TracebackType - - from algoliasearch.search_index_async import SearchIndexAsync - -AlgoliaIndexType = SearchIndexAsync | "MockAlgoliaIndex" +AlgoliaIndexType = Union["AlgoliaIndex", "MockAlgoliaIndex"] """Type annotation alias supporting the return types of the `AlgoliaIndex` and `MockAlgoliaIndex` context managers. """ @@ -73,12 +67,10 @@ class AlgoliaIndex(BaseAlgoliaIndex): Name of the Algolia index. """ - async def __aenter__(self) -> SearchIndexAsync: + async def __aenter__(self) -> SearchClient: self._logger.debug("Opening algolia client") - self.algolia_client = SearchClient.create(self.app_id, self._key) - self._logger.debug("Initializing algolia index") - self.index = self.algolia_client.init_index(self.name) - return self.index + self.algolia_client = SearchClient(self.app_id, self._key) + return self.algolia_client async def __aexit__( self, @@ -87,9 +79,24 @@ async def __aexit__( tb: TracebackType | None, ) -> None: self._logger.debug("Closing algolia client") - await self.algolia_client.close_async() + await self.algolia_client.close() self._logger.debug("Finished closing algolia client") + async def browse_objects_async( + self, browse_params: BrowseParamsObject + ) -> BrowseResponse: + return await self.algolia_client.browse_objects( + index_name=self.name, aggregator=None, browse_params=browse_params + ) + + async def save_objects_async( + self, objects: list[dict[str, Any]] + ) -> list[BatchResponse]: + return self.algolia_client.save_objects(self.name, objects) + + async def delete_objects_async(self, objectids: list[str]) -> list[BatchResponse]: + return self.algolia_client.delete_objects(self.name, objectids) + class MockAlgoliaIndex(BaseAlgoliaIndex): """A mock Algolia index client. @@ -141,8 +148,10 @@ async def browse_objects_async( for _ in range(5): yield {} - async def delete_objects_async(self, objectids: list[str]) -> list[str]: - return objectids + async def delete_objects_async( + self, objectids: list[str] + ) -> list[DeletedAtResponse]: + return [DeletedAtResponse(task_id=0, deleted_at="") for _ in objectids] class MockMultiResponse: diff --git a/astropylibrarian/workflows/deleterooturl.py b/astropylibrarian/workflows/deleterooturl.py index 7db3f67..3df7e1d 100644 --- a/astropylibrarian/workflows/deleterooturl.py +++ b/astropylibrarian/workflows/deleterooturl.py @@ -1,28 +1,23 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst """Workflow for deleting all Algolia records associated with a root URL.""" -from __future__ import annotations - import logging -from typing import TYPE_CHECKING - -from astropylibrarian.algolia.client import escape_facet_value +from typing import Any, AsyncIterator -if TYPE_CHECKING: - from typing import Any, AsyncIterator, Dict, List +from algoliasearch.search.models.browse_params_object import BrowseParamsObject - from astropylibrarian.algolia.client import AlgoliaIndexType +from astropylibrarian.algolia.client import AlgoliaIndexType, escape_facet_value logger = logging.getLogger(__name__) async def delete_root_url( *, root_url: str, algolia_index: AlgoliaIndexType -) -> List[str]: +) -> list[str]: """Delete all Algolia records associated with a ``root_url``.""" - object_ids: List[str] = [] + object_ids: list[str] = [] async for record in search_for_records( - index=algolia_index, root_url=root_url + algolia_index=algolia_index, root_url=root_url ): if record["root_url"] != root_url: logger.warning( @@ -35,8 +30,8 @@ async def delete_root_url( logger.debug("Found %d objects for deletion", len(object_ids)) - response = await algolia_index.delete_objects_async(object_ids) - logger.debug("Algolia response:\n%s", response.raw_responses) + responses = await algolia_index.delete_objects_async(object_ids) + logger.debug("Algolia response:\n%s", responses) logger.info("Deleted %d objects", len(object_ids)) @@ -44,16 +39,13 @@ async def delete_root_url( async def search_for_records( - *, index: AlgoliaIndexType, root_url: str -) -> AsyncIterator[Dict[str, Any]]: + *, algolia_index: AlgoliaIndexType, root_url: str +) -> AsyncIterator[dict[str, Any]]: filters = f"root_url:{escape_facet_value(root_url)}" logger.debug("Filter:\n%s", filters) - async for result in index.browse_objects_async( - { - "filters": filters, - "attributesToRetrieve": ["root_url"], - "attributesToHighlight": [], - } - ): + obj = BrowseParamsObject( + filters=filters, attributes_to_retrieve=["root_url"], attributes_to_highlight=[] + ) + async for result in algolia_index.browse_objects_async(obj): yield result diff --git a/astropylibrarian/workflows/expirerecords.py b/astropylibrarian/workflows/expirerecords.py index 19877c9..a264a7e 100644 --- a/astropylibrarian/workflows/expirerecords.py +++ b/astropylibrarian/workflows/expirerecords.py @@ -6,6 +6,8 @@ import logging from typing import TYPE_CHECKING +from algoliasearch.search.models.browse_params_object import BrowseParamsObject + from astropylibrarian.algolia.client import escape_facet_value if TYPE_CHECKING: @@ -27,21 +29,20 @@ async def expire_old_records( " AND NOT " f"root_url:{escape_facet_value(root_url)}" ) - search_settings = { - "filters": filters, - "attributesToRetrieve": ["root_url", "index_epoch"], - "attributesToHighlight": [], - } + + obj = BrowseParamsObject( + filters=filters, + attributes_to_retrieve=["root_url", "index_epoch"], + attributes_to_highlight=[], + ) old_object_ids: List[str] = [] - async for r in algolia_index.browse_objects_async(search_settings): + async for r in algolia_index.browse_objects_async(obj): # Double check that we're deleting the right things. if r["root_url"] != root_url: logger.warning("root_url does not match: %s", r["baseUrl"]) continue if r["surrogateKey"] == index_epoch: - logger.warning( - "index_epoch matches current epoch: %s", r["index_epoch"] - ) + logger.warning("index_epoch matches current epoch: %s", r["index_epoch"]) continue old_object_ids.append(r["objectID"]) diff --git a/astropylibrarian/workflows/indextutorial.py b/astropylibrarian/workflows/indextutorial.py index c8a066b..977d11f 100644 --- a/astropylibrarian/workflows/indextutorial.py +++ b/astropylibrarian/workflows/indextutorial.py @@ -13,7 +13,7 @@ from pathlib import Path from typing import TYPE_CHECKING, List -import algoliasearch.exceptions +from algoliasearch.http.exceptions import RequestException from astropylibrarian.algolia.client import generate_index_epoch from astropylibrarian.reducers.tutorial import get_tutorial_reducer @@ -171,7 +171,7 @@ async def index_tutorial( saved_object_ids: List[str] = [] try: response = await algolia_index.save_objects_async(records) - except algoliasearch.exceptions.RequestException as e: + except RequestException as e: logger.error( "Error saving objects for tutorial %s:\n%s", tutorial_html.url, diff --git a/pyproject.toml b/pyproject.toml index e9c173a..8efd5af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ dependencies = [ "lxml", "cssselect", - "algoliasearch", + "algoliasearch>=4,<5", "aiohttp", "async_timeout", "PyYAML", From 79477cb1974808bbd225eeb1449cdf0d08c2755c Mon Sep 17 00:00:00 2001 From: Jeff Jennings Date: Thu, 24 Oct 2024 15:20:10 -0400 Subject: [PATCH 7/7] CI: drop py 3.10 --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6d8531a..6100def 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -61,7 +61,7 @@ jobs: strategy: matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] steps: - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0