Skip to content

Commit

Permalink
Merge pull request #39 from adrn/algolia-unpin
Browse files Browse the repository at this point in the history
Update to be consistent with v4 of algoliasearch
  • Loading branch information
jeffjennings authored Oct 29, 2024
2 parents cf70b83 + 79477cb commit a0bb8e8
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 147 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:

strategy:
matrix:
python-version: ['3.10', '3.11', '3.12', '3.13']
python-version: ['3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

_version.py
84 changes: 43 additions & 41 deletions astropylibrarian/algolia/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,19 @@
dry-run operations.
"""

from __future__ import annotations

import logging
import uuid
from copy import deepcopy
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
Iterator,
List,
Optional,
Type,
Union,
)

from algoliasearch.search_client import SearchClient

if TYPE_CHECKING:
from types import TracebackType
from types import TracebackType
from typing import Any, AsyncIterator, Iterator, Type, Union

from algoliasearch.search_index_async import SearchIndexAsync
from algoliasearch.search.client import SearchClient
from algoliasearch.search.models.batch_response import BatchResponse
from algoliasearch.search.models.browse_params_object import BrowseParamsObject
from algoliasearch.search.models.browse_response import BrowseResponse
from algoliasearch.search.models.deleted_at_response import DeletedAtResponse


AlgoliaIndexType = Union["SearchIndexAsync", "MockAlgoliaIndex"]
AlgoliaIndexType = Union["AlgoliaIndex", "MockAlgoliaIndex"]
"""Type annotation alias supporting the return types of the `AlgoliaIndex` and
`MockAlgoliaIndex` context managers.
"""
Expand Down Expand Up @@ -80,23 +67,36 @@ class AlgoliaIndex(BaseAlgoliaIndex):
Name of the Algolia index.
"""

async def __aenter__(self) -> SearchIndexAsync:
async def __aenter__(self) -> SearchClient:
self._logger.debug("Opening algolia client")
self.algolia_client = SearchClient.create(self.app_id, self._key)
self._logger.debug("Initializing algolia index")
self.index = self.algolia_client.init_index(self.name)
return self.index
self.algolia_client = SearchClient(self.app_id, self._key)
return self.algolia_client

async def __aexit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[Exception],
tb: Optional[TracebackType],
exc_type: Type[BaseException] | None,
exc: Exception | None,
tb: TracebackType | None,
) -> None:
self._logger.debug("Closing algolia client")
await self.algolia_client.close_async()
await self.algolia_client.close()
self._logger.debug("Finished closing algolia client")

async def browse_objects_async(
    self, browse_params: BrowseParamsObject
) -> BrowseResponse:
    """Browse the Algolia index named by ``self.name``.

    Parameters
    ----------
    browse_params
        Browse parameters, forwarded unchanged to the client.

    Returns
    -------
    BrowseResponse
        Whatever awaiting the client's ``browse_objects`` call produces.
    """
    # NOTE(review): the workflow callers in this changeset consume this
    # method with ``async for``; awaiting and returning a single response
    # does not provide an async iterator -- confirm the intended contract.
    response = await self.algolia_client.browse_objects(
        index_name=self.name,
        aggregator=None,  # no per-page callback is supplied
        browse_params=browse_params,
    )
    return response

async def save_objects_async(
    self, objects: list[dict[str, Any]]
) -> list[BatchResponse]:
    """Save records to the Algolia index named by ``self.name``.

    Parameters
    ----------
    objects
        Records to save, one dictionary per record.

    Returns
    -------
    list of BatchResponse
        The result of awaiting the client's ``save_objects`` call.
    """
    # The client call must be awaited: this class treats the client as
    # asynchronous (``close`` and ``browse_objects`` are awaited), and
    # returning the un-awaited call would hand callers a coroutine object
    # instead of the declared list of responses.
    return await self.algolia_client.save_objects(self.name, objects)

async def delete_objects_async(
    self, objectids: list[str]
) -> list[BatchResponse]:
    """Delete records from the Algolia index named by ``self.name``.

    Parameters
    ----------
    objectids
        Object IDs of the records to delete.

    Returns
    -------
    list of BatchResponse
        The result of awaiting the client's ``delete_objects`` call.
    """
    # Await the client call so that callers awaiting this method receive
    # the responses themselves rather than a pending coroutine object.
    return await self.algolia_client.delete_objects(self.name, objectids)


class MockAlgoliaIndex(BaseAlgoliaIndex):
"""A mock Algolia index client.
Expand All @@ -117,39 +117,41 @@ class MockAlgoliaIndex(BaseAlgoliaIndex):

async def __aenter__(self) -> "MockAlgoliaIndex":
self._logger.debug("Creating mock Algolia index")
self._saved_objects: List[Dict] = []
self._saved_objects: list[dict] = []
return self

async def __aexit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[Exception],
tb: Optional[TracebackType],
exc_type: Type[BaseException] | None,
exc: Exception | None,
tb: TracebackType | None,
) -> None:
self._logger.debug("Closing MockAlgoliaIndex")

async def save_objects_async(
self,
objects: Union[List[Dict], Iterator[Dict]],
request_options: Optional[Dict[str, Any]] = None,
) -> MockMultiResponse:
objects: list[dict] | Iterator[dict],
request_options: dict[str, Any] | None = None,
) -> "MockMultiResponse":
"""Mock implementation of save_objects_async."""
for obj in objects:
self._saved_objects.append(deepcopy(obj))
return MockMultiResponse()

async def browse_objects_async(
self, search_settings: Dict[str, Any]
) -> AsyncIterator[Dict[str, Any]]:
self, search_settings: dict[str, Any]
) -> AsyncIterator[dict[str, Any]]:
self._logger.debug("Got search settings %s", search_settings)
# FIXME need to flesh out this mock:
# - provide a way to seed data
# - use attributesToRetrieve to inform what attributes are sent back
for _ in range(5):
yield {}

async def delete_objects_async(self, objectids: List[str]) -> List[str]:
return objectids
async def delete_objects_async(
self, objectids: list[str]
) -> list[DeletedAtResponse]:
return [DeletedAtResponse(task_id=0, deleted_at="") for _ in objectids]


class MockMultiResponse:
Expand Down
36 changes: 14 additions & 22 deletions astropylibrarian/workflows/deleterooturl.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,23 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Workflow for deleting all Algolia records associated with a root URL."""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from astropylibrarian.algolia.client import escape_facet_value
from typing import Any, AsyncIterator

if TYPE_CHECKING:
from typing import Any, AsyncIterator, Dict, List
from algoliasearch.search.models.browse_params_object import BrowseParamsObject

from astropylibrarian.algolia.client import AlgoliaIndexType
from astropylibrarian.algolia.client import AlgoliaIndexType, escape_facet_value

logger = logging.getLogger(__name__)


async def delete_root_url(
*, root_url: str, algolia_index: AlgoliaIndexType
) -> List[str]:
) -> list[str]:
"""Delete all Algolia records associated with a ``root_url``."""
object_ids: List[str] = []
object_ids: list[str] = []
async for record in search_for_records(
index=algolia_index, root_url=root_url
algolia_index=algolia_index, root_url=root_url
):
if record["root_url"] != root_url:
logger.warning(
Expand All @@ -35,25 +30,22 @@ async def delete_root_url(

logger.debug("Found %d objects for deletion", len(object_ids))

response = await algolia_index.delete_objects_async(object_ids)
logger.debug("Algolia response:\n%s", response.raw_responses)
responses = await algolia_index.delete_objects_async(object_ids)
logger.debug("Algolia response:\n%s", responses)

logger.info("Deleted %d objects", len(object_ids))

return object_ids


async def search_for_records(
*, index: AlgoliaIndexType, root_url: str
) -> AsyncIterator[Dict[str, Any]]:
*, algolia_index: AlgoliaIndexType, root_url: str
) -> AsyncIterator[dict[str, Any]]:
filters = f"root_url:{escape_facet_value(root_url)}"
logger.debug("Filter:\n%s", filters)

async for result in index.browse_objects_async(
{
"filters": filters,
"attributesToRetrieve": ["root_url"],
"attributesToHighlight": [],
}
):
obj = BrowseParamsObject(
filters=filters, attributes_to_retrieve=["root_url"], attributes_to_highlight=[]
)
async for result in algolia_index.browse_objects_async(obj):
yield result
19 changes: 10 additions & 9 deletions astropylibrarian/workflows/expirerecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import logging
from typing import TYPE_CHECKING

from algoliasearch.search.models.browse_params_object import BrowseParamsObject

from astropylibrarian.algolia.client import escape_facet_value

if TYPE_CHECKING:
Expand All @@ -27,21 +29,20 @@ async def expire_old_records(
" AND NOT "
f"root_url:{escape_facet_value(root_url)}"
)
search_settings = {
"filters": filters,
"attributesToRetrieve": ["root_url", "index_epoch"],
"attributesToHighlight": [],
}

obj = BrowseParamsObject(
filters=filters,
attributes_to_retrieve=["root_url", "index_epoch"],
attributes_to_highlight=[],
)
old_object_ids: List[str] = []
async for r in algolia_index.browse_objects_async(search_settings):
async for r in algolia_index.browse_objects_async(obj):
# Double check that we're deleting the right things.
if r["root_url"] != root_url:
logger.warning("root_url does not match: %s", r["baseUrl"])
continue
if r["surrogateKey"] == index_epoch:
logger.warning(
"index_epoch matches current epoch: %s", r["index_epoch"]
)
logger.warning("index_epoch matches current epoch: %s", r["index_epoch"])
continue
old_object_ids.append(r["objectID"])

Expand Down
4 changes: 2 additions & 2 deletions astropylibrarian/workflows/indextutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, List

import algoliasearch.exceptions
from algoliasearch.http.exceptions import RequestException

from astropylibrarian.algolia.client import generate_index_epoch
from astropylibrarian.reducers.tutorial import get_tutorial_reducer
Expand Down Expand Up @@ -171,7 +171,7 @@ async def index_tutorial(
saved_object_ids: List[str] = []
try:
response = await algolia_index.save_objects_async(records)
except algoliasearch.exceptions.RequestException as e:
except RequestException as e:
logger.error(
"Error saving objects for tutorial %s:\n%s",
tutorial_html.url,
Expand Down
75 changes: 67 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,46 @@
[project]
name = "astropy-librarian"
authors = [
{name = "J.Sick Codes Inc.", email = "[email protected]"},
{name = "Jeff Jennings", email = "[email protected]"},
{name = "Adrian Price-Whelan", email = "[email protected]"},
]
license = {text = "BSD 3-Clause License"}
description = "The content crawler that supplies Astropy's web search."
readme = {file = "README.rst", content-type = "text/x-rst"}
requires-python = ">=3.11"
classifiers = [
"Development Status :: 3 - Alpha",
"License :: OSI Approved :: BSD License",
]
dependencies = [
"lxml",
"cssselect",
"algoliasearch>=4,<5",
"aiohttp",
"async_timeout",
"PyYAML",
"pydantic",
"typer",
"more-itertools",
]
dynamic = ["version"]


[project.optional-dependencies]
dev = [
"pytest>=6.1",
"pytest-doctestplus",
"types-setuptools",
"types-PyYAML",
]

[project.urls]
Homepage = "https://github.com/jonathansick/astropy-librarian"

[project.scripts]
astropylibrarian = "astropylibrarian.cli.app:app"

[build-system]
requires = [
"setuptools>=64",
Expand All @@ -9,19 +52,35 @@ build-backend = "setuptools.build_meta"
[tool.setuptools_scm]
version_file = "astropylibrarian/_version.py"

[tool.pytest.ini_options]
doctest_plus = "enabled"

[tool.flake8]
max-line-length = 79

[tool.mypy]
disallow_untyped_defs = true
disallow_incomplete_defs = true
ignore_missing_imports = true
show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_unreachable = true
warn_unused_ignores = true

[tool.black]
line-length = 79
target-version = ["py311"]
exclude = '''
/(
\.eggs
| \.git
| \.mypy_cache
| \.tox
| \.venv
| _build
| build
| dist
\.eggs
| \.git
| \.mypy_cache
| \.tox
| \.venv
| _build
| build
| dist
)/
'''

Expand Down
Loading

0 comments on commit a0bb8e8

Please sign in to comment.