Skip to content

Commit

Permalink
[pre-commit.ci lite] apply automatic fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
pre-commit-ci-lite[bot] authored Feb 4, 2025
1 parent 2c7489a commit 2f10564
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -309,4 +309,4 @@ tests/example2.*
# Client data
paperqa/clients/client_data/retractions.csv

rag-qa-benchmarking/
rag-qa-benchmarking/
25 changes: 14 additions & 11 deletions paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ async def searcher(self) -> Searcher:
index.reload()
self._searcher = index.searcher()
return self._searcher

@property
async def writer(self) -> IndexWriter:
if not self._writer:
Expand Down Expand Up @@ -277,7 +277,7 @@ async def filecheck(self, filename: str, body: str | None = None) -> bool:
async def mark_failed_document(self, path: str | os.PathLike) -> None:
(await self.index_files)[str(path)] = FAILED_DOCUMENT_ADD_ID
self.changed = True

async def release_lock(self) -> None:
"""Remove any stale lock files from the index metadata directory."""
index_meta_dir = pathlib.Path(str(await self.index_filename))
Expand All @@ -286,9 +286,7 @@ async def release_lock(self) -> None:
lock_file.unlink()
logger.info(f"Removed stale lock file: {lock_file}")
except Exception as ex:
logger.exception(
f"Could not remove stale lock file: {lock_file}: {ex}"
)
logger.exception(f"Could not remove stale lock file: {lock_file}: {ex}")

async def add_document(
self,
Expand Down Expand Up @@ -345,7 +343,7 @@ async def _add_document() -> None:
f" within {lock_acquisition_max_retries} attempts."
)
raise

async def commit(self) -> None:
"""Commit all pending changes to the index."""
if self._writer:
Expand All @@ -354,7 +352,6 @@ async def commit(self) -> None:
self._searcher = None
self._writer = None


@staticmethod
@retry(
stop=stop_after_attempt(1000),
Expand Down Expand Up @@ -484,6 +481,7 @@ async def maybe_get_manifest(

FAILED_DOCUMENT_ADD_ID = "ERROR"


def get_manifest_kwargs(
manifest: dict[str, Any], manifest_fallback_location: str, file_location: str
) -> dict[str, Any]:
Expand All @@ -495,7 +493,10 @@ def get_manifest_kwargs(
return manifest_entry.model_dump()
return {}


processed = 0


async def process_file(
rel_file_path: anyio.Path,
search_index: SearchIndex,
Expand All @@ -505,7 +506,7 @@ async def process_file(
progress_bar_update: Callable[[], Any] | None = None,
) -> None:
global processed

abs_file_path = (
pathlib.Path(settings.agent.index.paper_directory).absolute() / rel_file_path
)
Expand All @@ -521,7 +522,9 @@ async def process_file(
if not await search_index.filecheck(filename=file_location):
logger.info(f"New file to index: {file_location}...")

manifest_kwargs = get_manifest_kwargs(manifest, manifest_fallback_location, file_location)
manifest_kwargs = get_manifest_kwargs(
manifest, manifest_fallback_location, file_location
)

tmp_docs = Docs()
try:
Expand Down Expand Up @@ -560,13 +563,13 @@ async def process_file(
},
document=tmp_docs,
)

processed += 1
if processed == settings.agent.index.concurrency:
await search_index.save_index()
logger.info(f"Saved index after processing {processed} files.")
processed = 0

logger.info(f"Complete ({title}).")

# Update progress bar for either a new or previously indexed file
Expand Down
3 changes: 1 addition & 2 deletions paperqa/readers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
import os
from math import ceil
from pathlib import Path
Expand All @@ -20,8 +21,6 @@
from paperqa.utils import ImpossibleParsingError
from paperqa.version import __version__ as pqa_version

import asyncio


def parse_pdf_to_pages(
path: str | os.PathLike, page_size_limit: int | None = None
Expand Down
2 changes: 1 addition & 1 deletion tests/test_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ async def crashing_aadd(*args, **kwargs) -> str | None:
) as mock_aadd,
):
index = await get_directory_index(settings=agent_test_settings)

assert len(await index.index_files) == num_source_files
assert (
mock_aadd.await_count != num_source_files
Expand Down

0 comments on commit 2f10564

Please sign in to comment.