Skip to content

Commit

Permalink
Final fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
Ansh5461 committed Apr 18, 2024
1 parent 48d9abe commit f7b8f74
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 103 deletions.
3 changes: 1 addition & 2 deletions querent/common/types/querent_event.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import Any
from enum import Enum


class EventType(Enum):
class EventType:
Graph = "Graph"
Vector = "Vector"
Terminate="Terminate"
Expand Down
4 changes: 2 additions & 2 deletions querent/core/base_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ async def _listen_for_state_changes(self):
if new_state.payload == "Terminate":
break
new_state = {
"event_type": str(new_state.event_type.value),
"event_type": str(new_state.event_type),
"timestamp": new_state.timestamp,
"payload": new_state.payload,
"file": new_state.file,
Expand Down Expand Up @@ -221,7 +221,7 @@ async def _inner_worker():
none_counter = 0
while not self.termination_event.is_set():
retries = 0
await asyncio.sleep(30)
await asyncio.sleep(5)
data = await self.input_queue.get()
try:
if isinstance(data, IngestedMessages):
Expand Down
6 changes: 2 additions & 4 deletions querent/ingestors/pdfs/pdf_ingestor_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,19 @@ async def ingest(
data=None,
error=None,
doc_source=chunk_bytes.doc_source,
is_token_stream=True,
)
collected_bytes += chunk_bytes.data
except Exception as e:
# at the queue level, we can sample out the error
yield IngestedTokens(file=current_file, data=None, error=f"Exception: {e}", doc_source=chunk_bytes.doc_source, is_token_stream=True)
yield IngestedTokens(file=current_file, data=None, error=f"Exception: {e}", doc_source=chunk_bytes.doc_source)
finally:
# process the last file
try:
async for page_text in self.extract_and_process_pdf(
CollectedBytes(file=current_file, data=collected_bytes), chunk_bytes.doc_source
):
yield page_text
yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source, is_token_stream=True)
yield IngestedTokens(file=current_file, data=None, error=None, doc_source=chunk_bytes.doc_source)
except Exception as exc:
yield IngestedTokens(
file=current_file,
Expand Down Expand Up @@ -109,7 +108,6 @@ async def extract_and_process_pdf(
data=processed_text,
error=collected_bytes.error,
doc_source=doc_source,
is_token_stream=True,
)
# async for image_result in self.extract_images_and_ocr(
# page,
Expand Down
95 changes: 0 additions & 95 deletions querent/storage/s3-data-management.py

This file was deleted.

0 comments on commit f7b8f74

Please sign in to comment.