-
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refatora task de agregação de diários (#81)
- **Move interfaces para seus respectivos recursos**
- **Remove imports não usados**
- **Adiciona tasks.run_task**
- **Modifica ordem de comandos no Dockerfile**
- **Muda versão do Python para 3.9 no Dockerfile**
- **Otimiza uso de memória na task create_aggregates**
- Loading branch information
Showing
37 changed files
with
631 additions
and
442 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .interfaces import TextExtractorInterface | ||
from .text_extraction import ApacheTikaTextExtractor, create_apache_tika_text_extraction |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import abc | ||
|
||
class TextExtractorInterface(abc.ABC):
    """
    Interface to abstract the extraction of text from files.

    Concrete extractors must implement every abstract method below; the
    class itself cannot be instantiated.
    """

    @abc.abstractmethod
    def extract_text(self, filepath: str) -> str:
        """
        Extract the text from the given file.

        :param filepath: path of the file whose text should be extracted
        :return: the text extracted from the file
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .interfaces import DatabaseInterface | ||
from .postgresql import PostgreSQL, create_database_interface |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from typing import Dict, Iterable, Tuple | ||
import abc | ||
|
||
|
||
class DatabaseInterface(abc.ABC):
    """
    Interface to abstract the interaction with the database storing data used
    by the tasks.

    Concrete backends must implement every abstract method below; the class
    itself cannot be instantiated.
    """

    @abc.abstractmethod
    def _commit_changes(self, command: str, data: Dict) -> None:
        """
        Make a change in the database and commit it.

        :param command: command to be executed by the database
        :param data: values used by the command
        """

    @abc.abstractmethod
    def select(self, command: str) -> Iterable[Tuple]:
        """
        Select entries from the database.

        :param command: command describing which entries to select
        :return: an iterable with one tuple per selected entry
        """

    @abc.abstractmethod
    def insert(self, command: str, data: Dict) -> None:
        """
        Insert entries into the database.

        :param command: command describing the insertion
        :param data: values used by the command
        """

    @abc.abstractmethod
    def update(self, command: str, data: Dict) -> None:
        """
        Update entries from the database.

        :param command: command describing the update
        :param data: values used by the command
        """

    @abc.abstractmethod
    def delete(self, command: str, data: Dict) -> None:
        """
        Delete entries from the database.

        :param command: command describing the deletion
        :param data: values used by the command
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .interfaces import IndexInterface | ||
from .opensearch import create_index_interface |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from typing import Dict, Iterable | ||
import abc | ||
|
||
|
||
class IndexInterface(abc.ABC):
    """
    Interface to abstract the interaction with the index system.

    Concrete backends must implement every abstract method below; the class
    itself cannot be instantiated.
    """

    @abc.abstractmethod
    def create_index(self, index_name: str, body: Dict) -> None:
        """
        Create the index used by the application.

        :param index_name: name of the index to be created
        :param body: definition of the index, in the format expected by the
            concrete index system
        """

    @abc.abstractmethod
    def refresh_index(self, index_name: str) -> None:
        """
        Refresh the index to make it up-to-date for future searches.

        :param index_name: name of the index to be refreshed
        """

    @abc.abstractmethod
    def index_document(
        self, document: Dict, document_id: str, index: str, refresh: bool
    ) -> None:
        """
        Upload document to the index.

        :param document: document to be uploaded
        :param document_id: identifier of the document in the index
        :param index: name of the index receiving the document
        :param refresh: presumably whether the index should be refreshed
            after the upload -- confirm against the concrete implementation
        """

    @abc.abstractmethod
    def search(self, query: Dict, index: str) -> Dict:
        """
        Search the index with the provided query.

        :param query: query to be executed
        :param index: name of the index to be searched
        :return: the search response
        """

    @abc.abstractmethod
    def paginated_search(
        self, query: Dict, index: str, keep_alive: str
    ) -> Iterable[Dict]:
        """
        Search the index with the provided query, with pagination.

        :param query: query to be executed
        :param index: name of the index to be searched
        :param keep_alive: presumably how long the paginated search context
            is kept alive between pages -- confirm against the concrete
            implementation
        :return: an iterable of search responses
        """
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
from .__main__ import ( | ||
is_debug_enabled, | ||
enable_debug_if_necessary, | ||
start_to_process_pending_gazettes, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
from .digital_ocean_spaces import DigitalOceanSpaces, create_storage_interface | ||
from .interfaces import StorageInterface |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from typing import Union | ||
from pathlib import Path | ||
import abc | ||
from io import BytesIO | ||
|
||
|
||
class StorageInterface(abc.ABC):
    """
    Interface to abstract the interaction with the object store system.

    Concrete backends must implement every abstract method below; the class
    itself cannot be instantiated.
    """

    @abc.abstractmethod
    def get_file(self, file_to_be_downloaded: Union[str, Path], destination) -> None:
        """
        Download the given file key in the destination on the host.

        :param file_to_be_downloaded: key of the file to be downloaded
        :param destination: where the downloaded content is placed; the
            accepted type is defined by the concrete implementation
        """

    @abc.abstractmethod
    def upload_content(
        self, file_key: str, content_to_be_uploaded: Union[str, BytesIO]
    ) -> None:
        """
        Upload the given content to the destination on the host.

        :param file_key: key under which the content is stored
        :param content_to_be_uploaded: content to be uploaded
        """

    @abc.abstractmethod
    def copy_file(self, source_file_key: str, destination_file_key: str) -> None:
        """
        Copy the given source file to the destination place on the host.

        :param source_file_key: key of the file to be copied
        :param destination_file_key: key of the copy to be created
        """

    @abc.abstractmethod
    def delete_file(self, file_key: str) -> None:
        """
        Delete a file on the host.

        :param file_key: key of the file to be deleted
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,23 @@ | ||
from .create_index import create_gazettes_index, create_themed_excerpts_index | ||
from .create_aggregates_table import create_aggregates_table | ||
from .gazette_excerpts_embedding_reranking import embedding_rerank_excerpts | ||
from .gazette_excerpts_entities_tagging import tag_entities_in_excerpts | ||
from .gazette_text_extraction import extract_text_from_gazettes | ||
from .gazette_themed_excerpts_extraction import extract_themed_excerpts_from_gazettes | ||
from .gazette_themes_listing import get_themes | ||
from .gazette_txt_to_xml import create_aggregates | ||
from .interfaces import ( | ||
DatabaseInterface, | ||
StorageInterface, | ||
IndexInterface, | ||
TextExtractorInterface, | ||
) | ||
from .list_gazettes_to_be_processed import get_gazettes_to_be_processed | ||
from .list_territories import get_territories | ||
from importlib import import_module | ||
|
||
|
||
# Maps each public task name to the dotted path of the module defining it.
# The task function inside the module has the same name as the key.
AVAILABLE_TASKS = {
    "create_aggregates": "tasks.gazette_txt_to_xml",
    "create_gazettes_index": "tasks.create_index",
    "create_aggregates_table": "tasks.create_aggregates_table",
    "create_themed_excerpts_index": "tasks.create_index",
    "embedding_rerank_excerpts": "tasks.gazette_excerpts_embedding_reranking",
    "extract_text_from_gazettes": "tasks.gazette_text_extraction",
    "extract_themed_excerpts_from_gazettes": "tasks.gazette_themed_excerpts_extraction",
    "get_gazettes_to_be_processed": "tasks.list_gazettes_to_be_processed",
    "get_themes": "tasks.gazette_themes_listing",
    "get_territories": "tasks.list_territories",
    "tag_entities_in_excerpts": "tasks.gazette_excerpts_entities_tagging",
}


def run_task(task_name: str, *args, **kwargs):
    """
    Run the task registered under ``task_name`` and return its result.

    The module holding the task is imported on demand, at call time, and the
    attribute named ``task_name`` is looked up in it and called.

    :param task_name: one of the keys of ``AVAILABLE_TASKS``
    :param args: positional arguments forwarded to the task
    :param kwargs: keyword arguments forwarded to the task
    :return: whatever the task returns
    :raises KeyError: if ``task_name`` is not a registered task
    :raises ImportError: if the module of the task cannot be imported
    :raises AttributeError: if the module does not define ``task_name``
    """
    try:
        module_path = AVAILABLE_TASKS[task_name]
    except KeyError:
        # Still a KeyError, so existing callers keep working, but now it
        # tells the caller which names are valid.
        raise KeyError(
            f"Unknown task {task_name!r}; "
            f"available tasks: {', '.join(sorted(AVAILABLE_TASKS))}"
        ) from None

    task_module = import_module(module_path)
    task = getattr(task_module, task_name)
    return task(*args, **kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.