Skip to content

Commit

Permalink
Move imports of manager_config inside the _get_manager_config function
Browse files: browse the repository at this point in the history
  • Loading branch information
NastyBoget committed Dec 20, 2023
1 parent cdf9619 commit bacf328
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 49 deletions.
89 changes: 45 additions & 44 deletions dedoc/manager_config.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,52 @@
from typing import Optional

from dedoc.attachments_handler.attachments_handler import AttachmentsHandler
from dedoc.converters.concrete_converters.binary_converter import BinaryConverter
from dedoc.converters.concrete_converters.docx_converter import DocxConverter
from dedoc.converters.concrete_converters.excel_converter import ExcelConverter
from dedoc.converters.concrete_converters.pdf_converter import PDFConverter
from dedoc.converters.concrete_converters.png_converter import PNGConverter
from dedoc.converters.concrete_converters.pptx_converter import PptxConverter
from dedoc.converters.concrete_converters.txt_converter import TxtConverter
from dedoc.converters.converter_composition import ConverterComposition
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.docx_metadata_extractor import DocxMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.image_metadata_extractor import ImageMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.note_metadata_extarctor import NoteMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.pdf_metadata_extractor import PdfMetadataExtractor
from dedoc.metadata_extractors.metadata_extractor_composition import MetadataExtractorComposition
from dedoc.readers.archive_reader.archive_reader import ArchiveReader
from dedoc.readers.csv_reader.csv_reader import CSVReader
from dedoc.readers.docx_reader.docx_reader import DocxReader
from dedoc.readers.email_reader.email_reader import EmailReader
from dedoc.readers.excel_reader.excel_reader import ExcelReader
from dedoc.readers.html_reader.html_reader import HtmlReader
from dedoc.readers.json_reader.json_reader import JsonReader
from dedoc.readers.mhtml_reader.mhtml_reader import MhtmlReader
from dedoc.readers.note_reader.note_reader import NoteReader
from dedoc.readers.pdf_reader.pdf_auto_reader.pdf_auto_reader import PdfAutoReader
from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader
from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_tabby_reader import PdfTabbyReader
from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_txtlayer_reader import PdfTxtlayerReader
from dedoc.readers.pptx_reader.pptx_reader import PptxReader
from dedoc.readers.reader_composition import ReaderComposition
from dedoc.readers.txt_reader.raw_text_reader import RawTextReader
from dedoc.structure_constructors.concrete_structure_constructors.linear_constructor import LinearConstructor
from dedoc.structure_constructors.concrete_structure_constructors.tree_constructor import TreeConstructor
from dedoc.structure_constructors.structure_constructor_composition import StructureConstructorComposition
from dedoc.structure_extractors.concrete_structure_extractors.classifying_law_structure_extractor import ClassifyingLawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.diploma_structure_extractor import DiplomaStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.foiv_law_structure_extractor import FoivLawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.law_structure_excractor import LawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.tz_structure_extractor import TzStructureExtractor
from dedoc.structure_extractors.structure_extractor_composition import StructureExtractorComposition

"""MANAGER SETTINGS"""


def _get_manager_config(config: dict) -> dict:
"""
Imports are here in order not to do all of them when someone does `import dedoc`
"""
from dedoc.attachments_handler.attachments_handler import AttachmentsHandler
from dedoc.converters.concrete_converters.binary_converter import BinaryConverter
from dedoc.converters.concrete_converters.docx_converter import DocxConverter
from dedoc.converters.concrete_converters.excel_converter import ExcelConverter
from dedoc.converters.concrete_converters.pdf_converter import PDFConverter
from dedoc.converters.concrete_converters.png_converter import PNGConverter
from dedoc.converters.concrete_converters.pptx_converter import PptxConverter
from dedoc.converters.concrete_converters.txt_converter import TxtConverter
from dedoc.converters.converter_composition import ConverterComposition
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.docx_metadata_extractor import DocxMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.image_metadata_extractor import ImageMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.note_metadata_extarctor import NoteMetadataExtractor
from dedoc.metadata_extractors.concrete_metadata_extractors.pdf_metadata_extractor import PdfMetadataExtractor
from dedoc.metadata_extractors.metadata_extractor_composition import MetadataExtractorComposition
from dedoc.readers.archive_reader.archive_reader import ArchiveReader
from dedoc.readers.csv_reader.csv_reader import CSVReader
from dedoc.readers.docx_reader.docx_reader import DocxReader
from dedoc.readers.email_reader.email_reader import EmailReader
from dedoc.readers.excel_reader.excel_reader import ExcelReader
from dedoc.readers.html_reader.html_reader import HtmlReader
from dedoc.readers.json_reader.json_reader import JsonReader
from dedoc.readers.mhtml_reader.mhtml_reader import MhtmlReader
from dedoc.readers.note_reader.note_reader import NoteReader
from dedoc.readers.pdf_reader.pdf_auto_reader.pdf_auto_reader import PdfAutoReader
from dedoc.readers.pdf_reader.pdf_image_reader.pdf_image_reader import PdfImageReader
from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_tabby_reader import PdfTabbyReader
from dedoc.readers.pdf_reader.pdf_txtlayer_reader.pdf_txtlayer_reader import PdfTxtlayerReader
from dedoc.readers.pptx_reader.pptx_reader import PptxReader
from dedoc.readers.reader_composition import ReaderComposition
from dedoc.readers.txt_reader.raw_text_reader import RawTextReader
from dedoc.structure_constructors.concrete_structure_constructors.linear_constructor import LinearConstructor
from dedoc.structure_constructors.concrete_structure_constructors.tree_constructor import TreeConstructor
from dedoc.structure_constructors.structure_constructor_composition import StructureConstructorComposition
from dedoc.structure_extractors.concrete_structure_extractors.classifying_law_structure_extractor import ClassifyingLawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.default_structure_extractor import DefaultStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.diploma_structure_extractor import DiplomaStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.foiv_law_structure_extractor import FoivLawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.law_structure_excractor import LawStructureExtractor
from dedoc.structure_extractors.concrete_structure_extractors.tz_structure_extractor import TzStructureExtractor
from dedoc.structure_extractors.structure_extractor_composition import StructureExtractorComposition

converters = [
DocxConverter(config=config),
ExcelConverter(config=config),
Expand Down
7 changes: 2 additions & 5 deletions dedoc/train_dataset/train_dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
import numpy as np
from PIL.Image import Image

from dedoc.data_structures.line_with_meta import LineWithMeta
from dedoc.readers.pdf_reader.data_classes.page_with_bboxes import PageWithBBox


def __to_pil(image: np.ndarray) -> Image:
    """
    Build a PIL image from a numpy array.

    The array is handed to `PIL.Image.fromarray` unchanged, and the object it produces is returned as is.
    """
    pil_image = PIL.Image.fromarray(image)
    return pil_image
Expand All @@ -33,7 +30,7 @@ def _get_images_path(config: dict, document_name: str) -> str:
return os.path.join(get_path_original_documents(config), document_name.split(".")[0])


def save_page_with_bbox(page: PageWithBBox, document_name: str, *, config: dict) -> None:
def save_page_with_bbox(page: "PageWithBBox", document_name: str, *, config: dict) -> None: # noqa
__create_images_path(config)
uid = document_name
images_path = _get_images_path(config=config, document_name=document_name)
Expand Down Expand Up @@ -63,7 +60,7 @@ def _convert2zip(config: dict, document_name: str) -> str:
return archive_filename


def save_line_with_meta(lines: List[LineWithMeta], original_document: str, *, config: dict) -> None:
def save_line_with_meta(lines: List["LineWithMeta"], original_document: str, *, config: dict) -> None: # noqa

__create_images_path(config)
if original_document.endswith((".jpg", ".png", ".pdf")):
Expand Down

0 comments on commit bacf328

Please sign in to comment.