Skip to content

Commit

Permalink
fix code style
Browse files: browse the repository at this point in the history
  • Loading branch information
Nikita Shevtsov committed Oct 20, 2023
1 parent 10fd82c commit 8f93644
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 29 deletions.
6 changes: 3 additions & 3 deletions dedoc/attachments_handler/attachments_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def _handle_attachments(self, document: UnstructuredDocument, parameters: dict)
def __get_empty_document(self, document_parser: "DedocManager", attachment: AttachedFile, parameters: dict) -> ParsedDocument: # noqa
attachment_dir, attachment_name = os.path.split(attachment.get_filename_in_path())
metadata = document_parser.document_metadata_extractor.extract_metadata(directory=attachment_dir,
filename=attachment_name, converted_filename=attachment_name,
original_filename=attachment.get_original_filename(),
parameters=parameters)
filename=attachment_name, converted_filename=attachment_name,
original_filename=attachment.get_original_filename(),
parameters=parameters)
metadata = DocumentMetadata(**metadata)
return ParsedDocument(content=get_empty_content(), metadata=metadata)
16 changes: 7 additions & 9 deletions dedoc/metadata_extractors/abstract_metadata_extractor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict

from dedoc.data_structures.unstructured_document import UnstructuredDocument
from typing import Optional


class AbstractMetadataExtractor(ABC):
Expand All @@ -24,12 +22,12 @@ def can_extract(self,

@abstractmethod
def extract_metadata(self,
directory: str,
filename: str,
converted_filename: str,
original_filename: str,
parameters: Optional[dict] = None,
other_fields: Optional[dict] = None) -> dict:
directory: str,
filename: str,
converted_filename: str,
original_filename: str,
parameters: Optional[dict] = None,
other_fields: Optional[dict] = None) -> dict:
"""
Extract metadata from file if possible, i.e. method :meth:`can_extract` returned True.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
from base64 import b64encode
from typing import Optional, Dict
from typing import Optional

from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.metadata_extractors.abstract_metadata_extractor import AbstractMetadataExtractor
from dedoc.utils.utils import get_file_mime_type

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import os
from datetime import datetime
from typing import Optional, Dict
from typing import Optional

import docx
from docx.opc.exceptions import PackageNotFoundError

from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from PIL import ExifTags, Image
from dateutil import parser

from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor


Expand Down Expand Up @@ -67,12 +66,12 @@ def can_extract(self,
return filename.lower().endswith((".png", ".jpg", ".jpeg"))

def extract_metadata(self,
directory: str,
filename: str,
converted_filename: str,
original_filename: str,
parameters: dict = None,
other_fields: Optional[dict] = None) -> dict:
directory: str,
filename: str,
converted_filename: str,
original_filename: str,
parameters: dict = None,
other_fields: Optional[dict] = None) -> dict:
"""
Add the predefined list of metadata for images.
Look to the :meth:`~dedoc.metadata_extractors.AbstractMetadataExtractor.extract_metadata` documentation to get the information about parameters.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import os
import pickle
from typing import Optional, Dict
from typing import Optional

from dedoc.common.exceptions.bad_file_error import BadFileFormatError
from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor


Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import logging
import os
from typing import Optional, Dict
from typing import Optional

from PyPDF2 import PdfFileReader
from PyPDF2.utils import PdfReadError

from dedoc.data_structures.unstructured_document import UnstructuredDocument
from dedoc.metadata_extractors.concrete_metadata_extractors.base_metadata_extractor import BaseMetadataExtractor
from dedoc.utils.utils import convert_datetime

Expand Down Expand Up @@ -71,7 +70,7 @@ def extract_metadata(self,
Look to the :meth:`~dedoc.metadata_extractors.AbstractMetadataExtractor.extract_metadata` documentation to get the information about parameters.
"""
result = super().extract_metadata(directory=directory, filename=filename, converted_filename=converted_filename,
original_filename=original_filename, parameters=parameters, other_fields=other_fields)
original_filename=original_filename, parameters=parameters, other_fields=other_fields)
path = os.path.join(directory, filename)
pdf_fields = self._get_pdf_info(path)
if len(pdf_fields) > 0:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, Optional
from typing import List, Optional

from dedoc.metadata_extractors.abstract_metadata_extractor import AbstractMetadataExtractor

Expand Down

0 comments on commit 8f93644

Please sign in to comment.