Skip to content

Commit

Permalink
Fix tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
NastyBoget committed May 15, 2024
1 parent 8d5c2b3 commit 676ae6e
Show file tree
Hide file tree
Showing 9 changed files with 23 additions and 21 deletions.
6 changes: 3 additions & 3 deletions dedoc/attachments_extractors/abstract_attachment_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def __init__(self, *, config: Optional[dict] = None, recognized_extensions: Opti
"""
self.config = {} if config is None else config
self.logger = self.config.get("logger", logging.getLogger())
self.__recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self.__recognized_mimes = {} if recognized_mimes is None else recognized_mimes
self._recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self._recognized_mimes = {} if recognized_mimes is None else recognized_mimes

def can_extract(self,
file_path: Optional[str] = None,
Expand All @@ -40,7 +40,7 @@ def can_extract(self,
:return: the indicator of possibility to get attachments of this file
"""
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
return extension.lower() in self.__recognized_extensions or mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions or mime in self._recognized_mimes

@abstractmethod
def extract(self, file_path: str, parameters: Optional[dict] = None) -> List[AttachedFile]:
Expand Down
6 changes: 3 additions & 3 deletions dedoc/converters/concrete_converters/abstract_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def __init__(self, *, config: Optional[dict] = None, converted_extensions: Optio
self.period_checking = 0.05
self.config = {} if config is None else config
self.logger = self.config.get("logger", logging.getLogger())
self.__converted_extensions = {} if converted_extensions is None else converted_extensions
self.__converted_mimes = {} if converted_mimes is None else converted_mimes
self._converted_extensions = {} if converted_extensions is None else converted_extensions
self._converted_mimes = {} if converted_mimes is None else converted_mimes

def can_convert(self,
file_path: Optional[str] = None,
Expand All @@ -41,7 +41,7 @@ def can_convert(self,
:return: the indicator of possibility to convert this file
"""
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
return extension.lower() in self.__converted_extensions or mime in self.__converted_mimes
return extension.lower() in self._converted_extensions or mime in self._converted_mimes

@abstractmethod
def convert(self, file_path: str, parameters: Optional[dict] = None) -> str:
Expand Down
6 changes: 3 additions & 3 deletions dedoc/metadata_extractors/abstract_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def __init__(self, *, config: Optional[dict] = None, recognized_extensions: Opti
"""
self.config = {} if config is None else config
self.logger = self.config.get("logger", logging.getLogger())
self.__recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self.__recognized_mimes = {} if recognized_mimes is None else recognized_mimes
self._recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self._recognized_mimes = {} if recognized_mimes is None else recognized_mimes

def can_extract(self,
file_path: str,
Expand All @@ -44,7 +44,7 @@ def can_extract(self,
file_dir, file_name, converted_filename, original_filename = self._get_names(file_path, converted_filename, original_filename)
converted_file_path = os.path.join(file_dir, converted_filename)
mime, extension = get_mime_extension(file_path=converted_file_path, mime=mime, extension=extension)
return extension.lower() in self.__recognized_extensions or mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions or mime in self._recognized_mimes

@abstractmethod
def extract(self,
Expand Down
6 changes: 3 additions & 3 deletions dedoc/readers/base_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def __init__(self, *, config: Optional[dict] = None, recognized_extensions: Opti
"""
self.config = {} if config is None else config
self.logger = self.config.get("logger", logging.getLogger())
self.__recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self.__recognized_mimes = {} if recognized_mimes is None else recognized_mimes
self._recognized_extensions = {} if recognized_extensions is None else recognized_extensions
self._recognized_mimes = {} if recognized_mimes is None else recognized_mimes

def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None, extension: Optional[str] = None, parameters: Optional[dict] = None) -> bool:
"""
Expand All @@ -40,7 +40,7 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None,
:return: True if this reader can handle the file, False otherwise
"""
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
return extension.lower() in self.__recognized_extensions or mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions or mime in self._recognized_mimes

@abstractmethod
def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument:
Expand Down
4 changes: 2 additions & 2 deletions dedoc/readers/email_reader/email_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None,
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
# this code differs from BaseReader because .eml and .mhtml files have the same mime type
if extension:
return extension.lower() in self.__recognized_extensions
return mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions
return mime in self._recognized_mimes

def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument:
"""
Expand Down
4 changes: 2 additions & 2 deletions dedoc/readers/mhtml_reader/mhtml_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None,
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
# this code differs from BaseReader because .eml and .mhtml files have the same mime type
if extension:
return extension.lower() in self.__recognized_extensions
return mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions
return mime in self._recognized_mimes

def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument:
"""
Expand Down
2 changes: 1 addition & 1 deletion dedoc/readers/pdf_reader/pdf_base_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _process_one_page(self, image: np.ndarray, parameters: ParametersForParseDoc

def _get_images(self, path: str, page_from: int, page_to: int) -> Iterator[np.ndarray]:
mime = get_file_mime_type(path)
mime = get_file_mime_by_content(path) if mime not in self.__recognized_mimes else mime
mime = get_file_mime_by_content(path) if mime not in self._recognized_mimes else mime
if mime in mimes.pdf_like_format:
yield from self._split_pdf2image(path, page_from, page_to)
elif mime in mimes.image_like_format or path.lower().endswith(tuple(extensions.image_like_format)):
Expand Down
4 changes: 2 additions & 2 deletions dedoc/readers/txt_reader/raw_text_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def can_read(self, file_path: Optional[str] = None, mime: Optional[str] = None,
mime, extension = get_mime_extension(file_path=file_path, mime=mime, extension=extension)
# this code differs from BaseReader because other formats can have text/plain mime type
if extension:
return extension.lower() in self.__recognized_extensions
return mime in self.__recognized_mimes
return extension.lower() in self._recognized_extensions
return mime in self._recognized_mimes

def read(self, file_path: str, parameters: Optional[dict] = None) -> UnstructuredDocument:
"""
Expand Down
6 changes: 4 additions & 2 deletions tests/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
def run_test_group(group_name: str) -> None:
# Discover and run the unittest modules of one test group.
# The group directory is "<directory of this file>/<group_name>_tests"; only files matching
# f"{TEST_PREFIX}*.py" are collected, and they are run with CustomTextTestRunner(verbosity=2).
#
# NOTE(review): this listing is a rendered diff hunk whose +/- markers and indentation were lost.
# Per the hunk header above ("4 additions & 2 deletions", @@ -9,8 +9,10 @@) the first
# `test_suite = ...` / `runner.run(...)` pair appears to be the removed (pre-commit) version and the
# second `test_suite = ...` line through `exit(1)` the added (post-commit) version — confirm against
# the repository before treating both pairs as live code.
runner = CustomTextTestRunner(verbosity=2)
loader = unittest.TestLoader()
# Pre-commit form: directory name built by string concatenation; the runner's result is discarded.
test_suite = loader.discover(os.path.join(os.path.dirname(os.path.abspath(__file__)), group_name + "_tests"), pattern=f"{TEST_PREFIX}*.py")
runner.run(test_suite)
# Post-commit form: same directory expressed as an f-string; the result object is kept for inspection.
test_suite = loader.discover(os.path.join(os.path.dirname(os.path.abspath(__file__)), f"{group_name}_tests"), pattern=f"{TEST_PREFIX}*.py")
result = runner.run(test_suite)
# Exit with status 1 when the run recorded any error or failure.
if len(result.errors) > 0 or len(result.failures) > 0:
exit(1)


if __name__ == "__main__":
Expand Down

0 comments on commit 676ae6e

Please sign in to comment.