From 9235cd5c007a4fe83dffc7122c06286dfff8f029 Mon Sep 17 00:00:00 2001 From: sunveil <mikhailov@icc.ru> Date: Tue, 21 Nov 2023 16:04:30 +0800 Subject: [PATCH] Fix tabby page slice parameters --- .../pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py index c7b722e8..972cc37c 100644 --- a/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py +++ b/dedoc/readers/pdf_reader/pdf_txtlayer_reader/pdf_tabby_reader.py @@ -118,12 +118,12 @@ def __extract(self, path: str, parameters: dict, warnings: list)\ # in java tabby reader page numeration starts with 1, end_page is included first_tabby_page = first_page + 1 if first_page is not None else 1 - last_tabby_page = None if last_page is not None and last_page > page_count else last_page + last_tabby_page = page_count if (last_page is None) or (last_page is not None and last_page > page_count) else last_page self.logger.info(f"Reading PDF pages from {first_tabby_page} to {last_tabby_page}") document = self.__process_pdf(path=path, start_page=first_tabby_page, end_page=last_tabby_page) pages = document.get("pages", []) - for page in pages[first_page:last_page]: + for page in pages: page_lines = self.__get_lines_with_location(page, file_hash) if page_lines: all_lines.extend(page_lines)