diff --git a/src/pypdfium2/_cli/_parsers.py b/src/pypdfium2/_cli/_parsers.py index abffe4e5d..25a60e8bd 100644 --- a/src/pypdfium2/_cli/_parsers.py +++ b/src/pypdfium2/_cli/_parsers.py @@ -3,8 +3,9 @@ import os import sys -import argparse import logging +import argparse +import itertools from pathlib import Path import pypdfium2._helpers as pdfium import pypdfium2.internal as pdfium_i @@ -91,6 +92,21 @@ def get_input(args, init_forms=False, **kwargs): return pdf +# dummy more_itertools.peekable().__bool__ alternative + +def _postpeek_generator(value, iterator): + yield value + yield from iterator + +def iterator_hasvalue(iterator): + try: + first_value = next(iterator) + except StopIteration: + return False, None + else: + return True, _postpeek_generator(first_value, iterator) + + if sys.version_info >= (3, 9): from argparse import BooleanOptionalAction diff --git a/src/pypdfium2/_cli/pageobjects.py b/src/pypdfium2/_cli/pageobjects.py index 933fe0ab8..53e25b881 100644 --- a/src/pypdfium2/_cli/pageobjects.py +++ b/src/pypdfium2/_cli/pageobjects.py @@ -3,7 +3,6 @@ # TODO test-confirm filter and info params -from itertools import chain from collections import OrderedDict import pypdfium2._helpers as pdfium import pypdfium2.internal as pdfium_i @@ -13,6 +12,7 @@ add_n_digits, get_input, round_list, + iterator_hasvalue, ) @@ -83,17 +83,13 @@ def main(args): for i in args.pages: page = pdf[i] - obj_searcher = page.get_objects(args.filter, max_depth=args.max_depth) - # note, more_itertools.peekable() could handle this more elegantly - try: - first_obj = next(obj_searcher) - except StopIteration: - continue + hasvalue, obj_searcher = iterator_hasvalue( page.get_objects(args.filter, max_depth=args.max_depth) ) + if not hasvalue: continue print(f"# Page {i+1}") count = 0 - for obj in chain([first_obj], obj_searcher): + for obj in obj_searcher: pad_0 = " " * obj.level pad_1 = pad_0 + " " diff --git a/src/pypdfium2/_cli/render.py b/src/pypdfium2/_cli/render.py index 
@classmethod
def postprocess(cls, src_image, bitmap, page, invert_lightness, exclude_images):
    """
    Apply the optional post-processing steps to a rendered image.

    Parameters:
        src_image: The rendered image (combined with ``~`` and passed to cv2 calls).
        bitmap: Source bitmap; its format, byte order and size are consulted.
        page: Page the bitmap was rendered from; searched for image objects.
        invert_lightness: If true, invert the image's lightness.
        exclude_images: If true, copy pixels from *src_image* back into the
            result over the areas covered by the page's image objects.

    Returns:
        The processed image, or *src_image* itself if both options are off.
    """
    result = src_image

    if invert_lightness:
        if bitmap.format == pdfium_c.FPDFBitmap_Gray:
            # Single channel: plain bitwise inversion.
            result = ~src_image
        else:
            # Pick the HLS conversion codes that match the bitmap's channel order.
            if bitmap.rev_byteorder:
                to_hls, from_hls = cv2.COLOR_RGB2HLS, cv2.COLOR_HLS2RGB
            else:
                to_hls, from_hls = cv2.COLOR_BGR2HLS, cv2.COLOR_HLS2BGR
            # Invert only the lightness channel, keeping hue and saturation.
            hue, lightness, saturation = cv2.split(cv2.cvtColor(result, to_hls))
            result = cv2.cvtColor(cv2.merge([hue, ~lightness, saturation]), from_hls)

    if exclude_images:
        assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2"  # FIXME?
        posconv = bitmap.get_posconv(page)
        # Probe the object search first, so that the mask is only allocated
        # if the page actually has image objects.
        have_images, image_objs = iterator_hasvalue(
            page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1)
        )
        if have_images:
            mask = np.zeros((bitmap.height, bitmap.width, 1), np.uint8)
            for obj in image_objs:
                # Map the object's quad points to bitmap coordinates and mark the area.
                corners = [posconv.to_bitmap(x, y) for x, y in obj.get_quad_points()]
                cv2.fillPoly(mask, [np.array(corners, np.int32)], 1)
            result = cv2.copyTo(src_image, mask=mask, dst=result)

    return result