diff --git a/src/pypdfium2/_cli/_parsers.py b/src/pypdfium2/_cli/_parsers.py
index abffe4e5d..6ff8bdfa6 100644
--- a/src/pypdfium2/_cli/_parsers.py
+++ b/src/pypdfium2/_cli/_parsers.py
@@ -3,8 +3,8 @@
 
 import os
 import sys
-import argparse
 import logging
+import argparse
 from pathlib import Path
 import pypdfium2._helpers as pdfium
 import pypdfium2.internal as pdfium_i
@@ -91,6 +91,20 @@ def get_input(args, init_forms=False, **kwargs):
     return pdf
 
 
+# dummy more_itertools.peekable().__bool__ alternative
+
+def _postpeek_generator(value, iterator):
+    yield value; yield from iterator
+
+def iterator_hasvalue(iterator):
+    try:
+        first_value = next(iterator)
+    except StopIteration:
+        return False, None
+    else:
+        return True, _postpeek_generator(first_value, iterator)
+
+
 if sys.version_info >= (3, 9):
     from argparse import BooleanOptionalAction
 
diff --git a/src/pypdfium2/_cli/pageobjects.py b/src/pypdfium2/_cli/pageobjects.py
index 933fe0ab8..7272d08f4 100644
--- a/src/pypdfium2/_cli/pageobjects.py
+++ b/src/pypdfium2/_cli/pageobjects.py
@@ -3,7 +3,6 @@
 
 # TODO test-confirm filter and info params
 
-from itertools import chain
 from collections import OrderedDict
 import pypdfium2._helpers as pdfium
 import pypdfium2.internal as pdfium_i
@@ -13,6 +12,7 @@
     add_n_digits,
     get_input,
     round_list,
+    iterator_hasvalue,
 )
 
 
@@ -43,7 +43,7 @@ def attach(parser):
     )
     parser.add_argument(
         "--info",
-        nargs = "*",
+        nargs = "+",
         type = str.lower,
         choices = INFO_PARAMS,
         default = INFO_PARAMS,
@@ -76,24 +76,21 @@ def main(args):
     if args.filter:
         args.filter = [pdfium_i.ObjectTypeToConst[t] for t in args.filter]
 
-    show_pos = (PARAM_POS in args.info)
-    show_imageinfo = (PARAM_IMGINFO in args.info)
-    total_count = 0
+    show_pos = PARAM_POS in args.info
+    show_imginfo = PARAM_IMGINFO in args.info
+    assert show_pos or show_imginfo
 
+    total_count = 0
     for i in args.pages:
 
         page = pdf[i]
-        obj_searcher = page.get_objects(args.filter, max_depth=args.max_depth)
-        # note, more_itertools.peekable() could handle this more elegantly
-        try:
-            first_obj = next(obj_searcher)
-        except StopIteration:
-            continue
+        hasvalue, obj_searcher = iterator_hasvalue( page.get_objects(args.filter, max_depth=args.max_depth) )
+        if not hasvalue: continue
 
         print(f"# Page {i+1}")
         count = 0
 
-        for obj in chain([first_obj], obj_searcher):
+        for obj in obj_searcher:
 
             pad_0 = " " * obj.level
             pad_1 = pad_0 + " "
@@ -106,7 +103,7 @@ def main(args):
                     quad_bounds = obj.get_quad_points()
                     print(pad_1 + f"Quad Points: {[round_list(p, args.n_digits) for p in quad_bounds]}")
 
-            if show_imageinfo and isinstance(obj, pdfium.PdfImage):
+            if show_imginfo and isinstance(obj, pdfium.PdfImage):
                 print(pad_1 + f"Filters: {obj.get_filters()}")
                 metadata = obj.get_metadata()
                 assert (metadata.width, metadata.height) == obj.get_px_size()
diff --git a/src/pypdfium2/_cli/render.py b/src/pypdfium2/_cli/render.py
index b87233f2a..329d2a663 100644
--- a/src/pypdfium2/_cli/render.py
+++ b/src/pypdfium2/_cli/render.py
@@ -17,6 +17,7 @@ from pypdfium2._cli._parsers import (
     add_input,
     get_input,
     setup_logging,
+    iterator_hasvalue,
     BooleanOptionalAction,
 )
 
@@ -288,37 +289,26 @@ def _saving_hook(self, out_path, bitmap, page, postproc_kwargs):
     @classmethod
     def postprocess(cls, src_image, bitmap, page, invert_lightness, exclude_images):
         dst_image = src_image
-
         if invert_lightness:
-
             if bitmap.format == pdfium_c.FPDFBitmap_Gray:
                 dst_image = ~src_image
             else:
-
-                if bitmap.rev_byteorder:
-                    convert_to = cv2.COLOR_RGB2HLS
-                    convert_from = cv2.COLOR_HLS2RGB
-                else:
-                    convert_to = cv2.COLOR_BGR2HLS
-                    convert_from = cv2.COLOR_HLS2BGR
-
+                convert_to, convert_from = (cv2.COLOR_RGB2HLS, cv2.COLOR_HLS2RGB) if bitmap.rev_byteorder else (cv2.COLOR_BGR2HLS, cv2.COLOR_HLS2BGR)
                 dst_image = cv2.cvtColor(dst_image, convert_to)
                 h, l, s = cv2.split(dst_image)
                 l = ~l
                 dst_image = cv2.merge([h, l, s])
                 dst_image = cv2.cvtColor(dst_image, convert_from)
-
         if exclude_images:
-            assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2"
+            assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2" # FIXME?
             posconv = bitmap.get_posconv(page)
-            image_objs = list(page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1))
-            if len(image_objs) > 0:
+            have_images, obj_searcher = iterator_hasvalue( page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1) )
+            if have_images:
                 mask = np.zeros((bitmap.height, bitmap.width, 1), np.uint8)
-                for obj in image_objs:
+                for obj in obj_searcher:
                     qpoints = np.array([posconv.to_bitmap(x, y) for x, y in obj.get_quad_points()], np.int32)
                     cv2.fillPoly(mask, [qpoints], 1)
                 dst_image = cv2.copyTo(src_image, mask=mask, dst=dst_image)
-
         return dst_image
 
 