Skip to content

Commit

Permalink
CLI(renderer/pageobjects): slightly improve code style
Browse files Browse the repository at this point in the history
  • Loading branch information
mara004 committed Oct 30, 2024
1 parent 7f12cee commit 195ce71
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
16 changes: 15 additions & 1 deletion src/pypdfium2/_cli/_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import os
import sys
import argparse
import logging
import argparse
from pathlib import Path
import pypdfium2._helpers as pdfium
import pypdfium2.internal as pdfium_i
Expand Down Expand Up @@ -91,6 +91,20 @@ def get_input(args, init_forms=False, **kwargs):
return pdf


# dummy more_itertools.peekable().__bool__ alternative

def _postpeek_generator(value, iterator):
yield value; yield from iterator

def iterator_hasvalue(iterator):
try:
first_value = next(iterator)
except StopIteration:
return False, None
else:
return True, _postpeek_generator(first_value, iterator)


if sys.version_info >= (3, 9):
from argparse import BooleanOptionalAction

Expand Down
23 changes: 10 additions & 13 deletions src/pypdfium2/_cli/pageobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

# TODO test-confirm filter and info params

from itertools import chain
from collections import OrderedDict
import pypdfium2._helpers as pdfium
import pypdfium2.internal as pdfium_i
Expand All @@ -13,6 +12,7 @@
add_n_digits,
get_input,
round_list,
iterator_hasvalue,
)


Expand Down Expand Up @@ -43,7 +43,7 @@ def attach(parser):
)
parser.add_argument(
"--info",
nargs = "*",
nargs = "+",
type = str.lower,
choices = INFO_PARAMS,
default = INFO_PARAMS,
Expand Down Expand Up @@ -76,24 +76,21 @@ def main(args):
if args.filter:
args.filter = [pdfium_i.ObjectTypeToConst[t] for t in args.filter]

show_pos = (PARAM_POS in args.info)
show_imageinfo = (PARAM_IMGINFO in args.info)
total_count = 0
show_pos = PARAM_POS in args.info
show_imginfo = PARAM_IMGINFO in args.info
assert show_pos or show_imginfo

total_count = 0
for i in args.pages:

page = pdf[i]
obj_searcher = page.get_objects(args.filter, max_depth=args.max_depth)
# note, more_itertools.peekable() could handle this more elegantly
try:
first_obj = next(obj_searcher)
except StopIteration:
continue
hasvalue, obj_searcher = iterator_hasvalue( page.get_objects(args.filter, max_depth=args.max_depth) )
if not hasvalue: continue

print(f"# Page {i+1}")
count = 0

for obj in chain([first_obj], obj_searcher):
for obj in obj_searcher:

pad_0 = " " * obj.level
pad_1 = pad_0 + " "
Expand All @@ -106,7 +103,7 @@ def main(args):
quad_bounds = obj.get_quad_points()
print(pad_1 + f"Quad Points: {[round_list(p, args.n_digits) for p in quad_bounds]}")

if show_imageinfo and isinstance(obj, pdfium.PdfImage):
if show_imginfo and isinstance(obj, pdfium.PdfImage):
print(pad_1 + f"Filters: {obj.get_filters()}")
metadata = obj.get_metadata()
assert (metadata.width, metadata.height) == obj.get_px_size()
Expand Down
22 changes: 6 additions & 16 deletions src/pypdfium2/_cli/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pypdfium2._cli._parsers import (
add_input, get_input,
setup_logging,
iterator_hasvalue,
BooleanOptionalAction,
)

Expand Down Expand Up @@ -288,37 +289,26 @@ def _saving_hook(self, out_path, bitmap, page, postproc_kwargs):
# Post-process a rendered page image using cv2: optionally invert its lightness
# (invert_lightness), and optionally copy the areas covered by the page's image
# objects back from src_image (exclude_images). Returns the resulting image,
# which is src_image itself when neither option is set.
# NOTE(review): this span comes from a diff view, so the removed and the added
# variant of some statements appear directly next to each other below.
@classmethod
def postprocess(cls, src_image, bitmap, page, invert_lightness, exclude_images):
dst_image = src_image

if invert_lightness:

if bitmap.format == pdfium_c.FPDFBitmap_Gray:
# Grayscale bitmap: the bitwise complement of the image is used as the inversion.
dst_image = ~src_image
else:

# Select the cv2 colour conversion codes to and from HLS depending on
# bitmap.rev_byteorder (RGB codes when set, BGR codes otherwise).
if bitmap.rev_byteorder:
convert_to = cv2.COLOR_RGB2HLS
convert_from = cv2.COLOR_HLS2RGB
else:
convert_to = cv2.COLOR_BGR2HLS
convert_from = cv2.COLOR_HLS2BGR

convert_to, convert_from = (cv2.COLOR_RGB2HLS, cv2.COLOR_HLS2RGB) if bitmap.rev_byteorder else (cv2.COLOR_BGR2HLS, cv2.COLOR_HLS2BGR)
# Convert to HLS, complement only the L channel, then convert back.
dst_image = cv2.cvtColor(dst_image, convert_to)
h, l, s = cv2.split(dst_image)
l = ~l
dst_image = cv2.merge([h, l, s])
dst_image = cv2.cvtColor(dst_image, convert_from)

if exclude_images:
# BGRx bitmaps are rejected here; the assertion message states that pasting
# with a mask on such images is not known to work with cv2.
assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2"
assert bitmap.format != pdfium_c.FPDFBitmap_BGRx, "Not sure how to paste with mask on {RGB,BGR}X image using cv2" # FIXME?
posconv = bitmap.get_posconv(page)
# Query the page's image objects (max_depth=1); the mask is only built
# when at least one image object is found.
image_objs = list(page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1))
if len(image_objs) > 0:
have_images, obj_searcher = iterator_hasvalue( page.get_objects([pdfium_c.FPDF_PAGEOBJ_IMAGE], max_depth=1) )
if have_images:
# Single-channel uint8 mask with the bitmap's height and width, initially all zero.
mask = np.zeros((bitmap.height, bitmap.width, 1), np.uint8)
for obj in image_objs:
for obj in obj_searcher:
# Map the object's quad points to bitmap coordinates and fill that polygon with 1.
qpoints = np.array([posconv.to_bitmap(x, y) for x, y in obj.get_quad_points()], np.int32)
cv2.fillPoly(mask, [qpoints], 1)
# Copy the masked pixels of src_image into dst_image.
dst_image = cv2.copyTo(src_image, mask=mask, dst=dst_image)

return dst_image


Expand Down

0 comments on commit 195ce71

Please sign in to comment.