From 6ddcc2efb083b6774265b7849bb5f48194fde11c Mon Sep 17 00:00:00 2001 From: aslehigh <30606063+aslehigh@users.noreply.github.com> Date: Fri, 29 May 2020 10:17:59 -0400 Subject: [PATCH] Decode command output to UTF-8 strings. (#40) Co-authored-by: Adam Lehigh --- pypdftk.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pypdftk.py b/pypdftk.py index 3f7a994..7925c8c 100644 --- a/pypdftk.py +++ b/pypdftk.py @@ -41,7 +41,7 @@ def check_output(*popenargs, **kwargs): def run_command(command, shell=False): ''' run a system command and yield output ''' p = check_output(command, shell=shell) - return p.split(b'\n') + return p.decode("utf-8").splitlines() try: run_command([PDFTK_PATH]) @@ -52,8 +52,8 @@ def run_command(command, shell=False): def get_num_pages(pdf_path): ''' return number of pages in a given PDF file ''' for line in run_command([PDFTK_PATH, pdf_path, 'dump_data']): - if line.lower().startswith(b'numberofpages'): - return int(line.split(b':')[1]) + if line.lower().startswith('numberofpages'): + return int(line.split(':')[1]) return 0 @@ -89,11 +89,7 @@ def dump_data_fields(pdf_path): Return list of dicts of all fields in a PDF. ''' cmd = "%s %s dump_data_fields" % (PDFTK_PATH, pdf_path) - # Either can return strings with : - # field_data = map(lambda x: x.decode("utf-8").split(': ', 1), run_command(cmd, True)) - # Or return bytes with : (will break tests) - # field_data = map(lambda x: x.split(b': ', 1), run_command(cmd, True)) - field_data = map(lambda x: x.decode("utf-8").split(': ', 1), run_command(cmd, True)) + field_data = map(lambda x: x.split(': ', 1), run_command(cmd, True)) fields = [list(group) for k, group in itertools.groupby(field_data, lambda x: len(x) == 1) if not k] return [dict(f) for f in fields]