diff --git a/pdfplumber/utils.py b/pdfplumber/utils.py
index 8aafec12..be623e78 100644
--- a/pdfplumber/utils.py
+++ b/pdfplumber/utils.py
@@ -62,7 +62,7 @@ def decode_text(s):
     Decodes a PDFDocEncoding string to Unicode.
     Adds py3 compatability to pdfminer's version.
     """
-    if s.startswith(b'\xfe\xff'):
+    if type(s) == bytes and s.startswith(b'\xfe\xff'):
         return six.text_type(s[2:], 'utf-16be', 'ignore')
     else:
         ords = (ord(c) if type(c) == str else c for c in s)
diff --git a/tests/pdfs/issue-33-lorem-ipsum.pdf b/tests/pdfs/issue-33-lorem-ipsum.pdf
new file mode 100644
index 00000000..719aa488
Binary files /dev/null and b/tests/pdfs/issue-33-lorem-ipsum.pdf differ
diff --git a/tests/test-issues.py b/tests/test-issues.py
index 3510be7c..d94faa47 100644
--- a/tests/test-issues.py
+++ b/tests/test-issues.py
@@ -77,10 +77,17 @@ def test_issue_14(self):
         pdf = pdfplumber.from_path(
             os.path.join(HERE, "pdfs/cupertino_usd_4-6-16.pdf")
         )
-        len(pdf.objects)
+        assert len(pdf.objects)
 
     def test_issue_21(self):
         pdf = pdfplumber.from_path(
             os.path.join(HERE, "pdfs/150109DSP-Milw-505-90D.pdf")
         )
-        len(pdf.objects)
+        assert len(pdf.objects)
+
+    def test_issue_33(self):
+        pdf = pdfplumber.from_path(
+            os.path.join(HERE, "pdfs/issue-33-lorem-ipsum.pdf")
+        )
+        assert len(pdf.metadata.keys())
+