Skip to content

Commit

Permalink
Update uncompress.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaos599 authored Oct 30, 2024
1 parent 981db9f commit b44039a
Showing 1 changed file with 15 additions and 18 deletions.
33 changes: 15 additions & 18 deletions pdfly/uncompress.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# uncompress.py
"""Provides functionality to uncompress PDF files."""
from pathlib import Path
import zlib
from typing import cast
"""
Module for uncompressing PDF content streams.
"""

from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.generic import IndirectObject, StreamObject
from pypdf.generic import IndirectObject
import zlib


def main(pdf: Path, output: Path) -> None:
Expand All @@ -17,34 +17,31 @@ def main(pdf: Path, output: Path) -> None:
contents = page["/Contents"]
if isinstance(contents, IndirectObject):
contents = contents.get_object()
# Handle multiple content streams or single
if isinstance(contents, list):
for content_obj in contents:
if isinstance(content_obj, IndirectObject):
content_stream = content_obj.get_object()
decompress_content_stream(content_stream)
elif isinstance(contents, StreamObject): # type: ignore[unreachable]
for content in contents:
decompress_content_stream(content)
else:
decompress_content_stream(contents)


writer.add_page(page)

with open(output, "wb") as fp:
writer.write(fp)

orig_size = pdf.stat().st_size
uncomp_size = output.stat().st_size
ratio = uncomp_size / orig_size

print(f"Original Size : {orig_size:,}")
print(f"Uncompressed Size: {uncomp_size:,} ({ratio * 100:.1f}% of original)")
print(f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)")


def decompress_content_stream(content: "StreamObject") -> None:
    """Inflate a FlateDecode content stream in place.

    A stream whose ``/Filter`` entry is not exactly ``/FlateDecode``
    (including a stream with no ``/Filter`` at all) is left untouched.
    On success the stream data is replaced with the inflated bytes and the
    ``/Filter`` entry is removed.  If zlib rejects the data, the error is
    printed and the stream is left unchanged.

    Args:
        content: An already-resolved stream object; it must provide
            ``get``, ``get_data``, ``set_data`` and item deletion.
    """
    if content.get("/Filter") != "/FlateDecode":
        return

    # NOTE(review): depending on the pypdf stream class, get_data() may
    # already return decoded bytes, in which case zlib.decompress() raises
    # and we fall into the error branch -- confirm against pypdf's docs.
    try:
        uncompressed_data = zlib.decompress(content.get_data())
    except zlib.error as e:
        print(f"Decompression error: {e}")
        return

    content.set_data(uncompressed_data)
    del content["/Filter"]  # The data is no longer compressed.

0 comments on commit b44039a

Please sign in to comment.