# uncompress.py
# Recovered from the mail-format patch "[PATCH] Create uncompress.py"
# (commit 850688a35096bfd78e863e41c62b0aae46de6c6f, Wed, 30 Oct 2024), which
# adds this module as pdfly/uncompress.py.
"""Provides functionality to uncompress PDF files."""
import zlib
from pathlib import Path
from typing import Iterator, cast

from pypdf import PageObject, PdfReader, PdfWriter
from pypdf.generic import IndirectObject, StreamObject

# Name of the only stream filter this module knows how to remove.
_FLATE_DECODE = "/FlateDecode"


def main(pdf: Path, output: Path) -> None:
    """Write a copy of *pdf* to *output* with page content streams inflated.

    Each page of *pdf* is visited, every stream reachable through its
    ``/Contents`` entry is handed to :func:`decompress_content_stream`, and
    the page is then added to a fresh writer. Once the result is written,
    the sizes of the input and output files are printed.

    Args:
        pdf: Path of the PDF file to read.
        output: Path the rewritten PDF is written to.
    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        # Streams are modified in place before the page is handed to the
        # writer, so the writer picks up the modified objects.
        for stream in _page_content_streams(page):
            decompress_content_stream(stream)
        writer.add_page(page)

    with open(output, "wb") as fp:
        writer.write(fp)

    orig_size = pdf.stat().st_size
    uncomp_size = output.stat().st_size
    ratio = uncomp_size / orig_size
    print(f"Original Size : {orig_size:,}")
    print(f"Uncompressed Size: {uncomp_size:,} ({ratio * 100:.1f}% of original)")


def _page_content_streams(page: PageObject) -> Iterator[StreamObject]:
    """Yield every stream object referenced by *page*'s ``/Contents`` entry.

    ``/Contents`` may be absent, a single stream, or an array of streams;
    indirect references are resolved on the way. Anything that does not
    resolve to a stream is skipped instead of being passed on.
    """
    if "/Contents" not in page:
        return

    contents = page["/Contents"]
    if isinstance(contents, IndirectObject):
        contents = contents.get_object()

    if isinstance(contents, list):
        for entry in contents:
            if isinstance(entry, IndirectObject):
                entry = entry.get_object()
            # A dangling reference resolves to a non-stream object, which has
            # no ``get_data``/``set_data``; skip it rather than crash.
            if isinstance(entry, StreamObject):
                yield entry
    elif isinstance(contents, StreamObject):  # type: ignore[unreachable]
        yield contents


def decompress_content_stream(content: StreamObject) -> None:  # type: ignore[type-arg]
    """Inflate *content* in place when its filter is ``/FlateDecode``.

    Streams whose ``/Filter`` is anything other than ``/FlateDecode`` (given
    either as a bare name or as a one-element array) are left untouched.
    On success the stream data is replaced by the inflated bytes and the
    ``/Filter`` entry is removed. If zlib rejects the data, a message is
    printed and the stream is left unchanged.

    Args:
        content: The stream object to inflate.
    """
    stream_filter = content.get("/Filter")
    if stream_filter != _FLATE_DECODE and stream_filter != [_FLATE_DECODE]:
        return

    try:
        # NOTE(review): this assumes ``get_data()`` returns the raw, still
        # compressed bytes. pypdf may already return decoded data for streams
        # read from a file, in which case zlib fails here and the stream is
        # left as it was - confirm against the pypdf version in use.
        uncompressed_data = zlib.decompress(content.get_data())
    except zlib.error as e:
        print(f"Decompression error: {e}")
        return

    content.set_data(uncompressed_data)
    # Remove the entry outright: pypdf dictionaries may only hold PDF objects,
    # so assigning a Python ``None`` to "/Filter" is not a valid way to clear
    # it and would not drop the key anyway.
    del content["/Filter"]