From 32c4ec0b0b4d3ec58f1c58098a078da68de421c7 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:01:59 +0100 Subject: [PATCH] Pleasing ruff --- pdfly/cli.py | 2 +- pdfly/uncompress.py | 104 +++++++++++++++++------------------ pyproject.toml | 1 - tests/conftest.py | 9 +-- tests/test_extract_images.py | 2 - tests/test_uncompress.py | 9 +-- tests/test_update_offsets.py | 4 +- 7 files changed, 65 insertions(+), 66 deletions(-) diff --git a/pdfly/cli.py b/pdfly/cli.py index 7ceeb40..9c5fa31 100644 --- a/pdfly/cli.py +++ b/pdfly/cli.py @@ -16,10 +16,10 @@ import pdfly.metadata import pdfly.pagemeta import pdfly.rm +import pdfly.uncompress import pdfly.up2 import pdfly.update_offsets import pdfly.x2pdf -import pdfly.uncompress def version_callback(value: bool) -> None: diff --git a/pdfly/uncompress.py b/pdfly/uncompress.py index a834384..a543473 100644 --- a/pdfly/uncompress.py +++ b/pdfly/uncompress.py @@ -1,52 +1,52 @@ -"""Module for uncompressing PDF content streams.""" - -from pathlib import Path -from typing import Optional -import zlib - -from pypdf import PdfReader, PdfWriter -from pypdf.generic import IndirectObject, PdfObject - - -def main(pdf: Path, output: Path) -> None: - reader = PdfReader(pdf) - writer = PdfWriter() - - for page in reader.pages: - if "/Contents" in page: - contents: Optional[PdfObject] = page["/Contents"] - if isinstance(contents, IndirectObject): - contents = contents.get_object() - if contents is not None: - if isinstance(contents, list): - for content in contents: - if isinstance(content, IndirectObject): - decompress_content_stream(content) - elif isinstance(contents, IndirectObject): - decompress_content_stream(contents) - writer.add_page(page) - - with open(output, "wb") as fp: - writer.write(fp) - - orig_size = pdf.stat().st_size - uncomp_size = output.stat().st_size - - print(f"Original Size : {orig_size:,}") - print( - f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)" - ) - - -def decompress_content_stream(content: IndirectObject) -> None: - """Decompress a content stream if it uses FlateDecode.""" - if content.get("/Filter") == "/FlateDecode": - try: - compressed_data = content.get_data() - uncompressed_data = zlib.decompress(compressed_data) - content.set_data(uncompressed_data) - del content["/Filter"] - except zlib.error as error: - print( - f"Some content stream with /FlateDecode failed to be decompressed: {error}" - ) +"""Module for uncompressing PDF content streams.""" + +import zlib +from pathlib import Path +from typing import Optional + +from pypdf import PdfReader, PdfWriter +from pypdf.generic import IndirectObject, PdfObject + + +def main(pdf: Path, output: Path) -> None: + reader = PdfReader(pdf) + writer = PdfWriter() + + for page in reader.pages: + if "/Contents" in page: + contents: Optional[PdfObject] = page["/Contents"] + if isinstance(contents, IndirectObject): + contents = contents.get_object() + if contents is not None: + if isinstance(contents, list): + for content in contents: + if isinstance(content, IndirectObject): + decompress_content_stream(content) + elif isinstance(contents, IndirectObject): + decompress_content_stream(contents) + writer.add_page(page) + + with open(output, "wb") as fp: + writer.write(fp) + + orig_size = pdf.stat().st_size + uncomp_size = output.stat().st_size + + print(f"Original Size : {orig_size:,}") + print( + f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)" + ) + + +def decompress_content_stream(content: IndirectObject) -> None: + """Decompress a content stream if it uses FlateDecode.""" + if content.get("/Filter") == "/FlateDecode": + try: + compressed_data = content.get_data() + uncompressed_data = zlib.decompress(compressed_data) + content.set_data(uncompressed_data) + del content["/Filter"] + except zlib.error as error: + print( + f"Some content stream with /FlateDecode failed to be decompressed: {error}" + ) diff --git a/pyproject.toml b/pyproject.toml index 6adc60c..7394a52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,7 +138,6 @@ ignore = [ "SLF001", # Private member accessed "INP001", # File `docs/conf.py` is part of an implicit namespace package. Add an `__init__.py`. "FA100", # Missing `from __future__ import annotations`, but uses `typing.Optional` - "I001" #Imports not at the top of the file. ] [tool.ruff.mccabe] diff --git a/tests/conftest.py b/tests/conftest.py index 9ab40d4..181d60c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,8 +3,9 @@ import os from pathlib import Path -from fpdf import FPDF import pytest +from fpdf import FPDF + from pdfly.cli import entry_point try: @@ -35,7 +36,7 @@ def run_cli(args): return error.code -@pytest.fixture +@pytest.fixture() def two_pages_pdf_filepath(tmp_path): "A PDF with 2 pages, and a different image on each page" # Note: prior to v2.7.9, fpdf2 produced incorrect /Resources dicts for each page (cf. fpdf2 PR #1133), @@ -50,7 +51,7 @@ def two_pages_pdf_filepath(tmp_path): return pdf_filepath -@pytest.fixture +@pytest.fixture() def pdf_file_100(tmp_path): """A PDF with 100 pages; each has only the page index on it.""" pdf = FPDF() @@ -65,7 +66,7 @@ def pdf_file_100(tmp_path): return pdf_filepath -@pytest.fixture +@pytest.fixture() def pdf_file_abc(tmp_path): """A PDF with 100 pages; each has only the page index on it.""" pdf = FPDF() diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py index 3a3025e..4a0df2b 100644 --- a/tests/test_extract_images.py +++ b/tests/test_extract_images.py @@ -1,5 +1,3 @@ -import pytest - from .conftest import RESOURCES_ROOT, chdir, run_cli diff --git a/tests/test_uncompress.py b/tests/test_uncompress.py index d3239ad..dc19723 100644 --- a/tests/test_uncompress.py +++ b/tests/test_uncompress.py @@ -1,10 +1,13 @@ """Tests for the `uncompress` command.""" -import pytest from pathlib import Path -from pdfly.cli import entry_point + +import pytest +from pypdf import PdfReader from typer.testing import CliRunner +from pdfly.cli import entry_point + runner = CliRunner() @@ -28,8 +31,6 @@ def test_uncompress_all_sample_files( output_pdf_filepath.exists() ), f"Output PDF {output_pdf_filepath} does not exist." - from pypdf import PdfReader - reader = PdfReader(str(output_pdf_filepath)) for page in reader.pages: contents = page.get("/Contents") diff --git a/tests/test_update_offsets.py b/tests/test_update_offsets.py index bd5d506..c239577 100644 --- a/tests/test_update_offsets.py +++ b/tests/test_update_offsets.py @@ -4,12 +4,12 @@ Here should only be end-to-end tests. """ +import re from pathlib import Path import pytest -import re -from .conftest import RESOURCES_ROOT, chdir, run_cli +from .conftest import RESOURCES_ROOT, run_cli def test_update_offsets(capsys, tmp_path: Path) -> None: