Improve insert_jpeg() API

pypdfium2-team · Oct 16, 2022 · 1c6bde4 · 1c6bde4
1 parent 9998d5a
commit 1c6bde4
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 19 deletions.
diff --git a/src/pypdfium2/_helpers/pageobject.py b/src/pypdfium2/_helpers/pageobject.py
@@ -141,20 +141,21 @@ def load_jpeg(self, buffer, pages=None, inline=False, autoclose=True):
         """
         Load a JPEG into the image object.
         
-        For a new image, 1 pixel will correspond to a length of 1 canvas unit, which is 1/72in by default, leading to a resolution of 72dpi.
-        :meth:`~PdfPageObject.transform` may be used to scale and position the image.
+        Position and size of the image are defined by its matrix.
+        If the image is new, it will appear as a tiny square of 1x1 units in the bottom left corner of the page.
+        Use :class:`.PdfMatrix` and :meth:`.set_matrix` to adjust the position.
         
-        If replacing an image, the existing transform matrix will be used.
+        If replacing an image, the existing matrix will be preserved.
         If aspect ratios do not match, the new image will be squashed into the old image's boundaries.
-        The matrix may be corrected manually to prevent distortion.
+        Modify the matrix manually if you wish to prevent this.
         
         Parameters:
             buffer (typing.BinaryIO):
                 A readable byte buffer to access the JPEG data.
             pages (typing.Sequence[PdfPage] | None):
-                A list of pages that might contain the image.
-                May be :data:`None` or empty if the image is known not to be part of any page yet.
-                If the image exists multiple times, all occurrences found on the given pages will be replaced.
+                If replacing an image, pass in a list of loaded pages that might contain it, to update their cache.
+                (The same image may be shown multiple times in different transforms across a PDF.)
+                If the image object handle is new, this parameter may be :data:`None` or an empty list.
             inline (bool):
                 Whether to load the image content into memory.
                 If :data:`True`, the buffer may be closed after this function call.
@@ -196,13 +197,6 @@ def load_jpeg(self, buffer, pages=None, inline=False, autoclose=True):
                 self.pdf._data_closer.append(buffer)
 
         metadata = self.get_info()
-        matrix = self.get_matrix()
-        if matrix == PdfMatrix():
-            # if the image's matrix is the identity matrix, it will appear as a tiny square of 1x1 canvas units
-            # hence, we scale to the image's width and height in pixels to achieve a sane default size
-            matrix.scale(metadata.width, metadata.height)
-            self.set_matrix(matrix)
-
         return (metadata.width, metadata.height)
 
 

diff --git a/tests/helpers/test_page.py b/tests/helpers/test_page.py
@@ -101,9 +101,15 @@ def test_new_jpeg():
     image_a = pdfium.PdfImageObject.new(pdf)
     buffer = open(TestFiles.mona_lisa, "rb")
     width, height = image_a.load_jpeg(buffer, autoclose=True)
-    assert image_a.get_matrix() == pdfium.PdfMatrix(width, 0, 0, height, 0, 0)
+
     assert len(pdf._data_holder) == 2
     assert pdf._data_closer == [buffer]
+
+    assert image_a.get_matrix() == pdfium.PdfMatrix()
+    matrix = pdfium.PdfMatrix()
+    matrix.scale(width, height)
+    image_a.set_matrix(matrix)
+    assert image_a.get_matrix() == pdfium.PdfMatrix(width, 0, 0, height, 0, 0)
     page.insert_object(image_a)
 
     metadata = image_a.get_info()
@@ -123,10 +129,12 @@ def test_new_jpeg():
     image_b = pdfium.PdfImageObject.new(pdf)
     with open(TestFiles.mona_lisa, "rb") as buffer:
         image_b.load_jpeg(buffer, inline=True, autoclose=False)
-    image_b.get_matrix() == pdfium.PdfMatrix(width, 0, 0, height, 0, 0)
-    matrix_b = pdfium.PdfMatrix()
-    matrix_b.translate(width, 0)
-    image_b.transform(matrix_b)
+
+    assert image_b.get_matrix() == pdfium.PdfMatrix()
+    matrix = pdfium.PdfMatrix()
+    matrix.scale(width, height)
+    matrix.translate(width, 0)
+    image_b.set_matrix(matrix)
     image_b.get_matrix() == pdfium.PdfMatrix(width, 0, 0, height, width, 0)
     page.insert_object(image_b)