How can I use FzPathWalker2
with FzDevice2
?
#3929
-
I am working on extracting vector drawings from PDF files, and I reach the point where I need to use a custom implementation of So I am trying to get the path information that I get in import fitz
from pymupdf import Page
from pymupdf.mupdf import FzColorParams, FzColorspace, FzContext, FzDevice2, FzMatrix, FzPath, FzPathWalker2, FzContext, FzMatrix, fz_colorspace
import ctypes
def get_color(colorspace: FzColorspace, color):
color = ctypes.cast(int(color), ctypes.POINTER(ctypes.c_float))
if colorspace.fz_colorspace_is_rgb():
return (color[0], color[1], color[2])
if colorspace.fz_colorspace_is_gray():
return (color[0], color[0], color[0])
if colorspace.fz_colorspace_is_cmyk():
return (color[0], color[1], color[2], color[3])
return None
class PathWalker(FzPathWalker2):
def __init__(self):
super().__init__()
self.use_virtual_moveto(True)
self.use_virtual_lineto(True)
self.use_virtual_curveto(True)
self.use_virtual_closepath(True)
def moveto(self, arg_0, arg_2, arg_3):
print('moveto', arg_0, arg_2, arg_3)
def lineto(self, arg_0, arg_2, arg_3):
print('lineto', arg_0, arg_2, arg_3)
def curveto(self, arg_0, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7):
print('curveto', arg_0, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7)
def closepath(self, arg_0):
print('closepath', arg_0)
class ExtractGeometryDevice(FzDevice2):
"""PyMuPDF Device for extracting images from a PDF file."""
def __init__(self):
super().__init__()
self.use_virtual_fill_path()
def run_page(self, page: Page):
fitz.mupdf.fz_run_page(page.this, self, fitz.mupdf.FzMatrix(), fitz.mupdf.FzCookie())
def fill_path(self, ctx: FzContext, path, even_odd: int, ctm: FzMatrix, colorspace: fz_colorspace, color_p, alpha: float, color_params: FzColorParams):
import faulthandler
faulthandler.enable()
color = get_color(FzColorspace(colorspace), color_p)
print('fill path', path, even_odd, ctm, color, alpha, color_params)
walker = PathWalker()
FzPath(path).fz_walk_path(walker, None)
# ll_fz_walk_path(path, walker, ctypes.c_void_p(0))
if __name__ == '__main__':
import sys
pdf_path = sys.argv[1]
pdf = fitz.open(pdf_path)
page = pdf[0]
device = ExtractGeometryDevice()
device.run_page(page)
pdf.close() But it results in a Program output:
|
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 1 reply
-
From what I can tell, I am using the API correctly, Trying to debug with I am passing Edit: I can pass a random SWIG object as GDB InfoBacktrace:
|
Beta Was this translation helpful? Give feedback.
-
Apparently, if I pass import fitz
from pymupdf import Page
from pymupdf.mupdf import FzColorParams, FzColorspace, FzContext, FzDevice2, FzMatrix, FzPath, FzPathWalker2, FzContext, FzMatrix, fz_colorspace, ll_fz_walk_path
import ctypes
def get_color(colorspace: FzColorspace, color):
color = ctypes.cast(int(color), ctypes.POINTER(ctypes.c_float))
if colorspace.fz_colorspace_is_rgb():
return (color[0], color[1], color[2])
if colorspace.fz_colorspace_is_gray():
return (color[0], color[0], color[0])
if colorspace.fz_colorspace_is_cmyk():
return (color[0], color[1], color[2], color[3])
return None
class PathWalker(FzPathWalker2):
def __init__(self):
super().__init__()
self.use_virtual_moveto(True)
self.use_virtual_lineto(True)
self.use_virtual_curveto(True)
self.use_virtual_closepath(True)
def moveto(self, arg_0, arg_2, arg_3):
print('moveto', arg_0, arg_2, arg_3)
def lineto(self, arg_0, arg_2, arg_3):
print('lineto', arg_0, arg_2, arg_3)
def curveto(self, arg_0, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7):
print('curveto', arg_0, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7)
def closepath(self, arg_0):
print('closepath', arg_0)
class ExtractGeometryDevice(FzDevice2):
"""PyMuPDF Device for extracting images from a PDF file."""
def __init__(self):
super().__init__()
self.use_virtual_fill_path()
def run_page(self, page: Page):
fitz.mupdf.fz_run_page(page.this, self, fitz.mupdf.FzMatrix(), fitz.mupdf.FzCookie())
def fill_path(self, ctx: FzContext, path, even_odd: int, ctm: FzMatrix, colorspace: fz_colorspace, color_p, alpha: float, color_params: FzColorParams):
import faulthandler
faulthandler.enable()
color = get_color(FzColorspace(colorspace), color_p)
print('fill path', path, even_odd, ctm, color, alpha, color_params)
walker = PathWalker()
# ll_fz_keep_path(path)
# FzPath(path).fz_walk_path(walker, walker.m_internal)
ll_fz_walk_path(path, walker.m_internal, walker.m_internal)
if __name__ == '__main__':
import sys
pdf_path = sys.argv[1]
pdf = fitz.open(pdf_path)
page = pdf[0]
device = ExtractGeometryDevice()
device.run_page(page)
pdf.close() |
Beta Was this translation helpful? Give feedback.
-
This is a tricky area, well done making things work! There's some documentation about how to use MuPDF's Python bindings with callbacks, see: https://mupdf.readthedocs.io/en/latest/language-bindings.html#making-mupdf-function-pointers-call-python-code There's also some code in
Some things to be aware of:
I hope that's of some use, i need to go now, but will take another look tomorrow and try to answer any questions you may have. |
Beta Was this translation helpful? Give feedback.
This is a tricky area, well done making things work!
There's some documentation about how to use MuPDF's Python bindings with callbacks, see: https://mupdf.readthedocs.io/en/latest/language-bindings.html#making-mupdf-function-pointers-call-python-code
There's also some code in
pymupdf/__init__.py
that uses a class derived frommupdf.FzPathWalker2
, which will probably be helpful.In particular,
pymupdf/__init__.py:jm_lineart_path()
does:Which i think is close to what you've ended up with. The underlying reason why this works is that the C++ callbacks default to assuming that arg1 (arg0 is fz_conte…