Skip to content

Commit

Permalink
remove PyMuPDF dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
masci committed May 7, 2024
1 parent 3c2a4fe commit 2a64135
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 344 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/license_compliance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ jobs:
# Exclusions in the vanilla distribution must be explicitly motivated
#
# - tqdm is MPL but there are no better alternatives
# - PyMuPDF is optional
# - pinecone-client is optional
# - psycopg2 is optional
exclude: "(?i)^(PyMuPDF|tqdm|pinecone-client|psycopg2).*"
exclude: "(?i)^(tqdm|pinecone-client|psycopg2).*"

# We keep the license inventory on FOSSA
- name: Send license report to Fossa
Expand Down Expand Up @@ -199,7 +198,7 @@ jobs:
# Special cases:
# - pyzmq is flagged because dual-licensed, but we assume using BSD
# - tqdm is MPL but there are no better alternatives
exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|PyMuPDF|pyzmq|tqdm).*"
exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|pyzmq|tqdm).*"

- name: Print report
if: ${{ always() }}
Expand Down Expand Up @@ -272,7 +271,7 @@ jobs:
# Special cases:
# - pyzmq is flagged because dual-licensed, but we assume using BSD
# - tqdm is MPL but there are no better alternatives
exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|PyMuPDF|pyzmq|tqdm).*"
exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|pyzmq|tqdm).*"

- name: Print report
if: ${{ always() }}
Expand Down
12 changes: 1 addition & 11 deletions haystack/nodes/file_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,6 @@
from haystack.nodes.file_converter.txt import TextConverter
from haystack.nodes.file_converter.azure import AzureConverter
from haystack.nodes.file_converter.parsr import ParsrConverter


try:
with LazyImport() as fitz_import:
# Try to use PyMuPDF, if not available fall back to xpdf
from haystack.nodes.file_converter.pdf import PDFToTextConverter # type: ignore

fitz_import.check()
except (ModuleNotFoundError, ImportError):
from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter # type: ignore # pylint: disable=reimported,ungrouped-imports

from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter
from haystack.nodes.file_converter.markdown import MarkdownConverter
from haystack.nodes.file_converter.image import ImageToTextConverter
307 changes: 0 additions & 307 deletions haystack/nodes/file_converter/pdf.py

This file was deleted.

Loading

0 comments on commit 2a64135

Please sign in to comment.