diff --git a/.github/workflows/license_compliance.yml b/.github/workflows/license_compliance.yml
index b3e0aba19a..bbaa2395cb 100644
--- a/.github/workflows/license_compliance.yml
+++ b/.github/workflows/license_compliance.yml
@@ -42,10 +42,9 @@ jobs:
           # Exclusions in the vanilla distribution must be explicitly motivated
           #
-          # - tqdm is MLP but there are no better alternatives
+          # - tqdm is MPL but there are no better alternatives
-          # - PyMuPDF is optional
           # - pinecone-client is optional
           # - psycopg2 is optional
-          exclude: "(?i)^(PyMuPDF|tqdm|pinecone-client|psycopg2).*"
+          exclude: "(?i)^(tqdm|pinecone-client|psycopg2).*"
 
       # We keep the license inventory on FOSSA
       - name: Send license report to Fossa
@@ -199,7 +198,7 @@ jobs:
           # Special cases:
           # - pyzmq is flagged because dual-licensed, but we assume using BSD
-          # - tqdm is MLP but there are no better alternatives
+          # - tqdm is MPL but there are no better alternatives
-          exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|PyMuPDF|pyzmq|tqdm).*"
+          exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|pyzmq|tqdm).*"
 
       - name: Print report
         if: ${{ always() }}
@@ -272,7 +271,7 @@ jobs:
           # Special cases:
           # - pyzmq is flagged because dual-licensed, but we assume using BSD
-          # - tqdm is MLP but there are no better alternatives
+          # - tqdm is MPL but there are no better alternatives
-          exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|PyMuPDF|pyzmq|tqdm).*"
+          exclude: "(?i)^(astroid|certifi|chardet|num2words|nvidia-|pathspec|pinecone-client|psycopg2|pylint|pyzmq|tqdm).*"
 
       - name: Print report
         if: ${{ always() }}
diff --git a/haystack/nodes/file_converter/__init__.py b/haystack/nodes/file_converter/__init__.py
index 76a5dd1aa3..a37bc4b499 100644
--- a/haystack/nodes/file_converter/__init__.py
+++ b/haystack/nodes/file_converter/__init__.py
@@ -9,16 +9,6 @@
 from haystack.nodes.file_converter.txt import TextConverter
 from haystack.nodes.file_converter.azure import AzureConverter
 from haystack.nodes.file_converter.parsr import ParsrConverter
-
-
-try:
-    with LazyImport() as fitz_import:
-        # Try to use PyMuPDF, if not available fall back to xpdf
-        from haystack.nodes.file_converter.pdf import PDFToTextConverter  # type: ignore
-
-    fitz_import.check()
-except (ModuleNotFoundError, ImportError):
-    from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter  # type: ignore  # pylint: disable=reimported,ungrouped-imports
-
+from haystack.nodes.file_converter.pdf_xpdf import PDFToTextConverter
 from haystack.nodes.file_converter.markdown import MarkdownConverter
 from haystack.nodes.file_converter.image import ImageToTextConverter
diff --git a/haystack/nodes/file_converter/pdf.py b/haystack/nodes/file_converter/pdf.py
deleted file mode 100644
index e5348671dc..0000000000
--- a/haystack/nodes/file_converter/pdf.py
+++ /dev/null
@@ -1,307 +0,0 @@
-import logging
-import os
-import warnings
-from concurrent.futures import ProcessPoolExecutor
-from multiprocessing import cpu_count
-from pathlib import Path
-from typing import Any, Dict, List, Literal, Optional, Union
-
-import fitz
-from more_itertools import divide
-
-from haystack.nodes.file_converter.base import BaseConverter
-from haystack.schema import Document
-
-
-logger = logging.getLogger(__name__)
-
-
-class PDFToTextConverter(BaseConverter):
-    def __init__(
-        self,
-        remove_numeric_tables: bool = False,
-        valid_languages: Optional[List[str]] = None,
-        id_hash_keys: Optional[List[str]] = None,
-        encoding: Optional[str] = None,
-        keep_physical_layout: Optional[bool] = None,
-        sort_by_position: bool = False,
-        ocr: Optional[Literal["auto", "full"]] = None,
-        ocr_language: str = "eng",
-        multiprocessing: Union[bool, int] = True,
-    ) -> None:
-        """
-        :param remove_numeric_tables: This option uses heuristics to remove numeric rows from the tables.
-                                      The tabular structures in documents might be noise for the reader model if it
-                                      does not have table parsing capability for finding answers. However, tables
-                                      may also have long strings that could possible candidate for searching answers.
-                                      The rows containing strings are thus retained in this option.
-        :param valid_languages: validate languages from a list of languages specified in the ISO 639-1
-                                (https://en.wikipedia.org/wiki/ISO_639-1) format.
-                                This option can be used to add test for encoding errors. If the extracted text is
-                                not one of the valid languages, then it might likely be encoding error resulting
-                                in garbled text.
-        :param id_hash_keys: Generate the document id from a custom list of strings that refer to the document's
-            attributes. If you want to ensure you don't have duplicate documents in your DocumentStore but texts are
-            not unique, you can modify the metadata and pass e.g. `"meta"` to this field (e.g. [`"content"`, `"meta"`]).
-            In this case the id will be generated by using the content and the defined metadata.
-        :param encoding: This parameter is being deprecated.
-                         It will be automatically detected by PyMuPDF.
-        :param keep_physical_layout: This parameter is being deprecated.
-        :param sort_by_position: Specifies whether to sort the extracted text by positional coordinates or logical reading order.
-                        If set to True, the text is sorted first by vertical position, and then by horizontal position.
-                        If set to False (default), the logical reading order in the PDF is used.
-        :param ocr: Specifies whether to use OCR to extract text from images in the PDF. If set to "auto", OCR is used only to extract text
-                    from images and integrate into the existing text. If set to "full", OCR is used to extract text from the entire PDF.
-        :param ocr_language: Specifies the language to use for OCR. The default language is English, which language code is `eng`.
-                For a list of supported languages and the respective codes access https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html.
-                You can combine multiple languages by passing a string with the language codes separated by `+`. For example, to use English and German, pass `eng+deu`.
-        :param multiprocessing: We use multiprocessing to speed up PyMuPDF conversion, you can disable it by setting it to False.
-                                If set to True (the default value), the total number of cores is used. To specify the number of cores to use, set it to an integer.
-        """
-        super().__init__(
-            remove_numeric_tables=remove_numeric_tables, valid_languages=valid_languages, id_hash_keys=id_hash_keys
-        )
-
-        self.sort_by_position = sort_by_position
-        self.multiprocessing = multiprocessing
-        self.ocr = ocr
-        self.ocr_language = ocr_language
-
-        if ocr is not None:
-            if ocr not in ["auto", "full"]:
-                raise ValueError("The ocr parameter must be either 'auto' or 'full'.")
-            self._check_tessdata()
-
-        if encoding:
-            warnings.warn(
-                "The encoding parameter is being deprecated. It will be automatically detected by PyMuPDF.",
-                DeprecationWarning,
-            )
-
-        if keep_physical_layout:
-            warnings.warn("The keep_physical_layout parameter is being deprecated.", DeprecationWarning)
-
-    def convert(
-        self,
-        file_path: Path,
-        meta: Optional[Dict[str, Any]] = None,
-        remove_numeric_tables: Optional[bool] = None,
-        valid_languages: Optional[List[str]] = None,
-        encoding: Optional[str] = None,
-        id_hash_keys: Optional[List[str]] = None,
-        start_page: Optional[int] = None,
-        end_page: Optional[int] = None,
-        keep_physical_layout: Optional[bool] = None,
-        sort_by_position: Optional[bool] = None,
-        ocr: Optional[Literal["auto", "full"]] = None,
-        ocr_language: Optional[str] = None,
-        multiprocessing: Optional[Union[bool, int]] = None,
-    ) -> List[Document]:
-        """
-        Extract text from a PDF file and convert it to a Document.
-        :param file_path: Path to the .pdf file you want to convert
-        :param meta: Optional dictionary with metadata that shall be attached to all resulting documents.
-                     Can be any custom keys and values.
-        :param remove_numeric_tables: This option uses heuristics to remove numeric rows from the tables.
-                                      The tabular structures in documents might be noise for the reader model if it
-                                      does not have table parsing capability for finding answers. However, tables
-                                      may also have long strings that could possible candidate for searching answers.
-                                      The rows containing strings are thus retained in this option.
-        :param valid_languages: validate languages from a list of languages specified in the ISO 639-1
-                                (https://en.wikipedia.org/wiki/ISO_639-1) format.
-                                This option can be used to add test for encoding errors. If the extracted text is
-                                not one of the valid languages, then it might likely be encoding error resulting
-                                in garbled text.
-        :param encoding: This parameter is being deprecated.
-                         It will be automatically detected by PyMuPDF.
-        :param keep_physical_layout: This parameter is being deprecated.
-        :param sort_by_position: Specifies whether to sort the extracted text by positional coordinates or logical reading order.
-                        If set to True, the text is sorted first by vertical position, and then by horizontal position.
-                        If set to False (default), the logical reading order in the PDF is used.
-        :param id_hash_keys: Generate the document id from a custom list of strings that refer to the document's
-            attributes. If you want to ensure you don't have duplicate documents in your DocumentStore but texts are
-            not unique, you can modify the metadata and pass e.g. `"meta"` to this field (e.g. [`"content"`, `"meta"`]).
-            In this case the id will be generated by using the content and the defined metadata.
-        :param start_page: The page number where to start the conversion
-        :param end_page: The page number where to end the conversion.
-        :param ocr: Specifies whether to use OCR to extract text from images in the PDF. If set to "auto", OCR is used only to extract text
-                    from images and integrate into the existing text. If set to "full", OCR is used to extract text from the entire PDF.
-                    To use this feature you must install Tesseract-OCR. For more information, see https://github.com/tesseract-ocr/tesseract#installing-tesseract.
-        :param ocr_language: Specifies the language to use for OCR. The default language is English, which language code is `eng`.
-                For a list of supported languages and the respective codes access https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html.
-                You can combine multiple languages by passing a string with the language codes separated by `+`. For example, to use English and German, pass `eng+deu`.
-        :param multiprocessing: We use multiprocessing to speed up PyMuPDF conversion, you can disable it by setting it to False.
-                                If set to None (the default value), the value defined in the class initialization is used.
-                                If set to True, the total number of cores is used. To specify the number of cores to use, set it to an integer.
-        """
-        if remove_numeric_tables is None:
-            remove_numeric_tables = self.remove_numeric_tables
-        if valid_languages is None:
-            valid_languages = self.valid_languages
-        if id_hash_keys is None:
-            id_hash_keys = self.id_hash_keys
-        if multiprocessing is None:
-            multiprocessing = self.multiprocessing
-        if sort_by_position is None:
-            sort_by_position = self.sort_by_position
-        if ocr is None:
-            ocr = self.ocr
-        if ocr_language is None:
-            ocr_language = self.ocr_language
-
-        if encoding:
-            warnings.warn(
-                "The encoding parameter is being deprecated. It will be automatically detected by PyMuPDF.",
-                DeprecationWarning,
-            )
-
-        if keep_physical_layout:
-            warnings.warn("The keep_physical_layout parameter is being deprecated.", DeprecationWarning)
-
-        if ocr is not None:
-            if ocr not in ["auto", "full"]:
-                raise ValueError("The ocr parameter must be either 'auto' or 'full'.")
-            self._check_tessdata()
-
-        pages = self._read_pdf(
-            file_path,
-            sort_by_position=sort_by_position,
-            start_page=start_page,
-            end_page=end_page,
-            ocr=ocr,
-            ocr_language=ocr_language,
-            multiprocessing=multiprocessing,
-        )
-
-        cleaned_pages = []
-        for page in pages:
-            lines = page.splitlines()
-            cleaned_lines = []
-            for line in lines:
-                words = line.split()
-                digits = [word for word in words if any(i.isdigit() for i in word)]
-
-                # remove lines having > 40% of words as digits AND not ending with a period(.)
-                if (
-                    remove_numeric_tables
-                    and words
-                    and len(digits) / len(words) > 0.4
-                    and not line.strip().endswith(".")
-                ):
-                    logger.debug("Removing line '%s' from %s", line, file_path)
-                    continue
-                cleaned_lines.append(line)
-
-            page = "\n".join(cleaned_lines)
-            cleaned_pages.append(page)
-
-        if valid_languages:
-            document_text = "".join(cleaned_pages)
-            if not self.validate_language(document_text, valid_languages):
-                logger.warning(
-                    "The language for %s is not one of %s. The file may not have "
-                    "been decoded in the correct text format.",
-                    file_path,
-                    valid_languages,
-                )
-
-        text = "\f".join(cleaned_pages)
-        document = Document(content=text, meta=meta, id_hash_keys=id_hash_keys)
-        return [document]
-
-    def _check_tessdata(self):
-        if os.getenv("TESSDATA_PREFIX") is None:
-            raise EnvironmentError(
-                """
-                To enable OCR support via PDFToTextConverter, you need to install Tesseract:
-                    - Windows: choco install tesseract-ocr
-                    - Linux (Ubuntu): sudo apt-get install tesseract-ocr
-                    - Mac: brew install tesseract
-                After that, you need to set the environment variable TESSDATA_PREFIX to the path
-                of your Tesseract data directory. Typically this is:
-                    - Windows: C:\\Program Files\\Tesseract-OCR\\tessdata
-                    - Linux (Ubuntu): /usr/share/tesseract-ocr/4.00/tessdata
-                    - Mac (Intel):  /usr/local/Cellar/tesseract/5.3.0_1/share/tessdata
-                    - Mac (M1/M2): /opt/homebrew/Cellar/tesseract/5.3.0_1/share/tessdata
-                """
-            )
-
-    def _get_text_parallel(self, page_mp):
-        idx, filename, parts, sort_by_position, ocr, ocr_language = page_mp
-
-        doc = fitz.open(filename)
-
-        text = ""
-        for i in parts[idx]:
-            page = doc[i]
-            partial_tp = None
-            if ocr is not None:
-                full = ocr == "full"
-                partial_tp = page.get_textpage_ocr(flags=0, full=full, dpi=300, language=ocr_language)
-            text += page.get_text("text", textpage=partial_tp, sort=sort_by_position) + "\f"
-
-        return text
-
-    def _read_pdf(
-        self,
-        file_path: Path,
-        ocr_language: str,
-        sort_by_position: bool = False,
-        start_page: Optional[int] = None,
-        end_page: Optional[int] = None,
-        ocr: Optional[Literal["auto", "full"]] = None,
-        multiprocessing: Optional[Union[bool, int]] = None,
-    ) -> List[str]:
-        """
-        Extract pages from the pdf file at file_path.
-
-        :param file_path: path of the pdf file
-        :param sort_by_position: Specifies whether to sort the extracted text by positional coordinates or logical reading order.
-                        If set to True, the text is sorted first by vertical position, and then by horizontal position.
-                        If set to False (default), the logical reading order in the PDF is used.
-        :param start_page: The page number where to start the conversion, starting from 1.
-        :param end_page: The page number where to end the conversion.
-        :param encoding: This parameter is being deprecated.
-                         It will be automatically detected by PyMuPDF.
-        :param multiprocessing: We use multiprocessing to speed up PyMuPDF conversion, you can disable it by setting it to False.
-                                If set to None (the default value), the value defined in the class initialization is used.
-                                If set to True, the total number of cores is used. To specify the number of cores to use, set it to an integer.
-        """
-        if start_page is None:
-            start_page = 0
-        else:
-            start_page = start_page - 1
-
-        doc = fitz.open(file_path)
-        page_count = int(doc.page_count)
-
-        if end_page is None or (end_page is not None and end_page > page_count):
-            end_page = page_count
-
-        document = ""
-
-        if not multiprocessing:
-            for i in range(start_page, end_page):
-                page = doc[i]
-                partial_tp = None
-                if ocr is not None:
-                    full = ocr == "full"
-                    partial_tp = page.get_textpage_ocr(flags=0, full=full, dpi=300, language=ocr_language)
-                document += page.get_text("text", textpage=partial_tp, sort=sort_by_position) + "\f"
-        else:
-            cpu = cpu_count() if isinstance(multiprocessing, bool) else multiprocessing
-            page_list = list(range(start_page, end_page))
-            cpu = cpu if len(page_list) > cpu else len(page_list)
-            parts = divide(cpu, page_list)
-            pages_mp = [(i, file_path, parts, sort_by_position, ocr, ocr_language) for i in range(cpu)]
-
-            with ProcessPoolExecutor(max_workers=cpu) as pool:
-                results = pool.map(self._get_text_parallel, pages_mp)
-                for page in results:
-                    document += page
-
-        document = "\f" * start_page + document  # tracking skipped pages for correct page numbering
-        pages = document.split("\f")
-        pages = pages[:-1]  # the last page in the split is always empty.
-
-        return pages
diff --git a/haystack/nodes/retriever/link_content.py b/haystack/nodes/retriever/link_content.py
index 7f353cb7ff..19a84ce0ce 100644
--- a/haystack/nodes/retriever/link_content.py
+++ b/haystack/nodes/retriever/link_content.py
@@ -20,9 +20,6 @@
 
 logger = logging.getLogger(__name__)
 
-with LazyImport("Run 'pip install farm-haystack[pdf]'") as fitz_import:
-    import fitz
-
 
 def html_content_handler(response: Response) -> Optional[str]:
     """
@@ -34,20 +31,6 @@ def html_content_handler(response: Response) -> Optional[str]:
     return extractor.get_content(response.text)
 
 
-def pdf_content_handler(response: Response) -> Optional[str]:
-    """
-    Extracts text from PDF response stream using the PyMuPDF library.
-
-    :param response: Response object from the request.
-    :return: The extracted text.
-    """
-    file_path = io.BytesIO(response.content)
-    with fitz.open(stream=file_path, filetype="pdf") as doc:
-        text = "\f".join([page.get_text() for page in doc])
-
-    return text.encode("ascii", errors="ignore").decode()
-
-
 class LinkContentFetcher(BaseComponent):
     """
     LinkContentFetcher fetches content from a URL and converts it into a list of Document objects.
@@ -153,8 +136,6 @@ def __init__(
 
         # register default content handlers
         self._register_content_handler("text/html", html_content_handler)
-        if fitz_import.is_successful():
-            self._register_content_handler("application/pdf", pdf_content_handler)
 
         # register custom content handlers, can override default handlers
         if content_handlers:
diff --git a/pyproject.toml b/pyproject.toml
index b26cac3724..5f576ac52a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -161,9 +161,7 @@ file-conversion = [
   "python-magic; platform_system != 'Windows'",  # Depends on libmagic: https://pypi.org/project/python-magic/
   "python-magic-bin; platform_system == 'Windows'",  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
 ]
-pdf = [
-  "PyMuPDF>=1.18.16" ,  # PDF text extraction alternative to xpdf; please check AGPLv3 license
-]
+pdf = []  # PyMuPDF (AGPLv3) dependency removed. NOTE(review): the empty extra is presumably kept so that 'farm-haystack[pdf]' still resolves - confirm
 ocr = [
   "pytesseract>0.3.7",
   "pdf2image>1.14",