Exclude deps & split into optional parts (#1551)

mindee · Apr 23, 2024 · f21ac32 · f21ac32
1 parent 248afe8
commit f21ac32
Show file tree

Hide file tree

Showing 23 changed files with 262 additions and 209 deletions.
diff --git a/.conda/meta.yaml b/.conda/meta.yaml
@@ -24,7 +24,6 @@ requirements:
     - numpy >=1.16.0, <2.0.0
     - scipy >=1.4.0, <2.0.0
     - pillow >=9.2.0
-    - matplotlib >=3.1.0
     - h5py >=3.1.0, <4.0.0
     - opencv >=4.5.0, <5.0.0
     - pypdfium2-team::pypdfium2_helpers >=4.0.0, <5.0.0
@@ -33,10 +32,7 @@ requirements:
     - langdetect >=1.0.9, <2.0.0
     - rapidfuzz >=3.0.0, <4.0.0
     - huggingface_hub >=0.20.0, <1.0.0
-    - matplotlib >=3.1.0
-    - weasyprint >=55.0
     - defusedxml >=0.7.0
-    - mplcursors >=0.3
     - anyascii >=0.3.2
     - tqdm >=4.30.0
 

diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml
@@ -41,12 +41,12 @@ jobs:
         name: Install package (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install package (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - name: Import package
         run: python -c "import doctr; print(doctr.__version__)"
 

diff --git a/.github/workflows/demo.yml b/.github/workflows/demo.yml
@@ -42,13 +42,13 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r demo/tf-requirements.txt
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r demo/pt-requirements.txt
 
       - if: matrix.framework == 'tensorflow'

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf]
+          pip install -e .[tf,viz,html]
           pip install -e .[docs]
 
       - name: Build documentation

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -e .[testing]
       - name: Run unittests
         run: |
@@ -61,7 +61,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -e .[testing]
       - name: Run unittests
         run: |
@@ -94,7 +94,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -e .[testing]
 
       - name: Run unittests

diff --git a/.github/workflows/public_docker_images.yml b/.github/workflows/public_docker_images.yml
@@ -23,7 +23,7 @@ jobs:
       matrix:
         # Must match version at https://www.python.org/ftp/python/
         python: ["3.9.18", "3.10.13", "3.11.8"]
-        framework: ["tf", "torch"]
+        framework: ["tf", "torch", "tf,viz,html", "torch,viz,html"]
         system: ["cpu", "gpu"]
 
     # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.

diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml
@@ -22,7 +22,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -e .[docs]
 
       - name: Build documentation

diff --git a/.github/workflows/references.yml b/.github/workflows/references.yml
@@ -42,14 +42,14 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r references/requirements.txt
           sudo apt-get update && sudo apt-get install fonts-freefont-ttf -y
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r references/requirements.txt
           sudo apt-get update && sudo apt-get install fonts-freefont-ttf -y
       - if: matrix.framework == 'tensorflow'
@@ -94,13 +94,13 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r references/requirements.txt
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r references/requirements.txt
       - name: Download and extract detection toy set
         run: |
@@ -160,13 +160,13 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r references/requirements.txt
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r references/requirements.txt
       - name: Download and extract toy set
         run: |
@@ -211,12 +211,12 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - if: matrix.framework == 'tensorflow'
         name: Evaluate text recognition (TF)
         run: python references/recognition/evaluate_tensorflow.py crnn_mobilenet_v3_small --dataset IIIT5K -b 32
@@ -255,12 +255,12 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - if: matrix.framework == 'tensorflow'
         name: Benchmark latency (TF)
         run: python references/recognition/latency_tensorflow.py crnn_mobilenet_v3_small --it 5
@@ -303,13 +303,13 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r references/requirements.txt
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r references/requirements.txt
       - name: Download and extract toy set
         run: |
@@ -354,13 +354,13 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
           pip install -r references/requirements.txt
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
           pip install -r references/requirements.txt
       - if: matrix.framework == 'tensorflow'
         name: Evaluate text detection (TF)
@@ -400,12 +400,12 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - if: matrix.framework == 'tensorflow'
         name: Benchmark latency (TF)
         run: python references/detection/latency_tensorflow.py db_mobilenet_v3_large --it 5 --size 512
@@ -444,12 +444,12 @@ jobs:
         name: Install dependencies (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install dependencies (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Benchmark latency (PT)
         run: python references/obj_detection/latency_pytorch.py fasterrcnn_mobilenet_v3_large_fpn --it 5 --size 512
diff --git a/.github/workflows/scripts.yml b/.github/workflows/scripts.yml
@@ -41,12 +41,12 @@ jobs:
         name: Install package (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install package (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
 
       - name: Run analysis script
         run: |
@@ -87,12 +87,12 @@ jobs:
         name: Install package (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install package (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
 
       - name: Run detection script
         run: |
@@ -133,12 +133,12 @@ jobs:
         name: Install package (TF)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[tf] --upgrade
+          pip install -e .[tf,viz,html] --upgrade
       - if: matrix.framework == 'pytorch'
         name: Install package (PT)
         run: |
           python -m pip install --upgrade pip
-          pip install -e .[torch] --upgrade
+          pip install -e .[torch,viz,html] --upgrade
       - name: Run evaluation script
         run: |
           python scripts/evaluate.py db_resnet50 crnn_vgg16_bn --samples 10

diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@ from doctr.io import DocumentFile
 pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
 # Image
 single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
-# Webpage
+# Webpage (requires `weasyprint` to be installed)
 webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
 # Multiple page images
 multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -75,6 +75,7 @@ If both options are set to False, the predictor will always fit and return rotat
 To interpret your model's predictions, you can visualize them interactively as follows:
 
 ```python
+# Display the result (requires matplotlib & mplcursors to be installed)
 result.show()
 ```
 
@@ -136,16 +137,6 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
 
-Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
-For MacOS users, you can install them as follows:
-
-```shell
-brew install cairo pango gdk-pixbuf libffi
-```
-
-For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
-
 ### Latest release
 
 You can then install the latest release of the package using [pypi](https://pypi.org/project/python-doctr/) as follows:
@@ -350,9 +341,15 @@ Your API should now be running locally on your port 8002. Access your automatica
 
 ```python
 import requests
+
+headers = {"accept": "application/json"}
+params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
 with open('/path/to/your/doc.jpg', 'rb') as f:
-    data = f.read()
-response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+    files = [  # application/pdf, image/jpeg, image/png supported
+        ("files", ("doc.jpg", f.read(), "image/jpeg")),
+    ]
+print(requests.post("http://localhost:8002/ocr", headers=headers, params=params, files=files).json())
 ```
 
 ### Example notebooks

diff --git a/docs/README.md b/docs/README.md
@@ -8,6 +8,6 @@ To install only the `docs` environment, you can do:
 ```bash
 # Make sure you are at the root of the repository before executing these commands
 python -m pip install --upgrade pip
-pip install -e .[tf]  # or .[torch]
+pip install -e .[tf,viz,html]  # or .[torch,viz,html]
 pip install -e .[docs]
 ```
diff --git a/docs/source/getting_started/installing.rst b/docs/source/getting_started/installing.rst
@@ -19,17 +19,6 @@ For MacBooks with M1 chip, you will need some additional packages or specific ve
 * `TensorFlow 2 Metal Plugin <https://developer.apple.com/metal/tensorflow-plugin/>`_
 * `PyTorch >= 1.12.0 <https://pytorch.org/get-started/locally/#start-locally>`_
 
-If you are running another OS than Linux, you will need a few extra dependencies.
-
-For MacOS users, you can install them using `Homebrew <https://brew.sh/>`_ as follows:
-
-.. code:: shell
-
-    brew install cairo pango gdk-pixbuf libffi
-
-For Windows users, those dependencies are included in GTK. You can find the latest installer over `here <https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases>`_.
-
-
 Via Python Package
 ==================
 
@@ -49,12 +38,18 @@ We strive towards reducing framework-specific dependencies to a minimum, but som
         .. code:: bash
 
             pip install "python-doctr[tf]"
+            # or with the optional dependencies for visualization & HTML support
+            pip install "python-doctr[tf,viz,html]"
 
     .. tab:: PyTorch
 
         .. code:: bash
 
             pip install "python-doctr[torch]"
+            # or with the optional dependencies for visualization & HTML support
+            pip install "python-doctr[torch,viz,html]"
+
+
 
 
 Via Conda (Only for Linux)