Skip to content

Commit

Permalink
[Fixes / docs] Add more vocabs / Fix Style / HF hub / API Dep (#1412)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 authored Jan 3, 2024
1 parent 5105f98 commit ed927bd
Show file tree
Hide file tree
Showing 169 changed files with 685 additions and 749 deletions.
2 changes: 1 addition & 1 deletion .github/verify_pr_labels.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
architecture: x64
- name: Run ruff
run: |
pip install ruff
pip install ruff --upgrade
ruff --version
ruff check --diff .
Expand All @@ -48,7 +48,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -e .[dev] --upgrade
pip install mypy
pip install mypy --upgrade
- name: Run mypy
run: |
mypy --version
Expand Down
2 changes: 1 addition & 1 deletion api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8-slim
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9-slim

WORKDIR /app

Expand Down
2 changes: 1 addition & 1 deletion api/app/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/routes/detection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/routes/kie.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/routes/ocr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/routes/recognition.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/schemas.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/app/vision.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "doctr-api"
version = "0.7.0a0"
version = "0.8.0a0"
description = "Backend template for your OCR API with docTR"
authors = ["Mindee <[email protected]>"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion demo/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion demo/backend/pytorch.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion demo/backend/tensorflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Changelog
=========

v0.7.0 (2023-09-09)
v0.7.0 (2023-09-09)
-------------------
Release note: `v0.7.0 <https://github.com/mindee/doctr/releases/tag/v0.7.0>`_

Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
24 changes: 24 additions & 0 deletions docs/source/modules/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ of vocabs.
* - spanish
- 116
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿áéíóúüñÁÉÍÓÚÜÑ¡¿
* - italian
- 120
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ
* - german
- 108
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿äöüßÄÖÜẞ
Expand All @@ -130,11 +133,32 @@ of vocabs.
* - czech
- 130
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ
* - polish
- 118
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿ąćęłńóśźżĄĆĘŁŃÓŚŹŻ
* - dutch
- 114
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿áéíóúüñÁÉÍÓÚÜÑ
* - norwegian
- 106
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿æøåÆØÅ
* - danish
- 106
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿æøåÆØÅ
* - finnish
- 104
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿äöÄÖ
* - swedish
- 106
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿åäöÅÄÖ
* - vietnamese
- 234
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ
* - hebrew
- 123
- 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~°£€¥¢฿אבגדהוזחטיכלמנסעפצקרשת₪
* - multilingual
- 195
- english & french & german & italian & spanish & portuguese & czech & polish & dutch & norwegian & danish & finnish & swedish & §
.. autofunction:: encode_sequences
4 changes: 4 additions & 0 deletions docs/source/using_doctr/sharing_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,7 @@ Recognition
+---------------------------------+---------------------------------------------------+---------------------+------------------------+
| crnn_vgg16_bn (dummy) | Felix92/doctr-tf-crnn-vgg16-bn-french | french | TensorFlow |
+---------------------------------+---------------------------------------------------+---------------------+------------------------+
| crnn_vgg16_bn | tilman-rassy/doctr-crnn-vgg16-bn-fascan-v1 | french + german + § | PyTorch |
+---------------------------------+---------------------------------------------------+---------------------+------------------------+
| parseq | Felix92/doctr-torch-parseq-multilingual-v1 | multilingual | PyTorch |
+---------------------------------+---------------------------------------------------+---------------------+------------------------+
9 changes: 5 additions & 4 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down Expand Up @@ -110,9 +110,10 @@ def __init__(
for crop, label in zip(crops, list(text_targets)):
self.data.append((crop, label))
else:
self.data.append(
(img_path, dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)))
)
self.data.append((
img_path,
dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
))

self.root = tmp_root

Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/pytorch.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/tensorflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/detection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/doc_artefacts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
12 changes: 5 additions & 7 deletions doctr/datasets/funsd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down Expand Up @@ -101,12 +101,10 @@ def __init__(
if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
self.data.append((crop, label))
else:
self.data.append(
(
img_path,
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
)
)
self.data.append((
img_path,
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
))

self.root = tmp_root

Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/generator/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/generator/pytorch.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/generator/tensorflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ic03.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ic13.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
9 changes: 5 additions & 4 deletions doctr/datasets/iiit5k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down Expand Up @@ -92,9 +92,10 @@ def __init__(
box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]

# label are casted to list where each char corresponds to the character's bounding box
self.data.append(
(_raw_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)))
)
self.data.append((
_raw_path,
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)),
))

self.root = tmp_root

Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/iiithws.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/imgur5k.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/loader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/mjsynth.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/ocr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/recognition.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/svhn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/svt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/synthtext.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2021-2023, Mindee.
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
Expand Down
Loading

0 comments on commit ed927bd

Please sign in to comment.