diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index 046e0ba834..b4e556c016 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/latest/_sources/using_doctr/using_models.rst.txt b/latest/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/latest/_sources/using_doctr/using_models.rst.txt +++ b/latest/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/latest/searchindex.js b/latest/searchindex.js index 9a783a0a2f..df9dfd3265 100644 --- a/latest/searchindex.js +++ b/latest/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/custom_models_training", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/custom_models_training.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Train your own model", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 2, 5, 7, 13, 15], "we": [1, 2, 3, 4, 6, 8, 13, 14, 15, 16], "member": 1, "leader": 1, "make": [1, 2, 9, 12, 13, 15, 16], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 13], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 16], "size": [1, 5, 6, 8, 9, 16], "visibl": 1, "invis": 1, "disabl": [1, 12], 
"ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 16], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 14], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 16], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 14], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 15, 16], "open": [1, 2, 13, 15], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7, 13], "behavior": [1, 16], "posit": [1, 9], "environ": [1, 12], "includ": [1, 3, 5, 14, 15], "demonstr": 1, "empathi": 1, "kind": 1, "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 16], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15, 16], "from": [1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 12, 13, 14, 15], "best": 1, "just": 1, "u": [1, 2], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 12, 16], "us": [1, 2, 3, 5, 7, 9, 11, 12, 13, 16], "languag": [1, 4, 5, 6, 7, 13, 16], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 16], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": 1, "publish": 1, "inform": [1, 2, 4, 5, 14], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 12, 14, 16], "could": 1, "reason": [1, 4, 5], "consid": [1, 2, 5, 6, 9, 16], "inappropri": 1, "profession": 1, "set": [1, 5, 7, 9, 12, 16], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 14, 16], "clarifi": 1, "take": [1, 5, 16], "appropri": [1, 2, 16], "fair": 1, "action": 1, "thei": [1, 9], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 9, 11, 13, 14, 16], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": 
[1, 2, 13], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15, 16], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 14, 16], "space": 1, "also": [1, 7, 13, 14, 16], "offici": [1, 7], "repres": [1, 9, 15, 16], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 16], "via": 1, "social": 1, "media": 1, "account": [1, 13], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 16], "abus": 1, "otherwis": [1, 6, 9], "mai": [1, 2], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 7, 13], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 12], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12, 13, 16], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 15], "written": [1, 6], "provid": [1, 2, 4, 13, 14, 16], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 16], "why": 1, "wa": 1, "apologi": 1, "request": [1, 13], "through": [1, 8, 14], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 16], "involv": [1, 16], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 14], "avoid": [1, 3], "well": [1, 15], "extern": [1, 14], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 16], "adapt": 1, "version": [1, 2, 3, 15, 16], "0": [1, 3, 5, 8, 9, 11, 14, 16], "avail": [1, 4, 8], "http": [1, 3, 5, 6, 7, 13, 16], "www": [1, 6, 16], "org": [1, 5, 7, 16], "_": [1, 5, 7], "html": [1, 2, 6, 16], "were": [1, 6, 16], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 13], "ladder": 1, "For": [1, 2, 3, 11, 16], "answer": 1, "common": [1, 8, 9, 15], "question": 1, "about": [1, 14, 16], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 16], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15, 16], "need": [2, 3, 5, 9, 11, 12, 
13, 16], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 14], "packag": [2, 4, 9, 12, 14], "python": 2, "doc": [2, 6, 15, 16], "librari": [2, 3, 10, 11], "build": [2, 3], "script": [2, 14], "refer": [2, 3, 11, 13, 14, 16], "train": [2, 5, 7, 8, 13, 14, 15, 16], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 16], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3, 7, 13], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 13, 16], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 13, 14, 15, 16], "your": [2, 4, 6, 9, 16], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 14], "encount": 2, "problem": 2, "suggest": [2, 13], "input": [2, 6, 7, 8, 15, 16], "ha": [2, 5, 9, 14], "valu": [2, 6, 8, 16], "can": [2, 3, 11, 12, 13, 14, 16], "purpos": 2, "advis": 2, "first": [2, 5], "check": [2, 13, 16], "topic": 2, "wasn": 2, "t": [2, 5, 11, 16], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 16], "feel": [2, 13], "new": [2, 9], "one": [2, 5, 7, 8, 11, 13, 16], "do": [2, 3, 7], "so": [2, 3, 5, 7, 13, 14], "whenev": 2, "possibl": [2, 9, 13, 16], "enough": [2, 16], "jump": 2, "wonder": 2, "how": [2, 11, 13, 14], "someth": 2, "more": [2, 9, 14, 16], "gener": [2, 4, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 16], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 11, 14, 16], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [2, 9, 16], "pip": [2, 3], "upgrad": 2, "dev": [2, 12], "pre": [2, 7], "docstr": 2, "In": [2, 5, 14], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 11, 16], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [2, 5, 6, 8], "same": [2, 6, 9, 14, 16], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 14, 16], "To": [2, 3, 12, 13, 16], "togeth": [2, 6], "current": [2, 16], "built": 2, "sphinx": 2, "thank": 2, "our": [2, 
7, 16], "file": [2, 5], "been": [2, 9, 14, 16], "rebuilt": 2, "want": [2, 15, 16], "forc": 2, "complet": 2, "rebuild": 2, "delet": 2, "_build": 2, "directori": [2, 12], "addition": [2, 16], "clear": 2, "web": [2, 6], "browser": [2, 4], "cach": [2, 5, 12], "modif": 2, "now": 2, "locat": [2, 6, 16], "index": [2, 6], "wish": 2, "somewher": 2, "els": 2, "than": [2, 3, 9, 13], "join": 2, "slack": 2, "where": [2, 6, 8, 9], "find": [2, 3, 14], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 15, 16], "8": [3, 7, 8, 16], "higher": [3, 5, 16], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 11, 13, 15, 16], "pytorch": [3, 4, 7, 8, 11, 13, 15, 16], "correspond": [3, 6, 16], "page": [3, 5, 7, 9, 16], "2": [3, 4, 5, 6, 8, 16], "macbook": 3, "m1": 3, "chip": 3, "some": [3, 10, 13, 14], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 14, 16], "12": [3, 16], "anoth": [3, 7, 11, 14], "linux": 3, "few": [3, 15, 16], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 16], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 16], "over": [3, 5, 9, 16], "here": [3, 8, 10, 14, 16], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 16], "strive": 3, "reduc": [3, 8], "framework": [3, 13, 14, 16], "minimum": [3, 5, 8, 9], "necessari": [3, 11, 12], "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 13, 15], "torch": [3, 8, 11, 13, 15], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 16], "charact": [4, 5, 6, 9, 14, 16], "made": 4, "seamless": 4, "access": [4, 6, 14, 16], "anyon": 4, "power": 4, "easi": [4, 9, 13], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 16], "understand": [4, 5, 16], "task": [4, 5, 7, 13, 14, 16], "ocr": [4, 5, 7, 9, 13, 14], "predictor": [4, 6, 7, 11, 13, 15], "pars": [4, 5], "textual": [4, 5, 6, 7, 16], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 14, 16], "word": [4, 5, 7, 9, 16], 
"research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 13], "speed": [4, 7], "perform": [4, 6, 7, 8, 9, 12, 15, 16], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 15, 16], "paramet": [4, 6, 7, 15], "friendli": 4, "line": [4, 7, 9, 16], "code": [4, 6], "load": [4, 5, 7], "googlevis": 4, "aw": [4, 16], "textract": [4, 16], "optim": 4, "infer": [4, 7, 8], "both": [4, 5, 8, 14, 16], "cpu": [4, 11], "gpu": [4, 15], "light": 4, "activ": 4, "maintain": 4, "integr": [4, 13, 14], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7, 16], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 16], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 16], "sar": [4, 7], "show": [4, 6, 7, 9, 11, 13], "attend": [4, 7], "read": [4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 16], "irregular": [4, 7, 14], "crnn": [4, 7, 13], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7, 15], "network": [4, 5, 7, 15], "imag": [4, 5, 6, 7, 8, 9, 13, 14, 16], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 16], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 16], "multi": [4, 7], "aspect": [4, 7, 8, 16], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7, 15], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": [4, 7, 13, 16], "permut": [4, 7], "autoregress": [4, 7], "funsd": [4, 5, 14, 16], "form": [4, 5, 16], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 14, 16], "consolid": [4, 5], "receipt": [4, 5, 16], "forpost": [4, 5], "sroie": [4, 5, 14], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 14], "visual": 4, "geometri": [4, 6, 16], "group": 4, "svhn": [4, 5, 14], "digit": [4, 5, 14], "unsupervis": 4, "ic03": [4, 5, 14], "2003": [4, 5], "ic13": [4, 5, 14], "2013": [4, 5], "imgur5k": [4, 5, 14], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 14], "synthet": 
4, "data": [4, 5, 6, 8, 9, 11, 13], "artifici": [4, 5], "iiithw": [4, 5, 14], "wildreceipt": [4, 5, 14], "spatial": [4, 5, 6, 9], "dual": [4, 5], "modal": [4, 5], "graph": [4, 5, 6], "kei": [4, 5], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], "use_polygon": [5, 9, 14], "fals": [5, 6, 7, 8, 9, 11, 16], "recognition_task": [5, 14], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 13], "document": [5, 7, 9, 10, 14, 16], "import": [5, 6, 7, 8, 9, 11, 13, 14, 15, 16], "train_set": [5, 14], "download": [5, 14], "img": [5, 8, 14], "target": [5, 6, 8, 9, 14], "subset": [5, 16], "polygon": [5, 9, 16], "rotat": [5, 6, 7, 8, 9, 14, 16], "bound": [5, 6, 7, 8, 9, 16], "box": [5, 6, 7, 8, 9, 14, 16], "instead": [5, 6, 7], "straight": [5, 7, 14, 16], "ones": [5, 8, 9], "recognit": [5, 9, 11], "keyword": [5, 6, 7, 9], "argument": [5, 6, 7, 9, 16], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 14], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 14], "prior": 5, "svt": [5, 14], "ucsd": 5, "comput": [5, 9, 15, 16], "hous": 5, "number": [5, 8, 9, 16], "localis": 5, "repositori": [5, 7, 13], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 14], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 14], "part": [5, 8, 16], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": 5, "annot": 5, "abstractdataset": 5, "label_path": [5, 14], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 14, 16], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": 5, "images_90k_norm": 5, "90k": 5, "docartefact": [5, 14], "object": [5, 9, 10, 16], "detect": [5, 9, 10, 11], "element": [5, 6, 7, 9, 16], "varieti": 5, "arxiv": [5, 7], "ab": 5, "2103": 5, "14470v1": 5, "test": [5, 14], "charactergener": [5, 14], 
"implement": [5, 6, 7, 8, 9, 16], "d": [5, 14], "abdef": [5, 14], "num_sampl": [5, 14], "100": [5, 8, 9, 14, 16], "vocabulari": [5, 11, 13], "sampl": [5, 14, 16], "iter": [5, 8, 14, 16], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 16], "sample_transform": 5, "wordgener": [5, 14], "min_char": [5, 14], "int": [5, 6, 8, 9], "max_char": [5, 14], "list": [5, 6, 8, 9, 13], "none": [5, 6, 7, 8, 9, 16], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 14, 15, 16], "maximum": [5, 8], "detectiondataset": [5, 14], "recognitiondataset": [5, 14], "labels_path": [5, 14], "contain": [5, 14], "ocrdataset": [5, 14], "label_fil": [5, 14], "jpg": [5, 6, 13], "root": 5, "shuffl": [5, 8], "batch_siz": [5, 11, 14, 15], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 14], "train_it": [5, 14], "next": [5, 14], "befor": [5, 7, 8, 16], "pass": [5, 6, 7, 16], "batch": [5, 7, 8, 14, 16], "drop": 5, "isn": 5, "full": [5, 9, 16], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 14], "content": [5, 6, 9, 16], "properli": 5, "model": [5, 9, 12, 14], "interpret": [5, 6], "multipl": [5, 6, 8, 16], "name": [5, 7, 15, 16], "10": [5, 9, 16], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 16], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 16], "ancient_greek": 5, "48": [5, 16], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 16], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 16], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 14], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 11, 13, 16], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": 5, "120": 5, "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": 5, "german": [5, 11, 13], "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": 5, "118": 5, 
"\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": 5, "dutch": 5, "114": 5, "norwegian": 5, "106": 5, "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": 5, "danish": 5, "finnish": 5, "104": 5, "\u00e4\u00f6\u00e4\u00f6": 5, "swedish": 5, "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": 5, "vietnames": 5, "234": 5, "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": 5, "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": 5, "multilingu": [5, 13], "195": 5, "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 16], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 16], "map": [5, 7], "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 16], "option": [5, 7, 11], "start": 5, "case": [5, 9], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 16], "modul": [6, 7, 8, 9, 16], "easili": [6, 9, 11, 13, 14, 16], "export": [6, 7, 9, 10, 16], "analysi": 6, "format": [6, 9, 11, 14, 15, 16], "organ": 6, "uninterrupt": [6, 16], "confid": [6, 9, 16], "float": [6, 8, 9, 15], "associ": 6, "predict": [6, 7, 9, 16], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 
16], "rel": [6, 8, 9, 16], "collect": 6, "meant": [6, 15], "two": [6, 12], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 12], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 14], "etc": 6, "artefact_typ": 6, "type": [6, 9, 13, 15, 16], "sever": [6, 8, 16], "its": [6, 7, 8, 9, 14, 16], "titl": [6, 16], "underneath": 6, "page_idx": [6, 16], "dimens": [6, 9, 16], "dict": [6, 9, 16], "numpi": [6, 7, 9, 16], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 16], "raw": [6, 9], "pixel": [6, 8, 16], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 11, 16], "overlai": 6, "displai": [6, 9], "matplotlib": [6, 9], "pyplot": [6, 9], "method": [6, 8, 16], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 16], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8], "render": 6, "72dpi": 6, "output": [6, 8, 15], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": 6, "decod": 6, "shape": [6, 7, 8, 9, 16], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 15], "float32": [6, 7, 8, 15], "desir": 6, "relat": 6, "divid": 6, "255": [6, 7, 8, 9, 16], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": 6, "documentfil": [6, 13], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15, 16], "from_url": 6, "from_imag": [6, 13], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": [7, 15, 16], "describ": [7, 9], "veri": 7, "deep": [7, 16], "convolut": 7, "larg": [7, 13], "modifi": [7, 12, 16], "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 16], "input_tensor": 7, "random": [7, 8, 9, 16], "uniform": [7, 8], "512": 7, "maxval": [7, 8], "imagenet": 
7, "extractor": 7, "resnet18": [7, 13], "resnet": 7, "18": 7, "residu": 7, "boolean": [7, 16], "resnet34": 7, "34": [7, 16], "resnet50": [7, 13], "50": [7, 14, 16], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 16], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 15], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": [7, 9], "unoffici": 7, "config": 7, "vit_b": 7, "b": [7, 9, 16], "textnet_tini": 7, "textnet": 7, "faster": [7, 15], "arbitrarili": 7, "detector": 7, "minimalist": 7, "kernel": [7, 8], "czczup": 7, "tini": 7, "textnet_smal": 7, "textnet_bas": 7, "crop_orientation_predictor": 7, "arch": [7, 13], "croporientationpredictor": 7, "np": [7, 8, 9, 16], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 15, 16], "600": [7, 9, 16], "800": [7, 9, 14, 16], "astyp": [7, 9, 16], "crop": [7, 8, 14, 16], "dataset": [7, 11, 16], "linknet_resnet18": [7, 11, 16], "1024": [7, 9, 11, 16], "linknet_resnet34": [7, 15, 16], "linknet_resnet50": [7, 16], "db_resnet50": [7, 11, 13, 16], "backbon": 7, "db_mobilenet_v3_larg": [7, 13, 16], "mobilenet": [7, 13], "v3": [7, 13, 16], "detection_predictor": [7, 16], "assume_straight_pag": [7, 16], "detectionpredictor": [7, 11], "input_pag": [7, 9, 16], "itself": [7, 13], "fit": [7, 16], "crnn_vgg16_bn": [7, 11, 13, 16], "128": [7, 11, 15, 16], "crnn_mobilenet_v3_smal": [7, 15, 16], "crnn_mobilenet_v3_larg": [7, 13, 16], "sar_resnet31": [7, 16], "31": 7, "64": [7, 8, 16], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": 7, "vitstr_smal": [7, 11, 15, 16], "vitstr_bas": [7, 16], "recognition_predictor": [7, 16], "recognitionpredictor": [7, 11], "ocr_predictor": [7, 11, 13, 15, 16], "det_arch": [7, 11, 13, 15], "reco_arch": [7, 11, 13, 15], "pretrained_backbon": [7, 11], "symmetric_pad": [7, 8, 16], 
"export_as_straight_box": [7, 16], "detect_orient": 7, "straighten_pag": 7, "detect_languag": 7, "ocrpredictor": [7, 11], "up": [7, 16], "assum": 7, "preserv": [7, 8, 16], "ratio": [7, 8, 16], "symmetr": [7, 8, 16], "bottom": [7, 16], "final": 7, "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": 7, "Then": 7, "again": 7, "improv": 7, "kie_predictor": [7, 11], "kiepredictor": 7, "kie": [7, 11], "login_to_hub": [7, 13], "login": 7, "huggingfac": 7, "hub": 7, "from_hub": [7, 13], "repo_id": [7, 13], "instanti": [7, 16], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 13], "model_nam": [7, 13, 15], "save": [7, 14], "configur": 7, "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 16], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 16], "done": 8, "mean": [8, 9, 11], "std": [8, 11], "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 14], "225": 8, "averag": [8, 16], "per": [8, 16], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [8, 16], "rang": 8, "randombright": 8, "max_delta": 8, "adjust": 8, "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 13, 16], "pick": 8, "p": [8, 9, 16], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9, 16], "param": [8, 16], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9, 16], "randomrot": 8, "max_angl": 8, 
"expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 16], "75": [8, 16], "33": [8, 16], "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 13, 15], "consecut": [8, 16], "sequenti": [8, 16], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": [9, 16], "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 16], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 16], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 16], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 16], "famili": 9, "synthes": 9, "metric": [9, 16], "assess": 9, "textmatch": 9, "match": [9, 16], "accuraci": 9, "aggreg": [9, 14], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": [9, 15], "f_a": 9, "left": [9, 16], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": 9, "updat": 9, "hello": [9, 16], "world": [9, 16], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 16], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 16], "g_": 9, "precis": [9, 16], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 16], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 16], "110": 9, "95": [9, 16], "200": 9, "150": [9, 16], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 12, 15], "either": [9, 16], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": 
[9, 16], "detectionmetr": 9, "c_j": 9, "compil": [10, 16], "better": [10, 16], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 16], "searchabl": 10, "don": [11, 16], "meet": 11, "detail": [11, 16], "link": 11, "section": [11, 13, 15, 16], "det_model": [11, 13], "load_weight": 11, "path_to_checkpoint": 11, "weight": 11, "reco_model": [11, 13], "det_param": 11, "path_to_pt": 11, "map_loc": 11, "load_state_dict": 11, "reco_param": 11, "vocab": [11, 13, 14, 16], "class_nam": 11, "total": 11, "date": [11, 16], "preprocessor": [11, 16], "det_predictor": [11, 16], "798": 11, "785": 11, "772": 11, "264": 11, "2749": 11, "287": 11, "reco_predictor": 11, "694": 11, "695": 11, "693": 11, "299": 11, "296": 11, "301": 11, "polici": 12, "restrict": 12, "write": 12, "outsid": 12, "tmp": 12, "work": [12, 16], "step": 12, "usag": [12, 15], "multiprocess": 12, "doctr_multiprocessing_dis": 12, "variabl": 12, "becaus": 12, "shm": 12, "share": [12, 14], "chang": 12, "By": 12, "doctr_cache_dir": 12, "focu": 13, "love": 13, "appreci": 13, "interfac": 13, "io": 13, "custom": [13, 16], "felix92": 13, "db": 13, "vgg16": 13, "bn": 13, "plug": 13, "obj_detect": 13, "exist": 13, "overwritten": 13, "prerequisit": 13, "creat": 13, "co": 13, "instal": 13, "git": 13, "lf": 13, "my_awesome_model": 13, "v1": 13, "directli": [13, 16], "after": [13, 16], "python3": 13, "train_tensorflow": 13, "py": 13, "train_pytorch": 13, "tabl": 13, "pull": 13, "dummi": 13, "tilman": 13, "rassi": 13, "fascan": 13, "evalu": [14, 16], "predefin": 14, "prefer": 14, "signific": 14, "valid": 14, "149": 14, "626": 14, "360": 14, "2000": 14, "3000": 14, "249": 14, "33402": 14, "13068": 14, "772875": 14, "85875": 14, "246": 14, "233": 14, "resourc": 14, "7149": 14, "796": 14, "handwritten": 14, "1268": 14, "472": 14, "21888": 14, "8707": 14, "33608": 14, "19342": 14, "uppercas": 14, "19370": 14, "2186": 14, "257": 14, "647": 14, "73257": 14, "26032": 14, "7100000": 14, 
"707470": 14, "1156": 14, "1107": 14, "849": 14, "1095": 14, "207901": 14, "22672": 14, "7581382": 14, "1337891": 14, "7141797": 14, "793533": 14, "49377": 14, "19598": 14, "alwai": 14, "regular": 14, "2700": 14, "300": 14, "background": 14, "qr_code": 14, "bar_cod": 14, "photo": 14, "classif": 14, "mani": [14, 16], "sensit": 14, "abl": [14, 16], "howev": 14, "guidanc": 14, "tool": 14, "further": 14, "anot": 14, "handl": 14, "underli": 14, "defer": 14, "dataload": 14, "good": 15, "achiev": 15, "might": [15, 16], "tune": 15, "thing": [15, 16], "product": 15, "readi": 15, "help": 15, "support": [15, 16], "devic": 15, "fp16": 15, "point": 15, "occupi": 15, "bit": 15, "advantag": 15, "less": [15, 16], "mixed_precis": 15, "set_global_polici": 15, "mixed_float16": 15, "cuda": 15, "re": 15, "exchang": 15, "interoper": 15, "machin": 15, "structur": [15, 16], "layer": 15, "metadata": 15, "util": 15, "export_model_to_onnx": 15, "input_shap": 15, "dummy_input": 15, "tensorspec": 15, "model_path": 15, "come": 15, "soon": 15, "seen": 16, "onc": 16, "separ": 16, "compon": 16, "charg": 16, "usabl": 16, "backend": 16, "along": 16, "processor": 16, "reusabl": 16, "consist": 16, "delimit": 16, "2d": 16, "corner": 16, "flag": 16, "belong": 16, "skew": 16, "comprehens": 16, "benchmark": 16, "publicli": 16, "sec": 16, "25": 16, "84": 16, "39": 16, "85": 16, "86": 16, "93": 16, "83": 16, "24": 16, "80": 16, "29": 16, "90": 16, "67": 16, "76": 16, "11": 16, "81": 16, "71": 16, "7": 16, "21": 16, "82": 16, "20": 16, "49": 16, "87": 16, "63": 16, "17": 16, "28": 16, "51": 16, "46": 16, "db_resnet34": 16, "22": 16, "89": 16, "74": 16, "56": 16, "68": 16, "92": 16, "61": 16, "41": 16, "00": 16, "79": 16, "38": 16, "88": 16, "62": 16, "26": 16, "06": 16, "78": 16, "47": 16, "54": 16, "abov": 16, "cf": 16, "disclaim": 16, "combin": 16, "199": 16, "second": 16, "warmup": 16, "phase": 16, "measur": 16, "1000": 16, "obtain": 16, "11th": 16, "gen": 16, "intel": 16, "r": 16, "tm": 16, "i7": 16, 
"11800h": 16, "30ghz": 16, "wrap": 16, "useabl": 16, "favorit": 16, "dummy_img": 16, "area": 16, "send": 16, "snippet": 16, "transcrib": 16, "partial": 16, "15": 16, "9": 16, "73": 16, "44": 16, "14": 16, "55": 16, "58": 16, "57": 16, "66": 16, "01": 16, "98": 16, "23": 16, "69": 16, "99": 16, "91": 16, "05": 16, "09": 16, "96": 16, "40": 16, "53": 16, "most": 16, "print": 16, "cfg": 16, "30595": 16, "45": 16, "72": 16, "43": 16, "65": 16, "77": 16, "30": 16, "07": 16, "27": 16, "gvision": 16, "59": 16, "03": 16, "azur": 16, "recogn": 16, "42": 16, "go": 16, "mention": 16, "still": 16, "return": 16, "nest": 16, "get": 16, "typic": 16, "layout": 16, "340": 16, "json_output": 16, "1357421875": 16, "0361328125": 16, "8564453125": 16, "8603515625": 16, "914085328578949": 16, "5478515625": 16, "06640625": 16, "5810546875": 16, "0966796875": 16, "9949972033500671": 16, "51171875": 16, "1630859375": 16, "9578408598899841": 16, "1396484375": 16, "3232421875": 16, "185546875": 16, "3515625": 16, "outpout": 16, "xml": 16, "hocr": 16, "export_as_xml": 16, "xml_output": 16, "xml_bytes_str": 16, "xml_element": 16, "utf": 16, "xmln": 16, "w3": 16, "1999": 16, "xhtml": 16, "lang": 16, "en": 16, "meta": 16, "equiv": 16, "charset": 16, "system": 16, "ocr_pag": 16, "ocr_carea": 16, "ocr_par": 16, "ocr_lin": 16, "ocrx_word": 16, "div": 16, "id": 16, "page_1": 16, "bbox": 16, "3456": 16, "ppageno": 16, "block_1_1": 16, "857": 16, "529": 16, "2504": 16, "2710": 16, "par_1_1": 16, "span": 16, "line_1_1": 16, "x_size": 16, "x_descend": 16, "x_ascend": 16, "word_1_1": 16, "1552": 16, "540": 16, "1778": 16, "580": 16, "x_wconf": 16, "word_1_2": 16, "1782": 16, "1900": 16, "583": 16, "word_1_3": 16, "1420": 16, "597": 16, "1684": 16, "641": 16, "threshold": 16, "region": 16, "accur": 16, "postprocessor": 16, "bin_thresh": 16, "box_thresh": 16, "hook": 16, "manipul": 16, "customhook": 16, "def": 16, "__call__": 16, "self": 16, "loc_pr": 16, "Be": 16, "awar": 16, "my_hook": 16, "middl": 16, 
"pipelin": 16, "add_hook": 16, "execut": 16}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IIITHWS"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WILDRECEIPT"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "textnet_base"], [7, 1, 1, "", "textnet_small"], [7, 1, 1, "", "textnet_tiny"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", "vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", 
"linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "kie_predictor"], [7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "parseq"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": 
["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": 0, "0": 0, "2024": 0, "09": 0, "6": 0, "2022": 0, "29": 0, "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10, 15], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": 2, "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 16], "recognit": [4, 7, 13, 14, 16], "main": 4, "model": [4, 7, 11, 13, 15, 16], "zoo": [4, 7], "detect": [4, 7, 13, 14, 16], "support": [4, 5, 8], "dataset": [4, 5, 14], "arg": [5, 6, 7, 8, 9], "synthet": [5, 14], "gener": [5, 14], "custom": [5, 11], "loader": 5, "dataload": 5, "vocab": 5, "return": [5, 6, 7, 9], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 13], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": [11, 13, 14, 15], "own": [11, 14], "load": [11, 13, 14], "aw": 12, "lambda": 12, "share": 13, "commun": 13, "from": 13, "huggingfac": 13, "hub": 13, "push": 13, "pretrain": 13, "name": 13, "convent": 13, "choos": [14, 16], "readi": 14, "us": [14, 15], "avail": [14, 16], "object": 14, "data": 14, "prepar": 15, "infer": 15, "optim": 
15, "half": 15, "precis": 15, "export": 15, "onnx": 15, "right": 16, "architectur": 16, "predictor": 16, "end": 16, "ocr": 16, "two": 16, "stage": 16, "approach": 16, "what": 16, "should": 16, "i": 16, "do": 16, "output": 16, "advanc": 16, "option": 16}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.7.0 (2024-09-09)": [[0, "v0-7-0-2024-09-09"]], "v0.6.0 (2022-09-29)": [[0, "v0-6-0-2022-09-29"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. 
Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Code style verification": [[2, "code-style-verification"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Args:": [[5, "args"], [5, "id4"], [5, "id7"], [5, "id10"], [5, "id13"], [5, "id16"], [5, "id19"], [5, "id22"], [5, "id25"], [5, "id29"], [5, "id32"], [5, "id37"], [5, "id40"], [5, "id46"], [5, "id49"], [5, "id50"], [5, "id51"], [5, "id54"], [5, "id57"], [5, "id60"], [5, "id61"], [6, "args"], [6, "id2"], [6, "id3"], [6, "id4"], [6, "id5"], [6, "id6"], [6, "id7"], [6, "id10"], [6, "id12"], [6, "id14"], [6, "id16"], [6, "id20"], [6, "id24"], [6, "id28"], [7, "args"], [7, "id3"], [7, "id8"], [7, "id13"], [7, "id17"], [7, "id21"], [7, "id26"], [7, "id31"], [7, "id36"], [7, "id41"], [7, "id45"], [7, "id49"], [7, "id54"], [7, "id58"], [7, "id63"], [7, "id68"], [7, "id72"], [7, 
"id76"], [7, "id81"], [7, "id86"], [7, "id90"], [7, "id95"], [7, "id99"], [7, "id103"], [7, "id108"], [7, "id113"], [7, "id118"], [7, "id122"], [7, "id126"], [7, "id131"], [7, "id135"], [7, "id139"], [7, "id143"], [7, "id145"], [7, "id147"], [7, "id149"], [8, "args"], [8, "id1"], [8, "id2"], [8, "id3"], [8, "id4"], [8, "id5"], [8, "id6"], [8, "id7"], [8, "id8"], [8, "id9"], [8, "id10"], [8, "id11"], [8, "id12"], [8, "id13"], [8, "id14"], [8, "id15"], [8, "id16"], [8, "id17"], [8, "id18"], [9, "args"], [9, "id3"], [9, "id5"], [9, "id6"], [9, "id7"], [9, "id8"], [9, "id9"], [9, "id10"], [9, "id11"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [14, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id62"]], "Returns:": [[5, "returns"], [6, "returns"], [6, "id11"], [6, "id13"], [6, "id15"], [6, "id19"], [6, "id23"], [6, "id27"], [6, "id31"], [7, "returns"], [7, "id6"], [7, "id11"], [7, "id16"], [7, "id20"], [7, "id24"], [7, "id29"], [7, "id34"], [7, "id39"], [7, "id44"], [7, "id48"], [7, "id52"], [7, "id57"], [7, "id61"], [7, "id66"], [7, "id71"], [7, "id75"], [7, "id79"], [7, "id84"], [7, "id89"], [7, "id93"], [7, "id98"], [7, "id102"], [7, "id106"], [7, "id111"], [7, "id116"], [7, "id121"], [7, "id125"], [7, "id129"], [7, "id134"], [7, "id138"], [7, "id142"], [7, "id144"], [7, "id146"], [7, "id148"], [9, "returns"], [9, "id4"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, 
"doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Train your own model": [[11, "train-your-own-model"]], "Loading your custom trained model": [[11, "loading-your-custom-trained-model"]], "AWS Lambda": [[12, "aws-lambda"]], "Share your model with the community": [[13, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[13, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[13, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[13, "pretrained-community-models"]], "Naming conventions": [[13, "naming-conventions"]], "Classification": [[13, "classification"]], "Detection": [[13, "detection"], [14, "detection"]], "Recognition": [[13, "recognition"], [14, "recognition"]], "Choose a ready to use dataset": [[14, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[14, "available-datasets"]], "Object Detection": [[14, "object-detection"]], "Use your own datasets": [[14, "use-your-own-datasets"]], "Data Loading": [[14, "data-loading"]], "Preparing your model for inference": [[15, "preparing-your-model-for-inference"]], "Model optimization": [[15, "model-optimization"]], "Half-precision": [[15, "half-precision"]], "Export to ONNX": [[15, "export-to-onnx"]], "Using your ONNX exported model in docTR": [[15, "using-your-onnx-exported-model-in-doctr"]], "Choosing the right model": [[16, "choosing-the-right-model"]], "Text Detection": [[16, "text-detection"]], "Available architectures": [[16, "available-architectures"], [16, "id1"], [16, "id2"]], "Detection predictors": [[16, 
"detection-predictors"]], "Text Recognition": [[16, "text-recognition"]], "Recognition predictors": [[16, "recognition-predictors"]], "End-to-End OCR": [[16, "end-to-end-ocr"]], "Two-stage approaches": [[16, "two-stage-approaches"]], "What should I do with the output?": [[16, "what-should-i-do-with-the-output"]], "Advanced options": [[16, "advanced-options"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "iiithws (class in doctr.datasets)": [[5, "doctr.datasets.IIITHWS"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wildreceipt (class in doctr.datasets)": [[5, "doctr.datasets.WILDRECEIPT"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, 
"doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module 
doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], "kie_predictor() (in module doctr.models)": [[7, "doctr.models.kie_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "parseq() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.parseq"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], 
"resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "textnet_base() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_base"]], "textnet_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_small"]], "textnet_tiny() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_tiny"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class 
in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, 
"doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/custom_models_training", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/custom_models_training.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Train your own model", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 2, 5, 7, 13, 15], "we": [1, 2, 3, 4, 6, 8, 13, 14, 15, 16], "member": 1, "leader": 1, "make": [1, 2, 9, 12, 13, 15, 16], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 13], "experi": 1, "everyon": 1, "regardless": 1, 
"ag": 1, "bodi": [1, 16], "size": [1, 5, 6, 8, 9, 16], "visibl": 1, "invis": 1, "disabl": [1, 12, 16], "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 16], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 14], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 16], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 14], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 15, 16], "open": [1, 2, 13, 15], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7, 13, 16], "behavior": [1, 16], "posit": [1, 9], "environ": [1, 12], "includ": [1, 3, 5, 14, 15], "demonstr": 1, "empathi": 1, "kind": 1, "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 16], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15, 16], "from": [1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 12, 13, 14, 15], "best": 1, "just": 1, "u": [1, 2], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 12, 16], "us": [1, 2, 3, 5, 7, 9, 11, 12, 13, 16], "languag": [1, 4, 5, 6, 7, 13, 16], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 16], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": 1, "publish": 1, "inform": [1, 2, 4, 5, 14], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 12, 14, 16], "could": 1, "reason": [1, 4, 5], "consid": [1, 2, 5, 6, 9, 16], "inappropri": 1, "profession": 1, "set": [1, 5, 7, 9, 12, 16], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 14, 16], "clarifi": 1, "take": [1, 5, 16], "appropri": [1, 2, 16], "fair": 1, "action": 1, "thei": [1, 9], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 
9, 11, 13, 14, 16], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2, 13], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15, 16], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 14, 16], "space": [1, 16], "also": [1, 7, 13, 14, 16], "offici": [1, 7], "repres": [1, 9, 15, 16], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 16], "via": 1, "social": 1, "media": 1, "account": [1, 13], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 16], "abus": 1, "otherwis": [1, 6, 9], "mai": [1, 2], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 7, 13], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 12], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12, 13, 16], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 15], "written": [1, 6], "provid": [1, 2, 4, 13, 14, 16], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 16], "why": 1, "wa": 1, "apologi": 1, "request": [1, 13], "through": [1, 8, 14], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 16], "involv": [1, 16], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 14], "avoid": [1, 3], "well": [1, 15], "extern": [1, 14], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 16], "adapt": 1, "version": [1, 2, 3, 15, 16], "0": [1, 3, 5, 8, 9, 11, 14, 16], "avail": [1, 4, 8], "http": [1, 3, 5, 6, 7, 13, 16], "www": [1, 6, 16], "org": [1, 5, 7, 16], "_": [1, 5, 7], "html": [1, 2, 6, 16], "were": [1, 6, 16], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 13], "ladder": 1, "For": [1, 2, 3, 11, 16], "answer": 1, "common": [1, 8, 9, 15], "question": 1, "about": [1, 14, 16], "see": [1, 2], "faq": 
1, "translat": 1, "everyth": [2, 16], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15, 16], "need": [2, 3, 5, 9, 11, 12, 13, 16], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 14], "packag": [2, 4, 9, 12, 14], "python": 2, "doc": [2, 6, 15, 16], "librari": [2, 3, 10, 11], "build": [2, 3], "script": [2, 14], "refer": [2, 3, 11, 13, 14, 16], "train": [2, 5, 7, 8, 13, 14, 15, 16], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 16], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3, 7, 13], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 13, 16], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 13, 14, 15, 16], "your": [2, 4, 6, 9, 16], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 14, 16], "encount": 2, "problem": 2, "suggest": [2, 13], "input": [2, 6, 7, 8, 15, 16], "ha": [2, 5, 9, 14], "valu": [2, 6, 8, 16], "can": [2, 3, 11, 12, 13, 14, 16], "purpos": 2, "advis": 2, "first": [2, 5], "check": [2, 13, 16], "topic": 2, "wasn": 2, "t": [2, 5, 11, 16], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 16], "feel": [2, 13], "new": [2, 9], "one": [2, 5, 7, 8, 11, 13, 16], "do": [2, 3, 7], "so": [2, 3, 5, 7, 13, 14], "whenev": 2, "possibl": [2, 9, 13, 16], "enough": [2, 16], "jump": 2, "wonder": 2, "how": [2, 11, 13, 14], "someth": 2, "more": [2, 9, 14, 16], "gener": [2, 4, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 16], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 11, 14, 16], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [2, 9, 16], "pip": [2, 3], "upgrad": 2, "dev": [2, 12], "pre": [2, 7], "docstr": 2, "In": [2, 5, 14], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 11, 16], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [2, 5, 6, 8], "same": [2, 6, 9, 14, 16], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 14, 
16], "To": [2, 3, 12, 13, 16], "togeth": [2, 6], "current": [2, 16], "built": 2, "sphinx": 2, "thank": 2, "our": [2, 7, 16], "file": [2, 5], "been": [2, 9, 14, 16], "rebuilt": 2, "want": [2, 15, 16], "forc": 2, "complet": 2, "rebuild": 2, "delet": 2, "_build": 2, "directori": [2, 12], "addition": [2, 16], "clear": 2, "web": [2, 6], "browser": [2, 4], "cach": [2, 5, 12], "modif": 2, "now": 2, "locat": [2, 6, 16], "index": [2, 6], "wish": 2, "somewher": 2, "els": 2, "than": [2, 3, 9, 13], "join": 2, "slack": 2, "where": [2, 6, 8, 9], "find": [2, 3, 14], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 15, 16], "8": [3, 7, 8, 16], "higher": [3, 5, 16], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 11, 13, 15, 16], "pytorch": [3, 4, 7, 8, 11, 13, 15, 16], "correspond": [3, 6, 16], "page": [3, 5, 7, 9, 16], "2": [3, 4, 5, 6, 8, 16], "macbook": 3, "m1": 3, "chip": 3, "some": [3, 10, 13, 14], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 14, 16], "12": [3, 16], "anoth": [3, 7, 11, 14], "linux": 3, "few": [3, 15, 16], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 16], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 16], "over": [3, 5, 9, 16], "here": [3, 8, 10, 14, 16], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 16], "strive": 3, "reduc": [3, 8], "framework": [3, 13, 14, 16], "minimum": [3, 5, 8, 9, 16], "necessari": [3, 11, 12], "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 13, 15], "torch": [3, 8, 11, 13, 15], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 16], "charact": [4, 5, 6, 9, 14, 16], "made": 4, "seamless": 4, "access": [4, 6, 14, 16], "anyon": 4, "power": 4, "easi": [4, 9, 13], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 16], "understand": [4, 5, 16], "task": [4, 5, 7, 13, 14, 16], "ocr": [4, 5, 7, 9, 13, 14], "predictor": [4, 6, 7, 11, 13, 15], "pars": 
[4, 5], "textual": [4, 5, 6, 7, 16], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 14, 16], "word": [4, 5, 7, 9, 16], "research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 13], "speed": [4, 7], "perform": [4, 6, 7, 8, 9, 12, 15, 16], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 15, 16], "paramet": [4, 6, 7, 15], "friendli": 4, "line": [4, 7, 9, 16], "code": [4, 6], "load": [4, 5, 7], "googlevis": 4, "aw": [4, 16], "textract": [4, 16], "optim": 4, "infer": [4, 7, 8], "both": [4, 5, 8, 14, 16], "cpu": [4, 11], "gpu": [4, 15], "light": 4, "activ": 4, "maintain": 4, "integr": [4, 13, 14], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7, 16], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 16], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 16], "sar": [4, 7], "show": [4, 6, 7, 9, 11, 13], "attend": [4, 7], "read": [4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 16], "irregular": [4, 7, 14], "crnn": [4, 7, 13], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7, 15], "network": [4, 5, 7, 15], "imag": [4, 5, 6, 7, 8, 9, 13, 14, 16], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 16], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 16], "multi": [4, 7], "aspect": [4, 7, 8, 16], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7, 15], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": [4, 7, 13, 16], "permut": [4, 7], "autoregress": [4, 7], "funsd": [4, 5, 14, 16], "form": [4, 5, 16], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 14, 16], "consolid": [4, 5], "receipt": [4, 5, 16], "forpost": [4, 5], "sroie": [4, 5, 14], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 14], "visual": 4, "geometri": [4, 6, 16], "group": [4, 16], "svhn": [4, 5, 14], "digit": [4, 5, 14], "unsupervis": 4, "ic03": [4, 5, 14], "2003": [4, 5], "ic13": [4, 5, 14], "2013": [4, 5], 
"imgur5k": [4, 5, 14], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 14], "synthet": 4, "data": [4, 5, 6, 8, 9, 11, 13], "artifici": [4, 5], "iiithw": [4, 5, 14], "wildreceipt": [4, 5, 14], "spatial": [4, 5, 6, 9], "dual": [4, 5], "modal": [4, 5], "graph": [4, 5, 6], "kei": [4, 5], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], "use_polygon": [5, 9, 14], "fals": [5, 6, 7, 8, 9, 11, 16], "recognition_task": [5, 14], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 13], "document": [5, 7, 9, 10, 14, 16], "import": [5, 6, 7, 8, 9, 11, 13, 14, 15, 16], "train_set": [5, 14], "download": [5, 14], "img": [5, 8, 14], "target": [5, 6, 8, 9, 14], "subset": [5, 16], "polygon": [5, 9, 16], "rotat": [5, 6, 7, 8, 9, 14, 16], "bound": [5, 6, 7, 8, 9, 16], "box": [5, 6, 7, 8, 9, 14, 16], "instead": [5, 6, 7], "straight": [5, 7, 14, 16], "ones": [5, 8, 9], "recognit": [5, 9, 11], "keyword": [5, 6, 7, 9], "argument": [5, 6, 7, 9, 16], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 14], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 14], "prior": 5, "svt": [5, 14], "ucsd": 5, "comput": [5, 9, 15, 16], "hous": 5, "number": [5, 8, 9, 16], "localis": 5, "repositori": [5, 7, 13], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 14], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 14], "part": [5, 8, 16], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": 5, "annot": 5, "abstractdataset": 5, "label_path": [5, 14], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 14, 16], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": 5, "images_90k_norm": 5, "90k": 5, "docartefact": [5, 14], "object": [5, 9, 10, 16], "detect": [5, 9, 10, 11], "element": [5, 6, 7, 9, 
16], "varieti": 5, "arxiv": [5, 7], "ab": 5, "2103": 5, "14470v1": 5, "test": [5, 14], "charactergener": [5, 14], "implement": [5, 6, 7, 8, 9, 16], "d": [5, 14], "abdef": [5, 14], "num_sampl": [5, 14], "100": [5, 8, 9, 14, 16], "vocabulari": [5, 11, 13], "sampl": [5, 14, 16], "iter": [5, 8, 14, 16], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 16], "sample_transform": 5, "wordgener": [5, 14], "min_char": [5, 14], "int": [5, 6, 8, 9], "max_char": [5, 14], "list": [5, 6, 8, 9, 13], "none": [5, 6, 7, 8, 9, 16], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 14, 15, 16], "maximum": [5, 8], "detectiondataset": [5, 14], "recognitiondataset": [5, 14], "labels_path": [5, 14], "contain": [5, 14], "ocrdataset": [5, 14], "label_fil": [5, 14], "jpg": [5, 6, 13], "root": 5, "shuffl": [5, 8], "batch_siz": [5, 11, 14, 15], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 14], "train_it": [5, 14], "next": [5, 14], "befor": [5, 7, 8, 16], "pass": [5, 6, 7, 16], "batch": [5, 7, 8, 14, 16], "drop": 5, "isn": 5, "full": [5, 9, 16], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 14], "content": [5, 6, 9, 16], "properli": 5, "model": [5, 9, 12, 14], "interpret": [5, 6], "multipl": [5, 6, 8, 16], "name": [5, 7, 15, 16], "10": [5, 9, 16], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 16], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 16], "ancient_greek": 5, "48": [5, 16], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 16], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 16], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 14], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 11, 13, 16], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": 5, "120": 5, "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": 5, "german": [5, 11, 13], "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": 5, "118": 5, 
"\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": 5, "dutch": 5, "114": 5, "norwegian": 5, "106": 5, "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": 5, "danish": 5, "finnish": 5, "104": 5, "\u00e4\u00f6\u00e4\u00f6": 5, "swedish": 5, "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": 5, "vietnames": 5, "234": 5, "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": 5, "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": 5, "multilingu": [5, 13], "195": 5, "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 16], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 16], "map": [5, 7], "n": [5, 9], "length": [5, 16], "Of": 5, "string": [5, 6, 9, 16], "option": [5, 7, 11], "start": 5, "case": [5, 9], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 16], "modul": [6, 7, 8, 9, 16], "easili": [6, 9, 11, 13, 14, 16], "export": [6, 7, 9, 10, 16], "analysi": 6, "format": [6, 9, 11, 14, 15, 16], "organ": 6, "uninterrupt": [6, 16], "confid": [6, 9, 16], "float": [6, 8, 9, 15], "associ": 6, "predict": [6, 7, 9, 16], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": 
[6, 16], "rel": [6, 8, 9, 16], "collect": 6, "meant": [6, 15], "two": [6, 12], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 12, 16], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 14], "etc": 6, "artefact_typ": 6, "type": [6, 9, 13, 15, 16], "sever": [6, 8, 16], "its": [6, 7, 8, 9, 14, 16], "titl": [6, 16], "underneath": 6, "page_idx": [6, 16], "dimens": [6, 9, 16], "dict": [6, 9, 16], "numpi": [6, 7, 9, 16], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 16], "raw": [6, 9], "pixel": [6, 8, 16], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 11, 16], "overlai": 6, "displai": [6, 9], "matplotlib": [6, 9], "pyplot": [6, 9], "method": [6, 8, 16], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 16], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8], "render": [6, 16], "72dpi": 6, "output": [6, 8, 15], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": 6, "decod": 6, "shape": [6, 7, 8, 9, 16], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 15], "float32": [6, 7, 8, 15], "desir": 6, "relat": 6, "divid": 6, "255": [6, 7, 8, 9, 16], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": 6, "documentfil": [6, 13], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15, 16], "from_url": 6, "from_imag": [6, 13], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": [7, 15, 16], "describ": [7, 9], "veri": 7, "deep": [7, 16], "convolut": 7, "larg": [7, 13], "modifi": [7, 12, 16], "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 16], "input_tensor": 7, "random": [7, 8, 9, 16], "uniform": [7, 8], "512": 7, "maxval": [7, 
8], "imagenet": 7, "extractor": 7, "resnet18": [7, 13], "resnet": 7, "18": 7, "residu": 7, "boolean": [7, 16], "resnet34": 7, "34": [7, 16], "resnet50": [7, 13], "50": [7, 14, 16], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 16], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 15], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": [7, 9], "unoffici": 7, "config": 7, "vit_b": 7, "b": [7, 9, 16], "textnet_tini": 7, "textnet": 7, "faster": [7, 15], "arbitrarili": 7, "detector": 7, "minimalist": 7, "kernel": [7, 8], "czczup": 7, "tini": 7, "textnet_smal": 7, "textnet_bas": 7, "crop_orientation_predictor": 7, "arch": [7, 13], "croporientationpredictor": 7, "np": [7, 8, 9, 16], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 15, 16], "600": [7, 9, 16], "800": [7, 9, 14, 16], "astyp": [7, 9, 16], "crop": [7, 8, 14, 16], "dataset": [7, 11, 16], "linknet_resnet18": [7, 11, 16], "1024": [7, 9, 11, 16], "linknet_resnet34": [7, 15, 16], "linknet_resnet50": [7, 16], "db_resnet50": [7, 11, 13, 16], "backbon": 7, "db_mobilenet_v3_larg": [7, 13, 16], "mobilenet": [7, 13], "v3": [7, 13, 16], "detection_predictor": [7, 16], "assume_straight_pag": [7, 16], "detectionpredictor": [7, 11], "input_pag": [7, 9, 16], "itself": [7, 13], "fit": [7, 16], "crnn_vgg16_bn": [7, 11, 13, 16], "128": [7, 11, 15, 16], "crnn_mobilenet_v3_smal": [7, 15, 16], "crnn_mobilenet_v3_larg": [7, 13, 16], "sar_resnet31": [7, 16], "31": 7, "64": [7, 8, 16], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": 7, "vitstr_smal": [7, 11, 15, 16], "vitstr_bas": [7, 16], "recognition_predictor": [7, 16], "recognitionpredictor": [7, 11], "ocr_predictor": [7, 11, 13, 15, 16], "det_arch": [7, 11, 13, 15], "reco_arch": [7, 11, 13, 15], "pretrained_backbon": [7, 11], "symmetric_pad": [7, 8, 16], 
"export_as_straight_box": [7, 16], "detect_orient": 7, "straighten_pag": 7, "detect_languag": 7, "ocrpredictor": [7, 11], "up": [7, 16], "assum": 7, "preserv": [7, 8, 16], "ratio": [7, 8, 16], "symmetr": [7, 8, 16], "bottom": [7, 16], "final": 7, "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": 7, "Then": 7, "again": 7, "improv": 7, "kie_predictor": [7, 11], "kiepredictor": 7, "kie": [7, 11], "login_to_hub": [7, 13], "login": 7, "huggingfac": 7, "hub": 7, "from_hub": [7, 13], "repo_id": [7, 13], "instanti": [7, 16], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 13], "model_nam": [7, 13, 15], "save": [7, 14], "configur": 7, "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 16], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 16], "done": 8, "mean": [8, 9, 11], "std": [8, 11], "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 14], "225": 8, "averag": [8, 16], "per": [8, 16], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [8, 16], "rang": 8, "randombright": 8, "max_delta": 8, "adjust": 8, "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 13, 16], "pick": 8, "p": [8, 9, 16], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9, 16], "param": [8, 16], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9, 16], "randomrot": 8, "max_angl": 8, 
"expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 16], "75": [8, 16], "33": [8, 16], "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 13, 15], "consecut": [8, 16], "sequenti": [8, 16], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": [9, 16], "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 16], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 16], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 16], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 16], "famili": 9, "synthes": 9, "metric": [9, 16], "assess": 9, "textmatch": 9, "match": [9, 16], "accuraci": 9, "aggreg": [9, 14], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": [9, 15], "f_a": 9, "left": [9, 16], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": 9, "updat": 9, "hello": [9, 16], "world": [9, 16], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 16], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 16], "g_": 9, "precis": [9, 16], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 16], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 16], "110": 9, "95": [9, 16], "200": 9, "150": [9, 16], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 12, 15], "either": [9, 16], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": 
[9, 16], "detectionmetr": 9, "c_j": 9, "compil": [10, 16], "better": [10, 16], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 16], "searchabl": 10, "don": [11, 16], "meet": 11, "detail": [11, 16], "link": 11, "section": [11, 13, 15, 16], "det_model": [11, 13], "load_weight": 11, "path_to_checkpoint": 11, "weight": 11, "reco_model": [11, 13], "det_param": 11, "path_to_pt": 11, "map_loc": 11, "load_state_dict": 11, "reco_param": 11, "vocab": [11, 13, 14, 16], "class_nam": 11, "total": 11, "date": [11, 16], "preprocessor": [11, 16], "det_predictor": [11, 16], "798": 11, "785": 11, "772": 11, "264": 11, "2749": 11, "287": 11, "reco_predictor": 11, "694": 11, "695": 11, "693": 11, "299": 11, "296": 11, "301": 11, "polici": 12, "restrict": 12, "write": 12, "outsid": 12, "tmp": 12, "work": [12, 16], "step": 12, "usag": [12, 15], "multiprocess": 12, "doctr_multiprocessing_dis": 12, "variabl": 12, "becaus": 12, "shm": 12, "share": [12, 14], "chang": 12, "By": 12, "doctr_cache_dir": 12, "focu": 13, "love": 13, "appreci": 13, "interfac": 13, "io": 13, "custom": [13, 16], "felix92": 13, "db": 13, "vgg16": 13, "bn": 13, "plug": 13, "obj_detect": 13, "exist": 13, "overwritten": 13, "prerequisit": 13, "creat": 13, "co": 13, "instal": 13, "git": 13, "lf": 13, "my_awesome_model": 13, "v1": 13, "directli": [13, 16], "after": [13, 16], "python3": 13, "train_tensorflow": 13, "py": 13, "train_pytorch": 13, "tabl": 13, "pull": 13, "dummi": 13, "tilman": 13, "rassi": 13, "fascan": 13, "evalu": [14, 16], "predefin": 14, "prefer": 14, "signific": 14, "valid": 14, "149": 14, "626": 14, "360": 14, "2000": 14, "3000": 14, "249": 14, "33402": 14, "13068": 14, "772875": 14, "85875": 14, "246": 14, "233": 14, "resourc": 14, "7149": 14, "796": 14, "handwritten": 14, "1268": 14, "472": 14, "21888": 14, "8707": 14, "33608": 14, "19342": 14, "uppercas": 14, "19370": 14, "2186": 14, "257": 14, "647": 14, "73257": 14, "26032": 14, "7100000": 14, 
"707470": 14, "1156": 14, "1107": 14, "849": 14, "1095": 14, "207901": 14, "22672": 14, "7581382": 14, "1337891": 14, "7141797": 14, "793533": 14, "49377": 14, "19598": 14, "alwai": 14, "regular": 14, "2700": 14, "300": 14, "background": 14, "qr_code": 14, "bar_cod": 14, "photo": 14, "classif": 14, "mani": [14, 16], "sensit": 14, "abl": [14, 16], "howev": 14, "guidanc": 14, "tool": 14, "further": 14, "anot": 14, "handl": [14, 16], "underli": [14, 16], "defer": 14, "dataload": 14, "good": 15, "achiev": 15, "might": [15, 16], "tune": 15, "thing": [15, 16], "product": 15, "readi": 15, "help": 15, "support": [15, 16], "devic": 15, "fp16": 15, "point": 15, "occupi": 15, "bit": 15, "advantag": 15, "less": [15, 16], "mixed_precis": 15, "set_global_polici": 15, "mixed_float16": 15, "cuda": 15, "re": 15, "exchang": 15, "interoper": 15, "machin": 15, "structur": [15, 16], "layer": 15, "metadata": 15, "util": 15, "export_model_to_onnx": 15, "input_shap": 15, "dummy_input": 15, "tensorspec": 15, "model_path": 15, "come": 15, "soon": 15, "seen": 16, "onc": 16, "separ": 16, "compon": 16, "charg": 16, "usabl": 16, "backend": 16, "along": 16, "processor": 16, "reusabl": 16, "consist": 16, "delimit": 16, "2d": 16, "corner": 16, "flag": 16, "belong": 16, "skew": 16, "comprehens": 16, "benchmark": 16, "publicli": 16, "sec": 16, "25": 16, "84": 16, "39": 16, "85": 16, "86": 16, "93": 16, "83": 16, "24": 16, "80": 16, "29": 16, "90": 16, "67": 16, "76": 16, "11": 16, "81": 16, "71": 16, "7": 16, "21": 16, "82": 16, "20": 16, "49": 16, "87": 16, "63": 16, "17": 16, "28": 16, "51": 16, "46": 16, "db_resnet34": 16, "22": 16, "89": 16, "74": 16, "56": 16, "68": 16, "92": 16, "61": 16, "41": 16, "00": 16, "79": 16, "38": 16, "88": 16, "62": 16, "26": 16, "06": 16, "78": 16, "47": 16, "54": 16, "abov": 16, "cf": 16, "disclaim": 16, "combin": 16, "199": 16, "second": 16, "warmup": 16, "phase": 16, "measur": 16, "1000": 16, "obtain": 16, "11th": 16, "gen": 16, "intel": 16, "r": 16, "tm": 16, 
"i7": 16, "11800h": 16, "30ghz": 16, "wrap": 16, "useabl": 16, "favorit": 16, "dummy_img": 16, "area": 16, "send": 16, "snippet": 16, "transcrib": 16, "partial": 16, "15": 16, "9": 16, "73": 16, "44": 16, "14": 16, "55": 16, "58": 16, "57": 16, "66": 16, "01": 16, "98": 16, "23": 16, "69": 16, "99": 16, "91": 16, "05": 16, "09": 16, "96": 16, "40": 16, "53": 16, "most": 16, "print": 16, "cfg": 16, "30595": 16, "45": 16, "72": 16, "43": 16, "65": 16, "77": 16, "30": 16, "07": 16, "27": 16, "gvision": 16, "59": 16, "03": 16, "azur": 16, "recogn": 16, "42": 16, "go": 16, "mention": 16, "still": 16, "return": 16, "documentbuild": 16, "resolve_lin": 16, "automat": 16, "resolve_block": 16, "paragraph_break": 16, "paragraph": 16, "035": 16, "nest": 16, "get": 16, "typic": 16, "layout": 16, "340": 16, "text_output": 16, "json_output": 16, "1357421875": 16, "0361328125": 16, "8564453125": 16, "8603515625": 16, "914085328578949": 16, "5478515625": 16, "06640625": 16, "5810546875": 16, "0966796875": 16, "9949972033500671": 16, "51171875": 16, "1630859375": 16, "9578408598899841": 16, "1396484375": 16, "3232421875": 16, "185546875": 16, "3515625": 16, "outpout": 16, "xml": 16, "hocr": 16, "export_as_xml": 16, "xml_output": 16, "xml_bytes_str": 16, "xml_element": 16, "utf": 16, "xmln": 16, "w3": 16, "1999": 16, "xhtml": 16, "lang": 16, "en": 16, "meta": 16, "equiv": 16, "charset": 16, "system": 16, "ocr_pag": 16, "ocr_carea": 16, "ocr_par": 16, "ocr_lin": 16, "ocrx_word": 16, "div": 16, "id": 16, "page_1": 16, "bbox": 16, "3456": 16, "ppageno": 16, "block_1_1": 16, "857": 16, "529": 16, "2504": 16, "2710": 16, "par_1_1": 16, "span": 16, "line_1_1": 16, "x_size": 16, "x_descend": 16, "x_ascend": 16, "word_1_1": 16, "1552": 16, "540": 16, "1778": 16, "580": 16, "x_wconf": 16, "word_1_2": 16, "1782": 16, "1900": 16, "583": 16, "word_1_3": 16, "1420": 16, "597": 16, "1684": 16, "641": 16, "threshold": 16, "region": 16, "accur": 16, "postprocessor": 16, "bin_thresh": 16, 
"box_thresh": 16, "hook": 16, "manipul": 16, "customhook": 16, "def": 16, "__call__": 16, "self": 16, "loc_pr": 16, "Be": 16, "awar": 16, "my_hook": 16, "middl": 16, "pipelin": 16, "add_hook": 16, "execut": 16}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IIITHWS"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WILDRECEIPT"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "textnet_base"], [7, 1, 1, "", "textnet_small"], [7, 1, 1, "", "textnet_tiny"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", 
"vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "kie_predictor"], [7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "parseq"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": 
[[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": 0, "0": 0, "2024": 0, "09": 0, "6": 0, "2022": 0, "29": 0, "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10, 15], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": 2, "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 16], "recognit": [4, 7, 13, 14, 16], "main": 4, "model": [4, 7, 11, 13, 15, 16], "zoo": [4, 7], "detect": [4, 7, 13, 14, 16], "support": [4, 5, 8], "dataset": [4, 5, 14], "arg": [5, 6, 7, 8, 9], "synthet": [5, 14], "gener": [5, 14], "custom": [5, 11], "loader": 5, "dataload": 5, "vocab": 5, "return": [5, 6, 7, 9], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 13], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": [11, 13, 14, 15], "own": [11, 14], "load": [11, 13, 14], "aw": 12, "lambda": 12, "share": 13, "commun": 13, "from": 13, "huggingfac": 13, "hub": 13, "push": 13, "pretrain": 13, 
"name": 13, "convent": 13, "choos": [14, 16], "readi": 14, "us": [14, 15], "avail": [14, 16], "object": 14, "data": 14, "prepar": 15, "infer": 15, "optim": 15, "half": 15, "precis": 15, "export": 15, "onnx": 15, "right": 16, "architectur": 16, "predictor": 16, "end": 16, "ocr": 16, "two": 16, "stage": 16, "approach": 16, "what": 16, "should": 16, "i": 16, "do": 16, "output": 16, "advanc": 16, "option": 16}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.7.0 (2024-09-09)": [[0, "v0-7-0-2024-09-09"]], "v0.6.0 (2022-09-29)": [[0, "v0-6-0-2022-09-29"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. 
Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Code style verification": [[2, "code-style-verification"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Args:": [[5, "args"], [5, "id4"], [5, "id7"], [5, "id10"], [5, "id13"], [5, "id16"], [5, "id19"], [5, "id22"], [5, "id25"], [5, "id29"], [5, "id32"], [5, "id37"], [5, "id40"], [5, "id46"], [5, "id49"], [5, "id50"], [5, "id51"], [5, "id54"], [5, "id57"], [5, "id60"], [5, "id61"], [6, "args"], [6, "id2"], [6, "id3"], [6, "id4"], [6, "id5"], [6, "id6"], [6, "id7"], [6, "id10"], [6, "id12"], [6, "id14"], [6, "id16"], [6, "id20"], [6, "id24"], [6, "id28"], [7, "args"], [7, "id3"], [7, "id8"], [7, "id13"], [7, "id17"], [7, "id21"], [7, "id26"], [7, "id31"], [7, "id36"], [7, "id41"], [7, "id45"], [7, "id49"], [7, "id54"], [7, "id58"], [7, "id63"], [7, "id68"], [7, "id72"], [7, 
"id76"], [7, "id81"], [7, "id86"], [7, "id90"], [7, "id95"], [7, "id99"], [7, "id103"], [7, "id108"], [7, "id113"], [7, "id118"], [7, "id122"], [7, "id126"], [7, "id131"], [7, "id135"], [7, "id139"], [7, "id143"], [7, "id145"], [7, "id147"], [7, "id149"], [8, "args"], [8, "id1"], [8, "id2"], [8, "id3"], [8, "id4"], [8, "id5"], [8, "id6"], [8, "id7"], [8, "id8"], [8, "id9"], [8, "id10"], [8, "id11"], [8, "id12"], [8, "id13"], [8, "id14"], [8, "id15"], [8, "id16"], [8, "id17"], [8, "id18"], [9, "args"], [9, "id3"], [9, "id5"], [9, "id6"], [9, "id7"], [9, "id8"], [9, "id9"], [9, "id10"], [9, "id11"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [14, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id62"]], "Returns:": [[5, "returns"], [6, "returns"], [6, "id11"], [6, "id13"], [6, "id15"], [6, "id19"], [6, "id23"], [6, "id27"], [6, "id31"], [7, "returns"], [7, "id6"], [7, "id11"], [7, "id16"], [7, "id20"], [7, "id24"], [7, "id29"], [7, "id34"], [7, "id39"], [7, "id44"], [7, "id48"], [7, "id52"], [7, "id57"], [7, "id61"], [7, "id66"], [7, "id71"], [7, "id75"], [7, "id79"], [7, "id84"], [7, "id89"], [7, "id93"], [7, "id98"], [7, "id102"], [7, "id106"], [7, "id111"], [7, "id116"], [7, "id121"], [7, "id125"], [7, "id129"], [7, "id134"], [7, "id138"], [7, "id142"], [7, "id144"], [7, "id146"], [7, "id148"], [9, "returns"], [9, "id4"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, 
"doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Train your own model": [[11, "train-your-own-model"]], "Loading your custom trained model": [[11, "loading-your-custom-trained-model"]], "AWS Lambda": [[12, "aws-lambda"]], "Share your model with the community": [[13, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[13, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[13, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[13, "pretrained-community-models"]], "Naming conventions": [[13, "naming-conventions"]], "Classification": [[13, "classification"]], "Detection": [[13, "detection"], [14, "detection"]], "Recognition": [[13, "recognition"], [14, "recognition"]], "Choose a ready to use dataset": [[14, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[14, "available-datasets"]], "Object Detection": [[14, "object-detection"]], "Use your own datasets": [[14, "use-your-own-datasets"]], "Data Loading": [[14, "data-loading"]], "Preparing your model for inference": [[15, "preparing-your-model-for-inference"]], "Model optimization": [[15, "model-optimization"]], "Half-precision": [[15, "half-precision"]], "Export to ONNX": [[15, "export-to-onnx"]], "Using your ONNX exported model in docTR": [[15, "using-your-onnx-exported-model-in-doctr"]], "Choosing the right model": [[16, "choosing-the-right-model"]], "Text Detection": [[16, "text-detection"]], "Available architectures": [[16, "available-architectures"], [16, "id1"], [16, "id2"]], "Detection predictors": [[16, 
"detection-predictors"]], "Text Recognition": [[16, "text-recognition"]], "Recognition predictors": [[16, "recognition-predictors"]], "End-to-End OCR": [[16, "end-to-end-ocr"]], "Two-stage approaches": [[16, "two-stage-approaches"]], "What should I do with the output?": [[16, "what-should-i-do-with-the-output"]], "Advanced options": [[16, "advanced-options"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "iiithws (class in doctr.datasets)": [[5, "doctr.datasets.IIITHWS"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wildreceipt (class in doctr.datasets)": [[5, "doctr.datasets.WILDRECEIPT"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, 
"doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module 
doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], "kie_predictor() (in module doctr.models)": [[7, "doctr.models.kie_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "parseq() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.parseq"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], 
"resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "textnet_base() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_base"]], "textnet_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_small"]], "textnet_tiny() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_tiny"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class 
in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, 
"doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/latest/using_doctr/using_models.html b/latest/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/latest/using_doctr/using_models.html +++ b/latest/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+ +

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.1.0/_sources/using_doctr/using_models.rst.txt b/v0.1.0/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.1.0/_sources/using_doctr/using_models.rst.txt +++ b/v0.1.0/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.1.0/searchindex.js b/v0.1.0/searchindex.js index 69eea388e7..904aae303d 100644 --- a/v0.1.0/searchindex.js +++ b/v0.1.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["documents", "index", "installing", "models", "utils"], "filenames": ["documents.rst", "index.rst", "installing.rst", "models.rst", "utils.rst"], "titles": ["doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.utils"], "terms": {"releas": 2, "note": [], "we": [], "member": [], "leader": [], "make": [3, 4], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [0, 3], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": [], "level": [], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 0, "act": [], "interact": [], "wai": 1, "contribut": [], "an": [0, 1, 3], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 3, 4], "behavior": [], "posit": [], "environ": [], "includ": 3, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": 3, "affect": [], "mistak": [], "learn": 3, "from": [0, 1, 3, 4], "focus": [], "what": [], "i": 3, "best": [], "just": [], "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 3], "us": [2, 3, 4], "languag": [0, 1], "imageri": [], "attent": [], "advanc": [], "ani": [0, 3, 4], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": [], "privat": [], "publish": [], 
"inform": [1, 3], "physic": [], "email": [], "address": [], "without": 3, "explicit": [], "permiss": [], "which": 3, "could": [], "reason": [], "consid": [], "inappropri": [], "profession": [], "set": 3, "ar": [0, 4], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": 4, "right": 3, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": [], "thi": [2, 3, 4], "moder": [], "decis": [], "when": [], "appli": [], "within": [], "all": 0, "space": [], "also": [], "offici": [], "repres": [], "e": 2, "mail": [], "post": [], "via": 1, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 3, "abus": [], "otherwis": [], "mai": [], "report": [], "contact": [], "minde": 2, "com": 2, "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": 3, "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": 3, "written": [], "provid": [1, 3], "clariti": [], "around": 3, "natur": 1, "explan": [], "why": [], "wa": [], "apologi": [], "request": [], "through": [], "singl": [], "seri": [], "continu": [], "No": [], "involv": 3, "unsolicit": [], "specifi": 0, "period": [], "time": 3, "avoid": [], "well": [], "extern": [], "channel": [0, 3], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": 0, "adapt": [], "version": 3, "0": 4, "avail": [], "http": 2, "www": [], "org": [], "_": [], "html": [], "were": [], "inspir": [], "mozilla": [], "": [0, 4], "ladder": [], "For": 3, "answer": [], "common": [], "question": [], "about": [], "see": [], "faq": [], "translat": [], "everyth": [], "you": 3, "need": 4, "know": [], "effici": [], "project": [], "packag": 4, "python": 1, "doc": 0, "librari": 2, 
"build": [], "script": [], "refer": [], "train": 3, "demo": [], "small": [], "app": [], "showcas": [], "capabl": [], "api": [], "minim": [], "templat": [], "deploi": [], "rest": 4, "ensur": [], "proper": [], "mainten": [], "github": 2, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 0, "As": [], "contributor": [], "onli": 4, "your": [0, 1, 4], "ad": [], "whether": [0, 4], "encount": [], "problem": [], "suggest": [], "input": [0, 3], "ha": [], "valu": 0, "can": 3, "purpos": 3, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": [], "alreadi": [], "cover": [], "close": [], "If": [0, 3], "feel": [], "new": [], "one": 3, "do": [], "so": [], "whenev": [], "possibl": [], "enough": [], "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 4], "out": [3, 4], "discuss": [], "q": [], "forum": [], "specif": [], "stackoverflow": [], "addit": [], "depend": [], "command": [], "m": [], "pip": 2, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 3, "pleas": [], "googl": [], "eas": [], "process": [0, 1], "later": [], "messag": [], "udac": [], "guid": [], "order": [0, 3], "same": 4, "ci": [], "workflow": [], "unittest": [], "local": 3, "To": [], "togeth": 3, "current": [], "built": [], "sphinx": [], "thank": [], "our": [], "file": [], "been": [], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [], "clear": [], "web": [], "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 0, "wish": [], "somewher": [], "els": [], "than": 4, "join": [], "slack": [], "where": 0, "find": [], "requir": 2, "3": [0, 2, 3, 4], "8": [], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": 3, "pytorch": [], "correspond": 3, "page": [0, 4], "2": [], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [3, 4], "12": [], "anoth": [], "linux": [], "few": [], "extra": [], "maco": 
[], "user": 0, "them": [], "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": [], "gtk": [], "latest": [], "over": [], "here": [], "last": [2, 3], "stabl": 2, "doctr": 2, "strive": [], "reduc": [], "framework": [], "minimum": [], "necessari": [], "featur": [3, 4], "develop": [], "third": [], "parti": [], "miss": [], "tf": 3, "torch": [], "mode": 2, "clone": 2, "state": 1, "art": 1, "optic": 3, "charact": 3, "made": [], "seamless": [], "access": 0, "anyon": [], "power": 1, "easi": [1, 4], "extract": [1, 3], "valuabl": 1, "autom": 1, "seamlessli": [], "understand": 1, "task": [1, 3], "ocr": [], "predictor": [], "pars": [], "textual": 0, "identifi": 3, "each": [0, 3], "word": [0, 4], "research": 1, "quickli": 1, "compar": 1, "own": 1, "architectur": [1, 3], "speed": [], "perform": [0, 1, 3], "robust": [], "stage": [], "pretrain": [3, 4], "paramet": [0, 3, 4], "friendli": [], "line": [0, 4], "code": [], "load": [], "googlevis": [], "aw": [], "textract": [], "optim": [], "infer": [], "both": [], "cpu": [], "gpu": [], "light": [], "activ": [], "maintain": [], "integr": [], "deploy": [], "dbnet": 3, "real": 3, "scene": 3, "differenti": 3, "binar": 3, "linknet": [], "exploit": [], "encod": [], "represent": [], "semant": [], "segment": 3, "sar": 3, "show": [3, 4], "attend": 3, "read": 3, "simpl": 3, "strong": 3, "baselin": 3, "irregular": 3, "crnn": 3, "end": [], "trainabl": [], "neural": [], "network": [], "imag": [0, 3, 4], "base": [], "sequenc": 3, "Its": [], "applic": [], "master": [], "multi": [], "aspect": [], "non": [0, 4], "vitstr": [], "vision": [], "transform": [], "fast": [], "parseq": [], "permut": [], "autoregress": [], "funsd": [], "form": [], "noisi": [], "scan": [], "cord": [], "consolid": [], "receipt": [], "forpost": [], "sroie": [], "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 0, "group": [], "svhn": [], "digit": [], 
"unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [0, 3], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [], "dual": [], "modal": [], "graph": [], "kei": [], "bool": [0, 3, 4], "true": [0, 3, 4], "use_polygon": [], "fals": 3, "recognition_task": [], "kwarg": [0, 3], "sourc": [0, 3, 4], "document": [3, 4], "import": [0, 3, 4], "train_set": [], "download": [], "img": [], "target": 3, "subset": [], "polygon": [], "rotat": 0, "bound": [0, 3], "box": [0, 3], "instead": 0, "straight": [], "ones": [], "recognit": [], "keyword": [], "argument": [], "visiondataset": [], "icdar2019": [], "competit": [], "iiit5k": [], "bmvc": [], "2012": [], "text": 0, "prior": [], "svt": [], "ucsd": [], "comput": [], "hous": [], "number": [], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 4], "label_fold": [], "label": [], "part": [], "challeng": [], "task2": [], "2015": [], "path": 0, "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [], "annot": [], "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": [], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [], "detect": [], "element": [0, 3], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 3], "d": [], "abdef": [], "num_sampl": [], "100": [], "vocabulari": [], "sampl": [], "iter": [], "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": 3, "sample_transform": [], "wordgener": [], "min_char": [], "int": [0, 3], "max_char": [], "list": 0, "none": [0, 
4], "callabl": [], "tupl": [0, 3], "32": 3, "maximum": [], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": [], "label_fil": [], "jpg": 0, "root": [], "shuffl": [], "batch_siz": [], "drop_last": [], "num_work": [], "collate_fn": [], "wrapper": [], "train_load": [], "train_it": [], "next": [], "befor": [], "pass": 3, "batch": 3, "drop": [], "isn": [], "full": [3, 4], "worker": [], "function": [3, 4], "merg": [], "sinc": [], "content": 0, "properli": [], "model": [1, 4], "interpret": [], "multipl": [], "name": [], "10": [], "0123456789": [], "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": [], "52": [], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "punctuat": [], "currenc": [], "5": [], "ancient_greek": [], "48": [], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": [], "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": [], "french": [], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], 
"\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": [], "target_s": [], "eo": [], "pad": 3, "dynamic_seq_length": [], "ndarrai": [0, 4], "given": 3, "map": 3, "n": [], "length": [], "Of": [], "string": [0, 3], "option": [], "start": [], "case": [], "upper": [], "enabl": 0, "dynam": [], "tensor": 3, "modul": [0, 3, 4], "easili": 0, "export": [0, 4], "analysi": [0, 3], "format": [0, 3], "organ": 0, "uninterrupt": [], "confid": 0, "float": 0, "associ": 0, "predict": [0, 4], "xmin": 0, "ymin": 0, "xmax": 0, "ymax": 0, "coordin": 0, "rel": 0, "collect": 0, "meant": [], "two": [], "column": [], "horizont": [], "resolv": 0, "default": [0, 3], "smallest": 0, "enclos": 0, "g": [], "qr": [], "pictur": [], "chart": [], "signatur": [], "logo": [], "etc": [], "artefact_typ": 0, "type": [0, 3], "sever": [], "its": [], "titl": [], "underneath": [], "page_idx": 0, "dimens": [0, 3, 4], "dict": [0, 4], "numpi": [0, 3, 4], "arrai": 4, "uint8": [3, 4], "raw": 0, "pixel": 0, "height": 0, "width": 0, "dictionari": 0, "angl": 0, "degress": 0, 
"preserve_aspect_ratio": [], "overlai": [], "displai": 4, "matplotlib": 4, "pyplot": 4, "method": [], "high": 0, "convers": 0, "read_pdf": 0, "byte": 3, "scale": [], "rgb_mode": [], "password": [], "pdf": 0, "convert": [0, 3], "render": [], "72dpi": [], "output": 0, "rgb": 0, "bgr": 0, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 0, "shape": [0, 3, 4], "h": 0, "x": 0, "w": 0, "c": 2, "read_img_as_numpi": [], "output_s": 0, "rgb_output": 0, "expect": [0, 3], "read_img_as_tensor": [], "img_path": [], "dtype": 3, "float32": 3, "desir": [], "relat": [], "divid": [], "255": [3, 4], "decode_img_as_tensor": [], "img_cont": [], "stream": [], "read_html": [], "url": [], "yoursit": [], "weasyprint": [], "documentfil": [], "extens": [], "classmethod": [], "from_pdf": [], "binari": 3, "from_url": [], "from_imag": [], "page1": [], "png": [], "page2": [], "vgg16_bn_r": [], "vgg": 3, "16": 3, "describ": 3, "veri": [], "deep": 3, "convolut": 3, "larg": [], "modifi": [], "normal": 3, "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 3, "random": [3, 4], "uniform": 3, "512": 3, "maxval": 3, "imagenet": 3, "extractor": 3, "resnet18": [], "resnet": 3, "18": [], "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 3, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 3, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": 3, "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": [], "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": [], "croporientationpredictor": [], "np": [3, 4], "classif_mobilenet_v3_smal": 
[], "input_crop": [], "rand": [3, 4], "600": [3, 4], "800": [3, 4], "astyp": [3, 4], "crop": 3, "dataset": [], "linknet_resnet18": [], "1024": 3, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 3, "backbon": 3, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": [], "assume_straight_pag": [], "detectionpredictor": 3, "input_pag": [3, 4], "itself": [], "fit": [], "crnn_vgg16_bn": 3, "128": 3, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": [], "31": [], "64": 3, "256": 3, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [], "recognitionpredictor": 3, "ocr_predictor": [], "det_arch": [], "reco_arch": [], "pretrained_backbon": [], "symmetric_pad": [], "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 3, "up": [], "assum": [], "preserv": [], "ratio": [], "symmetr": [], "bottom": [], "final": [], "potenti": 3, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [], "configur": [], "my": [], "procedur": [], "draw": [], "design": [], "torchvis": [], "resiz": 3, "bilinear": 3, "transfo": [], "minval": [], "interpol": 3, "zero": 3, "while": [], "done": [], "mean": [], "std": [], "gaussian": [], "distribut": [], "485": [], "456": [], "406": [], "229": [], "225": [], "averag": [], "per": [], "standard": [], "deviat": [], "lambdatransform": [], "fn": [], "lambda": [], "tograi": [], "num_output_channel": [], "grayscal": [], "colorinvers": [], "min_val": [], "tranform": [], "color": [], "shift": 
[], "randomli": [], "invert": [], "6": 2, "rang": [], "randombright": [], "max_delta": [], "adjust": [], "bright": [], "delta": [], "offset": [], "add": [], "pick": [], "p": [], "probabl": [], "randomcontrast": [], "contrast": [], "contrast_factor": [], "factor": [], "randomsatur": [], "satur": [], "hsv": [], "increas": [], "randomhu": [], "hue": [], "randomgamma": [], "min_gamma": [], "max_gamma": [], "min_gain": [], "max_gain": [], "gamma": [], "correct": [], "neg": [], "lower": [], "param": 3, "constant": [], "multipli": [], "randomjpegqu": [], "min_qual": [], "60": [], "max_qual": [], "jpeg": [], "qualiti": [], "dimension": [], "between": [], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": [], "consecut": 3, "sequenti": 3, "oneof": [], "jpegqual": [], "randomappli": [], "regroup": 4, "core": 4, "complementari": 4, "sens": 4, "visualize_pag": 4, "words_onli": 4, "display_artefact": [], "add_label": [], "figur": [], "block": [0, 3, 4], "plt": 4, "ocr_db_crnn": [3, 4], "artefact": 0, "figsiz": [], "largest": [], "side": [], "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [], "assess": [], "textmatch": [], "match": [], "accuraci": 3, "aggreg": [], "foral": [], "y": [], "mathcal": [], "frac": [], "sum": [], "limits_": [], "f_": [], "y_i": [], "x_i": [], "indic": [], "defin": [], "f_a": [], "left": [], "begin": [], "ll": [], "mbox": [], "strictli": [], "integ": [], "updat": [], "hello": [], "world": [], "summari": [], "gt": [], "pred": [], "groung": 
[], "truth": [], "exact": [], "score": [], "counterpart": [], "unidecod": [], "localizationconfus": [], "iou_thresh": [], "mask_shap": [], "use_broadcast": [], "confus": [], "iou": [], "recal": 3, "g_": [], "precis": 3, "meaniou": [], "j": [], "y_j": [], "being": [], "intersect": [], "union": [], "g_x": [], "assign": [], "_i": [], "geq": [], "ground": [], "asarrai": [], "70": [], "110": [], "95": [], "200": [], "150": [], "pair": [], "broadcast": [], "consum": [], "memori": [], "either": 3, "ocrmetr": [], "l": [], "hat": [], "h_": [], "b_j": [], "l_j": [], "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 3, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [], "class_nam": [], "total": [], "date": [], "preprocessor": 3, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 3, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 3, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 1, "git": 1, "lf": [], "my_awesome_model": [], "v1": [], "directli": 3, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], 
"dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [], "predefin": [], "prefer": [], "signific": [], "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 3, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": [], "underli": [], "defer": [], "dataload": [], "good": [], "achiev": [], "might": [], "tune": [], "thing": [], "product": [], "readi": [], "help": [], "support": [], "devic": [], "fp16": 3, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 3, "layer": [], "metadata": [], "util": [1, 3], "export_model_to_onnx": [], "input_shap": 3, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 3, "onc": 3, "separ": 3, "compon": 3, "charg": 3, "usabl": 3, "backend": 3, "along": [], "processor": [], "reusabl": 3, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": [], "86": [], "93": [], "83": [], "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": [], "21": [], "82": [], "20": [], "49": [], "87": [], "63": 
[], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": [], "cf": [], "disclaim": [], "combin": 3, "199": [], "second": [], "warmup": [], "phase": [], "measur": [], "1000": [], "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": [], "40": [], "53": [], "most": 3, "print": [], "cfg": [], "30595": [], "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": [], "gvision": [], "59": [], "03": [], "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [0, 3], "nest": [], "get": [], "typic": [], "layout": [], "340": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], 
"x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 3, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": 0, "read_img": 0, "seemlessli": 1, "conda": 1, "newer": 2, "developp": 2, "fp": 3, "scheme": 3, "deform": 3, "statist": 3, "turn": 3, "easier": 3, "let": 3, "db_resnet50_predictor": 3, "sar_vgg16_bn": 3, "rnn": 3, "enhanc": 3, "symbol": 3, "crnn_vgg16_bn_predictor": 3, "sar_vgg16_bn_predictor": 3, "16bn": 3, "convert_to_tflit": 3, "tf_model": 3, "tflite": 3, "conv_sequ": 3, "relu": 3, "kernel_s": 3, "serialized_model": 3, "convert_to_fp16": 3, "half": 3, "serial": 3, "quantize_model": 3, "quantiz": 3, "exclud": 3}, "objects": {"": [[1, 0, 0, "-", "doctr"]], "doctr.documents": [[0, 1, 1, "", "Artefact"], [0, 1, 1, "", "Block"], [0, 1, 1, "", "Document"], [0, 1, 1, "", "Line"], [0, 1, 1, "", "Page"], [0, 1, 1, "", "Word"], [0, 2, 1, "", "read_img"], [0, 2, 1, "", "read_pdf"]], "doctr.models.detection": [[3, 2, 1, "", "db_resnet50"], [3, 2, 1, "", "db_resnet50_predictor"]], "doctr.models.export": [[3, 2, 1, "", "convert_to_fp16"], [3, 2, 1, "", "convert_to_tflite"], [3, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[3, 2, 1, "", "crnn_vgg16_bn"], [3, 2, 1, "", "crnn_vgg16_bn_predictor"], [3, 2, 1, "", "sar_vgg16_bn"], [3, 2, 1, "", "sar_vgg16_bn_predictor"]], "doctr.models.zoo": [[3, 2, 1, "", "ocr_db_crnn"]], "doctr.utils.visualization": [[4, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", 
"function", "Python function"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 3, 4], "codebas": [], "structur": 0, "continu": [], "integr": [], "feedback": [], "featur": [], "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 2, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [0, 1], "let": [], "": [], "connect": [], "prerequisit": [], "via": 2, "python": 2, "packag": [1, 2], "git": 2, "text": [1, 3], "recognit": [1, 3], "main": [], "model": 3, "zoo": 3, "detect": 3, "support": [], "dataset": [], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": [], "return": [], "io": [], "word": [], "line": [], "artefact": [], "block": [], "page": [], "file": 0, "read": 0, "classif": [], "factori": [], "transform": [], "compos": [], "util": 4, "visual": 4, "task": [], "evalu": [], "notebook": [], "train": [], "your": [], "own": [], "load": [], "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": [], "avail": [], "object": [], "data": [], "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 3, "onnx": [], "right": [], "architectur": [], "predictor": 3, "end": 3, "ocr": 3, "two": 3, "stage": 3, "approach": 3, "what": [], "should": [], "i": [], "do": [], "output": 3, 
"advanc": [], "option": [], "get": 1, "start": 1, "conda": 2, "pre": 3, "process": 3, "post": 3}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.documents": [[0, "doctr-documents"]], "Document structure": [[0, "document-structure"]], "File reading": [[0, "file-reading"]], "DocTR: Document Text Recognition": [[1, "doctr-document-text-recognition"]], "Getting Started": [[1, null]], "Package Documentation": [[1, null]], "Installation": [[2, "installation"]], "Via Python Package": [[2, "via-python-package"]], "Via Conda": [[2, "via-conda"]], "Via Git": [[2, "via-git"]], "doctr.models": [[3, "doctr-models"]], "Text Detection": [[3, "text-detection"]], "Text detection model zoo": [[3, "id2"]], "Pre-processing for detection": [[3, "pre-processing-for-detection"]], "Detection models": [[3, "detection-models"]], "Post-processing outputs": [[3, "post-processing-outputs"], [3, "id1"]], "Detection predictors": [[3, "detection-predictors"]], "Text Recognition": [[3, "text-recognition"]], "Text recognition model zoo": [[3, "id3"]], "Pre-processing for recognition": [[3, "pre-processing-for-recognition"]], "Recognition models": [[3, "recognition-models"]], "Recognition predictors": [[3, "recognition-predictors"]], "End-to-End OCR": [[3, "end-to-end-ocr"]], "Two-stage approaches": [[3, "two-stage-approaches"]], "Model export": [[3, "model-export"]], "doctr.utils": [[4, "doctr-utils"]], "Visualization": [[4, "visualization"]]}, "indexentries": {"artefact (class in doctr.documents)": [[0, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[0, "doctr.documents.Block"]], "document (class in doctr.documents)": [[0, "doctr.documents.Document"]], 
"line (class in doctr.documents)": [[0, "doctr.documents.Line"]], "page (class in doctr.documents)": [[0, "doctr.documents.Page"]], "word (class in doctr.documents)": [[0, "doctr.documents.Word"]], "read_img() (in module doctr.documents)": [[0, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[0, "doctr.documents.read_pdf"]], "doctr": [[1, "module-doctr"]], "module": [[1, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[3, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[3, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.crnn_vgg16_bn"]], "crnn_vgg16_bn_predictor() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.crnn_vgg16_bn_predictor"]], "db_resnet50() (in module doctr.models.detection)": [[3, "doctr.models.detection.db_resnet50"]], "db_resnet50_predictor() (in module doctr.models.detection)": [[3, "doctr.models.detection.db_resnet50_predictor"]], "ocr_db_crnn() (in module doctr.models.zoo)": [[3, "doctr.models.zoo.ocr_db_crnn"]], "quantize_model() (in module doctr.models.export)": [[3, "doctr.models.export.quantize_model"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.sar_vgg16_bn"]], "sar_vgg16_bn_predictor() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.sar_vgg16_bn_predictor"]], "visualize_page() (in module doctr.utils.visualization)": [[4, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["documents", "index", "installing", "models", "utils"], "filenames": ["documents.rst", "index.rst", "installing.rst", "models.rst", "utils.rst"], "titles": ["doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.utils"], "terms": {"releas": 2, "note": [], "we": [], "member": [], "leader": [], "make": [3, 4], 
"particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [0, 3], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": [], "level": [], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 0, "act": [], "interact": [], "wai": 1, "contribut": [], "an": [0, 1, 3], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 3, 4], "behavior": [], "posit": [], "environ": [], "includ": 3, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": 3, "affect": [], "mistak": [], "learn": 3, "from": [0, 1, 3, 4], "focus": [], "what": [], "i": 3, "best": [], "just": [], "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 3], "us": [2, 3, 4], "languag": [0, 1], "imageri": [], "attent": [], "advanc": [], "ani": [0, 3, 4], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": [], "privat": [], "publish": [], "inform": [1, 3], "physic": [], "email": [], "address": [], "without": 3, "explicit": [], "permiss": [], "which": 3, "could": [], "reason": [], "consid": [], "inappropri": [], "profession": [], "set": 3, "ar": [0, 4], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": 4, "right": 3, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": [], "thi": [2, 3, 4], "moder": [], "decis": [], "when": [], "appli": [], "within": [], "all": 0, "space": [], "also": [], "offici": [], "repres": [], "e": 2, "mail": [], "post": [], "via": 1, "social": [], 
"media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 3, "abus": [], "otherwis": [], "mai": [], "report": [], "contact": [], "minde": 2, "com": 2, "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": 3, "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": 3, "written": [], "provid": [1, 3], "clariti": [], "around": 3, "natur": 1, "explan": [], "why": [], "wa": [], "apologi": [], "request": [], "through": [], "singl": [], "seri": [], "continu": [], "No": [], "involv": 3, "unsolicit": [], "specifi": 0, "period": [], "time": 3, "avoid": [], "well": [], "extern": [], "channel": [0, 3], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": 0, "adapt": [], "version": 3, "0": 4, "avail": [], "http": 2, "www": [], "org": [], "_": [], "html": [], "were": [], "inspir": [], "mozilla": [], "": [0, 4], "ladder": [], "For": 3, "answer": [], "common": [], "question": [], "about": [], "see": [], "faq": [], "translat": [], "everyth": [], "you": 3, "need": 4, "know": [], "effici": [], "project": [], "packag": 4, "python": 1, "doc": 0, "librari": 2, "build": [], "script": [], "refer": [], "train": 3, "demo": [], "small": [], "app": [], "showcas": [], "capabl": [], "api": [], "minim": [], "templat": [], "deploi": [], "rest": 4, "ensur": [], "proper": [], "mainten": [], "github": 2, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 0, "As": [], "contributor": [], "onli": 4, "your": [0, 1, 4], "ad": [], "whether": [0, 4], "encount": [], "problem": [], "suggest": [], "input": [0, 3], "ha": [], "valu": 0, "can": 3, "purpos": 3, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": [], "alreadi": [], "cover": [], "close": [], "If": [0, 3], "feel": [], "new": [], 
"one": 3, "do": [], "so": [], "whenev": [], "possibl": [], "enough": [], "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 4], "out": [3, 4], "discuss": [], "q": [], "forum": [], "specif": [], "stackoverflow": [], "addit": [], "depend": [], "command": [], "m": [], "pip": 2, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 3, "pleas": [], "googl": [], "eas": [], "process": [0, 1], "later": [], "messag": [], "udac": [], "guid": [], "order": [0, 3], "same": 4, "ci": [], "workflow": [], "unittest": [], "local": 3, "To": [], "togeth": 3, "current": [], "built": [], "sphinx": [], "thank": [], "our": [], "file": [], "been": [], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [], "clear": [], "web": [], "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 0, "wish": [], "somewher": [], "els": [], "than": 4, "join": [], "slack": [], "where": 0, "find": [], "requir": 2, "3": [0, 2, 3, 4], "8": [], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": 3, "pytorch": [], "correspond": 3, "page": [0, 4], "2": [], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [3, 4], "12": [], "anoth": [], "linux": [], "few": [], "extra": [], "maco": [], "user": 0, "them": [], "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": [], "gtk": [], "latest": [], "over": [], "here": [], "last": [2, 3], "stabl": 2, "doctr": 2, "strive": [], "reduc": [], "framework": [], "minimum": [], "necessari": [], "featur": [3, 4], "develop": [], "third": [], "parti": [], "miss": [], "tf": 3, "torch": [], "mode": 2, "clone": 2, "state": 1, "art": 1, "optic": 3, "charact": 3, "made": [], "seamless": [], "access": 0, "anyon": [], "power": 1, "easi": [1, 4], "extract": [1, 3], "valuabl": 1, "autom": 1, "seamlessli": [], "understand": 1, "task": [1, 3], "ocr": [], "predictor": [], 
"pars": [], "textual": 0, "identifi": 3, "each": [0, 3], "word": [0, 4], "research": 1, "quickli": 1, "compar": 1, "own": 1, "architectur": [1, 3], "speed": [], "perform": [0, 1, 3], "robust": [], "stage": [], "pretrain": [3, 4], "paramet": [0, 3, 4], "friendli": [], "line": [0, 4], "code": [], "load": [], "googlevis": [], "aw": [], "textract": [], "optim": [], "infer": [], "both": [], "cpu": [], "gpu": [], "light": [], "activ": [], "maintain": [], "integr": [], "deploy": [], "dbnet": 3, "real": 3, "scene": 3, "differenti": 3, "binar": 3, "linknet": [], "exploit": [], "encod": [], "represent": [], "semant": [], "segment": 3, "sar": 3, "show": [3, 4], "attend": 3, "read": 3, "simpl": 3, "strong": 3, "baselin": 3, "irregular": 3, "crnn": 3, "end": [], "trainabl": [], "neural": [], "network": [], "imag": [0, 3, 4], "base": [], "sequenc": 3, "Its": [], "applic": [], "master": [], "multi": [], "aspect": [], "non": [0, 4], "vitstr": [], "vision": [], "transform": [], "fast": [], "parseq": [], "permut": [], "autoregress": [], "funsd": [], "form": [], "noisi": [], "scan": [], "cord": [], "consolid": [], "receipt": [], "forpost": [], "sroie": [], "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 0, "group": [], "svhn": [], "digit": [], "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [0, 3], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [], "dual": [], "modal": [], "graph": [], "kei": [], "bool": [0, 3, 4], "true": [0, 3, 4], "use_polygon": [], "fals": 3, "recognition_task": [], "kwarg": [0, 3], "sourc": [0, 3, 4], "document": [3, 4], "import": [0, 3, 4], "train_set": [], "download": [], "img": [], "target": 3, "subset": [], "polygon": [], "rotat": 0, "bound": [0, 3], "box": [0, 3], "instead": 0, "straight": [], "ones": [], "recognit": [], "keyword": [], 
"argument": [], "visiondataset": [], "icdar2019": [], "competit": [], "iiit5k": [], "bmvc": [], "2012": [], "text": 0, "prior": [], "svt": [], "ucsd": [], "comput": [], "hous": [], "number": [], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 4], "label_fold": [], "label": [], "part": [], "challeng": [], "task2": [], "2015": [], "path": 0, "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [], "annot": [], "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": [], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [], "detect": [], "element": [0, 3], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 3], "d": [], "abdef": [], "num_sampl": [], "100": [], "vocabulari": [], "sampl": [], "iter": [], "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": 3, "sample_transform": [], "wordgener": [], "min_char": [], "int": [0, 3], "max_char": [], "list": 0, "none": [0, 4], "callabl": [], "tupl": [0, 3], "32": 3, "maximum": [], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": [], "label_fil": [], "jpg": 0, "root": [], "shuffl": [], "batch_siz": [], "drop_last": [], "num_work": [], "collate_fn": [], "wrapper": [], "train_load": [], "train_it": [], "next": [], "befor": [], "pass": 3, "batch": 3, "drop": [], "isn": [], "full": [3, 4], "worker": [], "function": [3, 4], "merg": [], "sinc": [], "content": 0, "properli": [], "model": [1, 4], "interpret": [], "multipl": [], "name": [], "10": [], "0123456789": [], "hindi_digit": [], 
"\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": [], "52": [], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "punctuat": [], "currenc": [], "5": [], "ancient_greek": [], "48": [], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": [], "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": [], "french": [], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": [], "target_s": [], "eo": [], "pad": 3, 
"dynamic_seq_length": [], "ndarrai": [0, 4], "given": 3, "map": 3, "n": [], "length": [], "Of": [], "string": [0, 3], "option": [], "start": [], "case": [], "upper": [], "enabl": 0, "dynam": [], "tensor": 3, "modul": [0, 3, 4], "easili": 0, "export": [0, 4], "analysi": [0, 3], "format": [0, 3], "organ": 0, "uninterrupt": [], "confid": 0, "float": 0, "associ": 0, "predict": [0, 4], "xmin": 0, "ymin": 0, "xmax": 0, "ymax": 0, "coordin": 0, "rel": 0, "collect": 0, "meant": [], "two": [], "column": [], "horizont": [], "resolv": 0, "default": [0, 3], "smallest": 0, "enclos": 0, "g": [], "qr": [], "pictur": [], "chart": [], "signatur": [], "logo": [], "etc": [], "artefact_typ": 0, "type": [0, 3], "sever": [], "its": [], "titl": [], "underneath": [], "page_idx": 0, "dimens": [0, 3, 4], "dict": [0, 4], "numpi": [0, 3, 4], "arrai": 4, "uint8": [3, 4], "raw": 0, "pixel": 0, "height": 0, "width": 0, "dictionari": 0, "angl": 0, "degress": 0, "preserve_aspect_ratio": [], "overlai": [], "displai": 4, "matplotlib": 4, "pyplot": 4, "method": [], "high": 0, "convers": 0, "read_pdf": 0, "byte": 3, "scale": [], "rgb_mode": [], "password": [], "pdf": 0, "convert": [0, 3], "render": [], "72dpi": [], "output": 0, "rgb": 0, "bgr": 0, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 0, "shape": [0, 3, 4], "h": 0, "x": 0, "w": 0, "c": 2, "read_img_as_numpi": [], "output_s": 0, "rgb_output": 0, "expect": [0, 3], "read_img_as_tensor": [], "img_path": [], "dtype": 3, "float32": 3, "desir": [], "relat": [], "divid": [], "255": [3, 4], "decode_img_as_tensor": [], "img_cont": [], "stream": [], "read_html": [], "url": [], "yoursit": [], "weasyprint": [], "documentfil": [], "extens": [], "classmethod": [], "from_pdf": [], "binari": 3, "from_url": [], "from_imag": [], "page1": [], "png": [], "page2": [], "vgg16_bn_r": [], "vgg": 3, "16": 3, "describ": 3, "veri": [], "deep": 3, "convolut": 3, "larg": [], "modifi": [], "normal": 3, "rectangular": [], "pool": [], "simpler": [], 
"head": [], "input_tensor": 3, "random": [3, 4], "uniform": 3, "512": 3, "maxval": 3, "imagenet": 3, "extractor": 3, "resnet18": [], "resnet": 3, "18": [], "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 3, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 3, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": 3, "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": [], "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": [], "croporientationpredictor": [], "np": [3, 4], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [3, 4], "600": [3, 4], "800": [3, 4], "astyp": [3, 4], "crop": 3, "dataset": [], "linknet_resnet18": [], "1024": 3, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 3, "backbon": 3, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": [], "assume_straight_pag": [], "detectionpredictor": 3, "input_pag": [3, 4], "itself": [], "fit": [], "crnn_vgg16_bn": 3, "128": 3, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": [], "31": [], "64": 3, "256": 3, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [], "recognitionpredictor": 3, "ocr_predictor": [], "det_arch": [], "reco_arch": [], "pretrained_backbon": [], "symmetric_pad": [], "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 3, "up": [], "assum": [], "preserv": [], "ratio": [], "symmetr": [], "bottom": [], "final": [], "potenti": 3, "estim": [], "slightli": [], 
"deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [], "configur": [], "my": [], "procedur": [], "draw": [], "design": [], "torchvis": [], "resiz": 3, "bilinear": 3, "transfo": [], "minval": [], "interpol": 3, "zero": 3, "while": [], "done": [], "mean": [], "std": [], "gaussian": [], "distribut": [], "485": [], "456": [], "406": [], "229": [], "225": [], "averag": [], "per": [], "standard": [], "deviat": [], "lambdatransform": [], "fn": [], "lambda": [], "tograi": [], "num_output_channel": [], "grayscal": [], "colorinvers": [], "min_val": [], "tranform": [], "color": [], "shift": [], "randomli": [], "invert": [], "6": 2, "rang": [], "randombright": [], "max_delta": [], "adjust": [], "bright": [], "delta": [], "offset": [], "add": [], "pick": [], "p": [], "probabl": [], "randomcontrast": [], "contrast": [], "contrast_factor": [], "factor": [], "randomsatur": [], "satur": [], "hsv": [], "increas": [], "randomhu": [], "hue": [], "randomgamma": [], "min_gamma": [], "max_gamma": [], "min_gain": [], "max_gain": [], "gamma": [], "correct": [], "neg": [], "lower": [], "param": 3, "constant": [], "multipli": [], "randomjpegqu": [], "min_qual": [], "60": [], "max_qual": [], "jpeg": [], "qualiti": [], "dimension": [], "between": [], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], 
"opacity_rang": [], "shade": [], "opac": [], "It": [], "consecut": 3, "sequenti": 3, "oneof": [], "jpegqual": [], "randomappli": [], "regroup": 4, "core": 4, "complementari": 4, "sens": 4, "visualize_pag": 4, "words_onli": 4, "display_artefact": [], "add_label": [], "figur": [], "block": [0, 3, 4], "plt": 4, "ocr_db_crnn": [3, 4], "artefact": 0, "figsiz": [], "largest": [], "side": [], "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [], "assess": [], "textmatch": [], "match": [], "accuraci": 3, "aggreg": [], "foral": [], "y": [], "mathcal": [], "frac": [], "sum": [], "limits_": [], "f_": [], "y_i": [], "x_i": [], "indic": [], "defin": [], "f_a": [], "left": [], "begin": [], "ll": [], "mbox": [], "strictli": [], "integ": [], "updat": [], "hello": [], "world": [], "summari": [], "gt": [], "pred": [], "groung": [], "truth": [], "exact": [], "score": [], "counterpart": [], "unidecod": [], "localizationconfus": [], "iou_thresh": [], "mask_shap": [], "use_broadcast": [], "confus": [], "iou": [], "recal": 3, "g_": [], "precis": 3, "meaniou": [], "j": [], "y_j": [], "being": [], "intersect": [], "union": [], "g_x": [], "assign": [], "_i": [], "geq": [], "ground": [], "asarrai": [], "70": [], "110": [], "95": [], "200": [], "150": [], "pair": [], "broadcast": [], "consum": [], "memori": [], "either": 3, "ocrmetr": [], "l": [], "hat": [], "h_": [], "b_j": [], "l_j": [], "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 3, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], 
"load_state_dict": [], "reco_param": [], "vocab": [], "class_nam": [], "total": [], "date": [], "preprocessor": 3, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 3, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 3, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 1, "git": 1, "lf": [], "my_awesome_model": [], "v1": [], "directli": 3, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [], "predefin": [], "prefer": [], "signific": [], "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 3, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": [], "underli": [], "defer": [], "dataload": [], "good": [], "achiev": [], "might": [], "tune": [], "thing": [], 
"product": [], "readi": [], "help": [], "support": [], "devic": [], "fp16": 3, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 3, "layer": [], "metadata": [], "util": [1, 3], "export_model_to_onnx": [], "input_shap": 3, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 3, "onc": 3, "separ": 3, "compon": 3, "charg": 3, "usabl": 3, "backend": 3, "along": [], "processor": [], "reusabl": 3, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": [], "86": [], "93": [], "83": [], "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": [], "21": [], "82": [], "20": [], "49": [], "87": [], "63": [], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": [], "cf": [], "disclaim": [], "combin": 3, "199": [], "second": [], "warmup": [], "phase": [], "measur": [], "1000": [], "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": [], "40": [], "53": [], "most": 3, "print": [], "cfg": [], "30595": [], "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": [], "gvision": [], "59": [], "03": [], "azur": [], "recogn": [], "42": [], "go": [], "mention": [], 
"still": [], "return": [0, 3], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": [], "get": [], "typic": [], "layout": [], "340": [], "text_output": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 3, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": 0, "read_img": 0, "seemlessli": 1, "conda": 1, "newer": 2, "developp": 2, "fp": 3, "scheme": 3, "deform": 3, "statist": 3, "turn": 3, "easier": 3, "let": 3, "db_resnet50_predictor": 3, "sar_vgg16_bn": 3, "rnn": 3, "enhanc": 3, "symbol": 3, "crnn_vgg16_bn_predictor": 3, "sar_vgg16_bn_predictor": 3, "16bn": 3, "convert_to_tflit": 3, "tf_model": 3, "tflite": 3, "conv_sequ": 
3, "relu": 3, "kernel_s": 3, "serialized_model": 3, "convert_to_fp16": 3, "half": 3, "serial": 3, "quantize_model": 3, "quantiz": 3, "exclud": 3}, "objects": {"": [[1, 0, 0, "-", "doctr"]], "doctr.documents": [[0, 1, 1, "", "Artefact"], [0, 1, 1, "", "Block"], [0, 1, 1, "", "Document"], [0, 1, 1, "", "Line"], [0, 1, 1, "", "Page"], [0, 1, 1, "", "Word"], [0, 2, 1, "", "read_img"], [0, 2, 1, "", "read_pdf"]], "doctr.models.detection": [[3, 2, 1, "", "db_resnet50"], [3, 2, 1, "", "db_resnet50_predictor"]], "doctr.models.export": [[3, 2, 1, "", "convert_to_fp16"], [3, 2, 1, "", "convert_to_tflite"], [3, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[3, 2, 1, "", "crnn_vgg16_bn"], [3, 2, 1, "", "crnn_vgg16_bn_predictor"], [3, 2, 1, "", "sar_vgg16_bn"], [3, 2, 1, "", "sar_vgg16_bn_predictor"]], "doctr.models.zoo": [[3, 2, 1, "", "ocr_db_crnn"]], "doctr.utils.visualization": [[4, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 3, 4], "codebas": [], "structur": 0, "continu": [], "integr": [], "feedback": [], "featur": [], "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 2, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], 
"modifi": [], "document": [0, 1], "let": [], "": [], "connect": [], "prerequisit": [], "via": 2, "python": 2, "packag": [1, 2], "git": 2, "text": [1, 3], "recognit": [1, 3], "main": [], "model": 3, "zoo": 3, "detect": 3, "support": [], "dataset": [], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": [], "return": [], "io": [], "word": [], "line": [], "artefact": [], "block": [], "page": [], "file": 0, "read": 0, "classif": [], "factori": [], "transform": [], "compos": [], "util": 4, "visual": 4, "task": [], "evalu": [], "notebook": [], "train": [], "your": [], "own": [], "load": [], "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": [], "avail": [], "object": [], "data": [], "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 3, "onnx": [], "right": [], "architectur": [], "predictor": 3, "end": 3, "ocr": 3, "two": 3, "stage": 3, "approach": 3, "what": [], "should": [], "i": [], "do": [], "output": 3, "advanc": [], "option": [], "get": 1, "start": 1, "conda": 2, "pre": 3, "process": 3, "post": 3}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.documents": [[0, "doctr-documents"]], "Document structure": [[0, "document-structure"]], "File reading": [[0, "file-reading"]], "DocTR: Document Text Recognition": [[1, "doctr-document-text-recognition"]], "Getting Started": [[1, null]], "Package Documentation": [[1, null]], "Installation": [[2, "installation"]], "Via Python Package": [[2, "via-python-package"]], "Via Conda": [[2, "via-conda"]], "Via Git": [[2, 
"via-git"]], "doctr.models": [[3, "doctr-models"]], "Text Detection": [[3, "text-detection"]], "Text detection model zoo": [[3, "id2"]], "Pre-processing for detection": [[3, "pre-processing-for-detection"]], "Detection models": [[3, "detection-models"]], "Post-processing outputs": [[3, "post-processing-outputs"], [3, "id1"]], "Detection predictors": [[3, "detection-predictors"]], "Text Recognition": [[3, "text-recognition"]], "Text recognition model zoo": [[3, "id3"]], "Pre-processing for recognition": [[3, "pre-processing-for-recognition"]], "Recognition models": [[3, "recognition-models"]], "Recognition predictors": [[3, "recognition-predictors"]], "End-to-End OCR": [[3, "end-to-end-ocr"]], "Two-stage approaches": [[3, "two-stage-approaches"]], "Model export": [[3, "model-export"]], "doctr.utils": [[4, "doctr-utils"]], "Visualization": [[4, "visualization"]]}, "indexentries": {"artefact (class in doctr.documents)": [[0, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[0, "doctr.documents.Block"]], "document (class in doctr.documents)": [[0, "doctr.documents.Document"]], "line (class in doctr.documents)": [[0, "doctr.documents.Line"]], "page (class in doctr.documents)": [[0, "doctr.documents.Page"]], "word (class in doctr.documents)": [[0, "doctr.documents.Word"]], "read_img() (in module doctr.documents)": [[0, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[0, "doctr.documents.read_pdf"]], "doctr": [[1, "module-doctr"]], "module": [[1, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[3, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[3, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.crnn_vgg16_bn"]], "crnn_vgg16_bn_predictor() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.crnn_vgg16_bn_predictor"]], "db_resnet50() (in module 
doctr.models.detection)": [[3, "doctr.models.detection.db_resnet50"]], "db_resnet50_predictor() (in module doctr.models.detection)": [[3, "doctr.models.detection.db_resnet50_predictor"]], "ocr_db_crnn() (in module doctr.models.zoo)": [[3, "doctr.models.zoo.ocr_db_crnn"]], "quantize_model() (in module doctr.models.export)": [[3, "doctr.models.export.quantize_model"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.sar_vgg16_bn"]], "sar_vgg16_bn_predictor() (in module doctr.models.recognition)": [[3, "doctr.models.recognition.sar_vgg16_bn_predictor"]], "visualize_page() (in module doctr.utils.visualization)": [[4, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.1.0/using_doctr/using_models.html b/v0.1.0/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.1.0/using_doctr/using_models.html +++ b/v0.1.0/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.1.1/_sources/using_doctr/using_models.rst.txt b/v0.1.1/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.1.1/_sources/using_doctr/using_models.rst.txt +++ b/v0.1.1/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.1.1/searchindex.js b/v0.1.1/searchindex.js index 59537ad2b5..e16c39c106 100644 --- a/v0.1.1/searchindex.js +++ b/v0.1.1/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["datasets", "documents", "index", "installing", "models", "utils"], "filenames": ["datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "utils.rst"], "titles": ["doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.utils"], "terms": {"releas": 3, "note": [], "we": [], "member": [], "leader": [], "make": [4, 5], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": [], "level": [4, 5], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 1, "act": [], "interact": [], "wai": 2, "contribut": [], "an": [0, 1, 2, 4, 5], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 1, 4, 5], "behavior": [], "posit": 5, "environ": [], "includ": 4, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": 4, "affect": [], "mistak": [], "learn": 4, "from": [0, 1, 2, 4, 5], "focus": [], "what": [], "i": [0, 4, 5], "best": [], "just": [], "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 1, 4, 5], "us": [3, 4, 5], "languag": [1, 2], "imageri": [], "attent": [], "advanc": [], "ani": [0, 1, 4, 5], "troll": [], "insult": [], "derogatori": [], "comment": [], 
"polit": [], "attack": [], "public": [], "privat": [], "publish": [], "inform": [2, 4], "physic": [], "email": [], "address": [], "without": 4, "explicit": [], "permiss": [], "which": 4, "could": [], "reason": [], "consid": 5, "inappropri": [], "profession": [], "set": [0, 4, 5], "ar": [0, 1, 4, 5], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [0, 4, 5], "right": [4, 5], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": [], "thi": [3, 4, 5], "moder": [], "decis": [], "when": 5, "appli": [], "within": [], "all": [0, 1, 4, 5], "space": [], "also": [], "offici": [], "repres": 4, "e": 3, "mail": [], "post": [], "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 4, "abus": [], "otherwis": 5, "mai": [], "report": [], "contact": [], "minde": 3, "com": 3, "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [0, 4], "written": [], "provid": [2, 4], "clariti": [], "around": 4, "natur": 2, "explan": 4, "why": [], "wa": [], "apologi": [], "request": [], "through": 0, "singl": [], "seri": [], "continu": [], "No": [], "involv": 4, "unsolicit": [], "specifi": 1, "period": [], "time": [0, 4, 5], "avoid": [], "well": [], "extern": [], "channel": [1, 4], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [0, 1, 5], "adapt": [], "version": 4, "0": [0, 4, 5], "avail": 4, "http": 3, "www": [], "org": [], "_": [], "html": [], "were": [], "inspir": [], "mozilla": [], "": [1, 5], "ladder": [], "For": 4, "answer": [], "common": 5, "question": [], "about": 4, "see": [], "faq": [], "translat": [], 
"everyth": [], "you": 4, "need": 5, "know": [], "effici": 0, "project": [], "packag": 5, "python": 2, "doc": 1, "librari": 3, "build": [], "script": [], "refer": [], "train": [0, 4], "demo": [], "small": [], "app": [], "showcas": [], "capabl": 4, "api": [], "minim": [], "templat": [], "deploi": [], "rest": 5, "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 1, "As": [], "contributor": [], "onli": 5, "your": [0, 1, 2, 5], "ad": [], "whether": [0, 1, 5], "encount": [], "problem": [], "suggest": [], "input": [1, 4], "ha": [0, 5], "valu": 1, "can": [0, 4], "purpos": 4, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": [], "alreadi": [], "cover": [], "close": [], "If": [1, 4], "feel": [], "new": [], "one": [0, 4], "do": [], "so": [], "whenev": [], "possibl": 5, "enough": 4, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 1, 5], "out": [4, 5], "discuss": [], "q": [], "forum": [], "specif": 5, "stackoverflow": [], "addit": [], "depend": [], "command": [], "m": 5, "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 4, "pleas": [], "googl": 4, "eas": [], "process": [1, 2], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4], "same": 5, "ci": [], "workflow": [], "unittest": [], "local": [4, 5], "To": [], "togeth": 4, "current": [], "built": [], "sphinx": [], "thank": [], "our": [], "file": 0, "been": [4, 5], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [], "clear": [], "web": [], "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 1, "wish": [], "somewher": [], "els": [], "than": 5, "join": [], "slack": [], "where": [1, 5], "find": [], "requir": 3, "3": [1, 3, 4, 5], "8": [], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": 4, "pytorch": [], "correspond": 4, "page": [1, 4, 5], 
"2": 5, "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [4, 5], "12": [], "anoth": [], "linux": [], "few": [], "extra": [], "maco": [], "user": 1, "them": 0, "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": [], "gtk": [], "latest": [], "over": 5, "here": 0, "last": [3, 4], "stabl": 3, "doctr": 3, "strive": [], "reduc": [], "framework": 0, "minimum": 5, "necessari": [], "featur": [4, 5], "develop": [], "third": [], "parti": [], "miss": [], "tf": 4, "torch": [], "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": 4, "charact": [4, 5], "made": [], "seamless": [], "access": [0, 1], "anyon": [], "power": 2, "easi": [2, 5], "extract": [0, 2, 4], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [0, 2], "task": [2, 4], "ocr": 5, "predictor": [], "pars": [], "textual": [0, 1], "identifi": 4, "each": [1, 4], "word": [1, 4, 5], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 4], "speed": [], "perform": [1, 2, 4, 5], "robust": [], "stage": [], "pretrain": [4, 5], "paramet": [0, 1, 4, 5], "friendli": [], "line": [1, 5], "code": [], "load": [], "googlevis": [], "aw": 4, "textract": 4, "optim": [], "infer": [], "both": 4, "cpu": [], "gpu": [], "light": [], "activ": [], "maintain": [], "integr": [], "deploy": [], "dbnet": 4, "real": 4, "scene": 4, "differenti": 4, "binar": 4, "linknet": [], "exploit": [], "encod": 0, "represent": [], "semant": [], "segment": 4, "sar": 4, "show": [4, 5], "attend": 4, "read": 4, "simpl": 4, "strong": 4, "baselin": 4, "irregular": 4, "crnn": 4, "end": 5, "trainabl": [], "neural": [], "network": [], "imag": [1, 4, 5], "base": [], "sequenc": [4, 5], "Its": [], "applic": [], "master": [], "multi": [], "aspect": [], "non": [1, 5], "vitstr": [], "vision": 4, "transform": [], "fast": [], "parseq": [], "permut": [], "autoregress": [], "funsd": [0, 4], "form": 0, "noisi": 0, "scan": 0, "cord": [], "consolid": [], "receipt": [], "forpost": 
[], "sroie": [], "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 1, "group": [], "svhn": [], "digit": [], "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [1, 4], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [], "dual": [], "modal": [], "graph": [], "kei": [], "bool": [0, 1, 4, 5], "true": [0, 1, 4, 5], "use_polygon": [], "fals": [0, 4, 5], "recognition_task": [], "kwarg": [0, 1, 4], "sourc": [0, 1, 4, 5], "document": [0, 4, 5], "import": [0, 1, 4, 5], "train_set": 0, "download": 0, "img": 0, "target": [0, 4], "subset": [0, 4], "polygon": [], "rotat": 1, "bound": [1, 4, 5], "box": [1, 4, 5], "instead": 1, "straight": [], "ones": [], "recognit": 5, "keyword": 0, "argument": 0, "visiondataset": 0, "icdar2019": [], "competit": [], "iiit5k": [], "bmvc": [], "2012": [], "text": 1, "prior": [], "svt": [], "ucsd": [], "comput": 5, "hous": [], "number": 5, "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 1, 5], "label_fold": [], "label": [], "part": [], "challeng": [], "task2": [], "2015": [], "path": 1, "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [], "annot": [], "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 5, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 0, "detect": [], "element": [1, 4], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 1, 4, 5], "d": [], "abdef": [], "num_sampl": [], "100": 5, "vocabulari": [], "sampl": [], 
"iter": [], "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": 4, "sample_transform": [], "wordgener": [], "min_char": [], "int": [1, 4, 5], "max_char": [], "list": 1, "none": [0, 1, 5], "callabl": [], "tupl": [1, 4], "32": 4, "maximum": 5, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": [], "label_fil": [], "jpg": 1, "root": [], "shuffl": [], "batch_siz": [], "drop_last": [], "num_work": [], "collate_fn": [], "wrapper": [], "train_load": [], "train_it": [], "next": [], "befor": [], "pass": 4, "batch": 4, "drop": [], "isn": [], "full": [4, 5], "worker": [], "function": [4, 5], "merg": [], "sinc": 0, "content": [0, 1], "properli": 0, "model": [0, 2, 5], "interpret": 0, "multipl": 0, "name": 0, "10": [], "0123456789": [], "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": [], "52": [], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "punctuat": [], "currenc": [], "5": 5, "ancient_greek": [], "48": [], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": [], "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "english": [], "legacy_french": [], "123": [], 
"\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": [], "french": [], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": [], "target_s": [], "eo": [], "pad": 4, "dynamic_seq_length": [], "ndarrai": [1, 5], "given": [0, 4, 5], "map": 4, "n": 5, "length": [], "Of": [], "string": [1, 4], "option": [], "start": [], "case": 5, "upper": [], "enabl": 1, "dynam": [], "tensor": 4, "modul": [1, 4, 5], "easili": [1, 5], "export": [1, 5], "analysi": [1, 4], "format": [1, 4], "organ": 1, "uninterrupt": [], "confid": 1, "float": [1, 5], "associ": 1, "predict": [1, 5], "xmin": 1, "ymin": 1, "xmax": 1, "ymax": 1, "coordin": 1, "rel": 1, "collect": 1, "meant": [], "two": [], "column": [], "horizont": [], "resolv": 1, "default": [1, 4], "smallest": 1, "enclos": 1, "g": [], "qr": [], "pictur": [], "chart": [], "signatur": [], "logo": [], "etc": [], "artefact_typ": 1, "type": [1, 4], "sever": [], "its": [], "titl": [], "underneath": [], "page_idx": 1, "dimens": [1, 4, 5], "dict": [1, 5], "numpi": [1, 4, 5], "arrai": 5, "uint8": [4, 5], "raw": 1, "pixel": 1, "height": 1, "width": 1, "dictionari": 1, "angl": 1, "degress": 1, 
"preserve_aspect_ratio": [], "overlai": [], "displai": 5, "matplotlib": 5, "pyplot": 5, "method": [], "high": 1, "convers": 1, "read_pdf": 1, "byte": 4, "scale": [], "rgb_mode": [], "password": [], "pdf": 1, "convert": [1, 4], "render": [], "72dpi": [], "output": 1, "rgb": 1, "bgr": 1, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 1, "shape": [1, 4, 5], "h": 1, "x": [1, 5], "w": [1, 5], "c": [], "read_img_as_numpi": [], "output_s": 1, "rgb_output": 1, "expect": [0, 1, 4], "read_img_as_tensor": [], "img_path": [], "dtype": 4, "float32": 4, "desir": [], "relat": [], "divid": [], "255": [4, 5], "decode_img_as_tensor": [], "img_cont": [], "stream": [], "read_html": [], "url": 0, "yoursit": [], "weasyprint": [], "documentfil": [], "extens": [], "classmethod": [], "from_pdf": [], "binari": 4, "from_url": [], "from_imag": [], "page1": [], "png": [], "page2": [], "vgg16_bn_r": [], "vgg": 4, "16": 4, "describ": 4, "veri": [], "deep": 4, "convolut": 4, "larg": [], "modifi": [], "normal": 4, "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 4, "random": [4, 5], "uniform": 4, "512": [], "maxval": 4, "imagenet": 4, "extractor": 4, "resnet18": [], "resnet": 4, "18": [], "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 4, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 4, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": 4, "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 5, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": [], "croporientationpredictor": [], "np": [4, 5], 
"classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [4, 5], "600": [4, 5], "800": [4, 5], "astyp": [4, 5], "crop": 4, "dataset": [2, 4], "linknet_resnet18": [], "1024": 4, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 4, "backbon": 4, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": [], "assume_straight_pag": [], "detectionpredictor": 4, "input_pag": [4, 5], "itself": [], "fit": [], "crnn_vgg16_bn": 4, "128": 4, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 4, "31": 4, "64": 4, "256": 4, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [], "recognitionpredictor": 4, "ocr_predictor": [], "det_arch": [], "reco_arch": [], "pretrained_backbon": [], "symmetric_pad": [], "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 4, "up": [], "assum": [], "preserv": [], "ratio": [], "symmetr": [], "bottom": [], "final": [], "potenti": 4, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": 0, "configur": [], "my": [], "procedur": [], "draw": [], "design": [], "torchvis": [], "resiz": 4, "bilinear": 4, "transfo": [], "minval": [], "interpol": 4, "zero": 4, "while": [], "done": [], "mean": 5, "std": [], "gaussian": [], "distribut": [], "485": [], "456": [], "406": [], "229": [], "225": [], "averag": [], "per": [], "standard": [], "deviat": [], "lambdatransform": [], "fn": [], "lambda": [], "tograi": [], "num_output_channel": [], "grayscal": [], "colorinvers": [], "min_val": [], 
"tranform": [], "color": [], "shift": [], "randomli": [], "invert": [], "6": 3, "rang": [], "randombright": [], "max_delta": [], "adjust": [], "bright": [], "delta": [], "offset": [], "add": [], "pick": [], "p": [], "probabl": [], "randomcontrast": [], "contrast": [], "contrast_factor": [], "factor": [], "randomsatur": [], "satur": [], "hsv": [], "increas": [], "randomhu": [], "hue": [], "randomgamma": [], "min_gamma": [], "max_gamma": [], "min_gain": [], "max_gain": [], "gamma": [], "correct": [], "neg": [], "lower": [], "param": 4, "constant": [], "multipli": [], "randomjpegqu": [], "min_qual": [], "60": [], "max_qual": [], "jpeg": [], "qualiti": [], "dimension": [], "between": 5, "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": [], "consecut": 4, "sequenti": 4, "oneof": [], "jpegqual": [], "randomappli": [], "regroup": 5, "core": [0, 5], "complementari": 5, "sens": 5, "visualize_pag": 5, "words_onli": 5, "display_artefact": [], "add_label": [], "figur": [], "block": [1, 4, 5], "plt": 5, "ocr_db_crnn": 5, "artefact": 1, "figsiz": [], "largest": [], "side": [], "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [4, 5], "assess": 5, "textmatch": [], "match": 5, "accuraci": [4, 5], "aggreg": 5, "foral": 5, "y": 5, "mathcal": 5, "frac": 5, "sum": 5, "limits_": 5, "f_": 5, "y_i": 5, "x_i": 5, "indic": 5, "defin": 5, "f_a": 5, "left": 5, "begin": 5, "ll": 5, "mbox": 5, "strictli": 5, "integ": 5, "updat": 5, "hello": 5, "world": 5, "summari": 5, "gt": [], 
"pred": [], "groung": [], "truth": 5, "exact": 5, "score": [], "counterpart": [], "unidecod": [], "localizationconfus": 5, "iou_thresh": 5, "mask_shap": [], "use_broadcast": [], "confus": 5, "iou": 5, "recal": [4, 5], "g_": 5, "precis": [4, 5], "meaniou": 5, "j": 5, "y_j": 5, "being": [4, 5], "intersect": 5, "union": 5, "g_x": 5, "assign": 5, "_i": 5, "geq": 5, "ground": 5, "asarrai": 5, "70": 5, "110": 5, "95": 5, "200": 5, "150": 5, "pair": 5, "broadcast": [], "consum": [], "memori": [], "either": 4, "ocrmetr": 5, "l": 5, "hat": 5, "h_": 5, "b_j": 5, "l_j": 5, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 0, "main": [], "produc": 4, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [], "class_nam": [], "total": [], "date": [], "preprocessor": 4, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 4, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 4, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 4, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": 
[], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [0, 4], "predefin": 0, "prefer": 0, "signific": 0, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 4, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 0, "underli": [], "defer": [], "dataload": [], "good": [], "achiev": [], "might": 4, "tune": [], "thing": [], "product": [], "readi": [], "help": [], "support": [], "devic": [], "fp16": 4, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": 0, "exchang": [], "interoper": [], "machin": [], "structur": 4, "layer": [], "metadata": [], "util": [2, 4], "export_model_to_onnx": [], "input_shap": 4, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 4, "onc": [0, 4], "separ": 4, "compon": 4, "charg": 4, "usabl": 4, "backend": 4, "along": [], "processor": [], "reusabl": 4, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": [], "86": [], "93": [], "83": [], "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": [], "21": [], "82": [], "20": [], "49": [], "87": [], 
"63": [], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": 4, "cf": 4, "disclaim": 4, "combin": 4, "199": 4, "second": [], "warmup": [], "phase": [], "measur": [], "1000": [], "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": [], "40": [], "53": [], "most": 4, "print": [], "cfg": [], "30595": 4, "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": [], "gvision": [], "59": [], "03": [], "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 4], "nest": [], "get": [], "typic": [], "layout": [], "340": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], 
"x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 4, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": 1, "read_img": 1, "seemlessli": 2, "conda": [], "newer": 3, "developp": 3, "fp": 4, "scheme": 4, "deform": 4, "statist": 4, "turn": 4, "easier": 4, "let": 4, "db_resnet50_predictor": 4, "sar_vgg16_bn": 4, "rnn": 4, "enhanc": 4, "symbol": 4, "crnn_vgg16_bn_predictor": 4, "sar_vgg16_bn_predictor": 4, "16bn": 4, "convert_to_tflit": 4, "tf_model": 4, "tflite": 4, "conv_sequ": 4, "relu": 4, "kernel_s": 4, "serialized_model": 4, "convert_to_fp16": 4, "half": 4, "serial": 4, "quantize_model": 4, "quantiz": 4, "exclud": 4, "inherit": 0, "abstract": 0, "verifi": 0, "file_nam": 0, "file_hash": 0, "extract_arch": 0, "overwrit": 0, "sha256": 0, "archiv": 0, "disk": 0, "775": 4, "856": 4, "860": 4, "862": 4, "863": 4, "sar_resnet31_predictor": 4, "ocr_db_crnn_vgg": 4, "652": 4, "721": 4, "ocr_db_sar_vgg": 4, "653": 4, "ocr_db_sar_resnet": 4, "665": 4, "735": 4, "595": 4, "625": 4, "781": 4, "830": 4, "exactmatch": 5, "ignore_cas": 5, "ignore_acc": 5, "ignor": 5, "letter": 5, "accent": 5, "error": 5, "max_dist": 5, "levenshtein": 5, "distanc": 5}, "objects": {"": [[2, 0, 0, "-", "doctr"]], "doctr.datasets": [[0, 1, 1, "", "FUNSD"]], "doctr.datasets.core": [[0, 1, 1, "", "VisionDataset"]], "doctr.documents": [[1, 1, 1, "", "Artefact"], [1, 1, 1, "", "Block"], [1, 1, 1, "", "Document"], [1, 1, 1, "", "Line"], [1, 1, 1, "", "Page"], [1, 1, 1, "", "Word"], [1, 2, 1, "", "read_img"], [1, 2, 1, "", "read_pdf"]], "doctr.models.detection": [[4, 2, 1, "", "db_resnet50"], [4, 2, 1, "", 
"db_resnet50_predictor"]], "doctr.models.export": [[4, 2, 1, "", "convert_to_fp16"], [4, 2, 1, "", "convert_to_tflite"], [4, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[4, 2, 1, "", "crnn_vgg16_bn"], [4, 2, 1, "", "crnn_vgg16_bn_predictor"], [4, 2, 1, "", "sar_resnet31"], [4, 2, 1, "", "sar_resnet31_predictor"], [4, 2, 1, "", "sar_vgg16_bn"], [4, 2, 1, "", "sar_vgg16_bn_predictor"]], "doctr.models.zoo": [[4, 2, 1, "", "ocr_db_crnn_vgg"], [4, 2, 1, "", "ocr_db_sar_resnet"], [4, 2, 1, "", "ocr_db_sar_vgg"]], "doctr.utils.metrics": [[5, 1, 1, "", "ExactMatch"], [5, 1, 1, "", "LocalizationConfusion"], [5, 1, 1, "", "OCRMetric"]], "doctr.utils.visualization": [[5, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 2, 4, 5], "codebas": [], "structur": 1, "continu": [], "integr": [], "feedback": [], "featur": [], "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [1, 2], "let": [], "": [], "connect": [], "prerequisit": [], "via": 3, "python": 3, "packag": [2, 3], "git": 3, "text": [2, 4], "recognit": [2, 4], "main": [], "model": 4, "zoo": 4, "detect": 4, 
"support": 0, "dataset": 0, "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 0, "return": [], "io": [], "word": [], "line": [], "artefact": [], "block": [], "page": [], "file": 1, "read": 1, "classif": [], "factori": [], "transform": [], "compos": [], "util": 5, "visual": 5, "task": 5, "evalu": 5, "notebook": [], "train": [], "your": [], "own": [], "load": [], "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": [], "avail": 0, "object": [], "data": [], "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 4, "onnx": [], "right": [], "architectur": [], "predictor": 4, "end": 4, "ocr": 4, "two": 4, "stage": 4, "approach": 4, "what": [], "should": [], "i": [], "do": [], "output": 4, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 4, "process": 4, "post": 4}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.datasets": [[0, "doctr-datasets"]], "Available Datasets": [[0, "available-datasets"]], "Supported Vocabs": [[0, "supported-vocabs"]], "doctr.documents": [[1, "doctr-documents"]], "Document structure": [[1, "document-structure"]], "File reading": [[1, "file-reading"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Getting Started": [[2, null]], "Package Documentation": [[2, null]], "Installation": [[3, "installation"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.models": [[4, "doctr-models"]], "Text Detection": [[4, "text-detection"]], "Text detection model 
zoo": [[4, "id2"]], "Pre-processing for detection": [[4, "pre-processing-for-detection"]], "Detection models": [[4, "detection-models"]], "Post-processing detections": [[4, "post-processing-detections"]], "Detection predictors": [[4, "detection-predictors"]], "Text Recognition": [[4, "text-recognition"]], "Text recognition model zoo": [[4, "id3"]], "Pre-processing for recognition": [[4, "pre-processing-for-recognition"]], "Recognition models": [[4, "recognition-models"]], "Post-processing outputs": [[4, "post-processing-outputs"]], "Recognition predictors": [[4, "recognition-predictors"]], "End-to-End OCR": [[4, "end-to-end-ocr"]], "end-to-end model zoo": [[4, "id4"]], "Two-stage approaches": [[4, "two-stage-approaches"]], "Model export": [[4, "model-export"]], "doctr.utils": [[5, "doctr-utils"]], "Visualization": [[5, "visualization"]], "Task evaluation": [[5, "task-evaluation"]]}, "indexentries": {"funsd (class in doctr.datasets)": [[0, "doctr.datasets.FUNSD"]], "visiondataset (class in doctr.datasets.core)": [[0, "doctr.datasets.core.VisionDataset"]], "artefact (class in doctr.documents)": [[1, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[1, "doctr.documents.Block"]], "document (class in doctr.documents)": [[1, "doctr.documents.Document"]], "line (class in doctr.documents)": [[1, "doctr.documents.Line"]], "page (class in doctr.documents)": [[1, "doctr.documents.Page"]], "word (class in doctr.documents)": [[1, "doctr.documents.Word"]], "read_img() (in module doctr.documents)": [[1, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[1, "doctr.documents.read_pdf"]], "doctr": [[2, "module-doctr"]], "module": [[2, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[4, 
"doctr.models.recognition.crnn_vgg16_bn"]], "crnn_vgg16_bn_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.crnn_vgg16_bn_predictor"]], "db_resnet50() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50"]], "db_resnet50_predictor() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50_predictor"]], "ocr_db_crnn_vgg() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_crnn_vgg"]], "ocr_db_sar_resnet() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_sar_resnet"]], "ocr_db_sar_vgg() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_sar_vgg"]], "quantize_model() (in module doctr.models.export)": [[4, "doctr.models.export.quantize_model"]], "sar_resnet31() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31"]], "sar_resnet31_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31_predictor"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn"]], "sar_vgg16_bn_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn_predictor"]], "exactmatch (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.ExactMatch"]], "localizationconfusion (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.OCRMetric"]], "visualize_page() (in module doctr.utils.visualization)": [[5, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["datasets", "documents", "index", "installing", "models", "utils"], "filenames": ["datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "utils.rst"], "titles": ["doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.utils"], "terms": {"releas": 3, "note": 
[], "we": [], "member": [], "leader": [], "make": [4, 5], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": [], "level": [4, 5], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 1, "act": [], "interact": [], "wai": 2, "contribut": [], "an": [0, 1, 2, 4, 5], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 1, 4, 5], "behavior": [], "posit": 5, "environ": [], "includ": 4, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": 4, "affect": [], "mistak": [], "learn": 4, "from": [0, 1, 2, 4, 5], "focus": [], "what": [], "i": [0, 4, 5], "best": [], "just": [], "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 1, 4, 5], "us": [3, 4, 5], "languag": [1, 2], "imageri": [], "attent": [], "advanc": [], "ani": [0, 1, 4, 5], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": [], "privat": [], "publish": [], "inform": [2, 4], "physic": [], "email": [], "address": [], "without": 4, "explicit": [], "permiss": [], "which": 4, "could": [], "reason": [], "consid": 5, "inappropri": [], "profession": [], "set": [0, 4, 5], "ar": [0, 1, 4, 5], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [0, 4, 5], "right": [4, 5], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": [], "thi": [3, 4, 5], "moder": [], "decis": [], "when": 5, "appli": [], "within": [], "all": [0, 
1, 4, 5], "space": [], "also": [], "offici": [], "repres": 4, "e": 3, "mail": [], "post": [], "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 4, "abus": [], "otherwis": 5, "mai": [], "report": [], "contact": [], "minde": 3, "com": 3, "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [0, 4], "written": [], "provid": [2, 4], "clariti": [], "around": 4, "natur": 2, "explan": 4, "why": [], "wa": [], "apologi": [], "request": [], "through": 0, "singl": [], "seri": [], "continu": [], "No": [], "involv": 4, "unsolicit": [], "specifi": 1, "period": [], "time": [0, 4, 5], "avoid": [], "well": [], "extern": [], "channel": [1, 4], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [0, 1, 5], "adapt": [], "version": 4, "0": [0, 4, 5], "avail": 4, "http": 3, "www": [], "org": [], "_": [], "html": [], "were": [], "inspir": [], "mozilla": [], "": [1, 5], "ladder": [], "For": 4, "answer": [], "common": 5, "question": [], "about": 4, "see": [], "faq": [], "translat": [], "everyth": [], "you": 4, "need": 5, "know": [], "effici": 0, "project": [], "packag": 5, "python": 2, "doc": 1, "librari": 3, "build": [], "script": [], "refer": [], "train": [0, 4], "demo": [], "small": [], "app": [], "showcas": [], "capabl": 4, "api": [], "minim": [], "templat": [], "deploi": [], "rest": 5, "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 1, "As": [], "contributor": [], "onli": 5, "your": [0, 1, 2, 5], "ad": [], "whether": [0, 1, 5], "encount": [], "problem": [], "suggest": [], "input": [1, 4], "ha": [0, 5], "valu": 1, "can": [0, 4], 
"purpos": 4, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": [], "alreadi": [], "cover": [], "close": [], "If": [1, 4], "feel": [], "new": [], "one": [0, 4], "do": [], "so": [], "whenev": [], "possibl": 5, "enough": 4, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 1, 5], "out": [4, 5], "discuss": [], "q": [], "forum": [], "specif": 5, "stackoverflow": [], "addit": [], "depend": [], "command": [], "m": 5, "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 4, "pleas": [], "googl": 4, "eas": [], "process": [1, 2], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4], "same": 5, "ci": [], "workflow": [], "unittest": [], "local": [4, 5], "To": [], "togeth": 4, "current": [], "built": [], "sphinx": [], "thank": [], "our": [], "file": 0, "been": [4, 5], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [], "clear": [], "web": [], "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 1, "wish": [], "somewher": [], "els": [], "than": 5, "join": [], "slack": [], "where": [1, 5], "find": [], "requir": 3, "3": [1, 3, 4, 5], "8": [], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": 4, "pytorch": [], "correspond": 4, "page": [1, 4, 5], "2": 5, "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [4, 5], "12": [], "anoth": [], "linux": [], "few": [], "extra": [], "maco": [], "user": 1, "them": 0, "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": [], "gtk": [], "latest": [], "over": 5, "here": 0, "last": [3, 4], "stabl": 3, "doctr": 3, "strive": [], "reduc": [], "framework": 0, "minimum": 5, "necessari": [], "featur": [4, 5], "develop": [], "third": [], "parti": [], "miss": [], "tf": 4, "torch": [], "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": 4, "charact": [4, 5], "made": [], "seamless": [], 
"access": [0, 1], "anyon": [], "power": 2, "easi": [2, 5], "extract": [0, 2, 4], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [0, 2], "task": [2, 4], "ocr": 5, "predictor": [], "pars": [], "textual": [0, 1], "identifi": 4, "each": [1, 4], "word": [1, 4, 5], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 4], "speed": [], "perform": [1, 2, 4, 5], "robust": [], "stage": [], "pretrain": [4, 5], "paramet": [0, 1, 4, 5], "friendli": [], "line": [1, 5], "code": [], "load": [], "googlevis": [], "aw": 4, "textract": 4, "optim": [], "infer": [], "both": 4, "cpu": [], "gpu": [], "light": [], "activ": [], "maintain": [], "integr": [], "deploy": [], "dbnet": 4, "real": 4, "scene": 4, "differenti": 4, "binar": 4, "linknet": [], "exploit": [], "encod": 0, "represent": [], "semant": [], "segment": 4, "sar": 4, "show": [4, 5], "attend": 4, "read": 4, "simpl": 4, "strong": 4, "baselin": 4, "irregular": 4, "crnn": 4, "end": 5, "trainabl": [], "neural": [], "network": [], "imag": [1, 4, 5], "base": [], "sequenc": [4, 5], "Its": [], "applic": [], "master": [], "multi": [], "aspect": [], "non": [1, 5], "vitstr": [], "vision": 4, "transform": [], "fast": [], "parseq": [], "permut": [], "autoregress": [], "funsd": [0, 4], "form": 0, "noisi": 0, "scan": 0, "cord": [], "consolid": [], "receipt": [], "forpost": [], "sroie": [], "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 1, "group": [], "svhn": [], "digit": [], "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [1, 4], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [], "dual": [], "modal": [], "graph": [], "kei": [], "bool": [0, 1, 4, 5], "true": [0, 1, 4, 5], "use_polygon": [], "fals": [0, 4, 5], "recognition_task": [], "kwarg": [0, 1, 4], "sourc": [0, 1, 4, 5], "document": [0, 4, 5], 
"import": [0, 1, 4, 5], "train_set": 0, "download": 0, "img": 0, "target": [0, 4], "subset": [0, 4], "polygon": [], "rotat": 1, "bound": [1, 4, 5], "box": [1, 4, 5], "instead": 1, "straight": [], "ones": [], "recognit": 5, "keyword": 0, "argument": 0, "visiondataset": 0, "icdar2019": [], "competit": [], "iiit5k": [], "bmvc": [], "2012": [], "text": 1, "prior": [], "svt": [], "ucsd": [], "comput": 5, "hous": [], "number": 5, "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 1, 5], "label_fold": [], "label": [], "part": [], "challeng": [], "task2": [], "2015": [], "path": 1, "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [], "annot": [], "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 5, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 0, "detect": [], "element": [1, 4], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 1, 4, 5], "d": [], "abdef": [], "num_sampl": [], "100": 5, "vocabulari": [], "sampl": [], "iter": [], "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": 4, "sample_transform": [], "wordgener": [], "min_char": [], "int": [1, 4, 5], "max_char": [], "list": 1, "none": [0, 1, 5], "callabl": [], "tupl": [1, 4], "32": 4, "maximum": 5, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": [], "label_fil": [], "jpg": 1, "root": [], "shuffl": [], "batch_siz": [], "drop_last": [], "num_work": [], "collate_fn": [], "wrapper": [], "train_load": [], "train_it": [], "next": [], "befor": [], "pass": 4, "batch": 4, "drop": [], "isn": [], "full": [4, 5], 
"worker": [], "function": [4, 5], "merg": [], "sinc": 0, "content": [0, 1], "properli": 0, "model": [0, 2, 5], "interpret": 0, "multipl": 0, "name": 0, "10": [], "0123456789": [], "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": [], "52": [], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "punctuat": [], "currenc": [], "5": 5, "ancient_greek": [], "48": [], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": [], "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": [], "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": [], "french": [], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], 
"\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], 
"\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": [], "target_s": [], "eo": [], "pad": 4, "dynamic_seq_length": [], "ndarrai": [1, 5], "given": [0, 4, 5], "map": 4, "n": 5, "length": [], "Of": [], "string": [1, 4], "option": [], "start": [], "case": 5, "upper": [], "enabl": 1, "dynam": [], "tensor": 4, "modul": [1, 4, 5], "easili": [1, 5], "export": [1, 5], "analysi": [1, 4], "format": [1, 4], "organ": 1, "uninterrupt": [], "confid": 1, "float": [1, 5], "associ": 1, "predict": [1, 5], "xmin": 1, "ymin": 1, "xmax": 1, "ymax": 1, "coordin": 1, "rel": 1, "collect": 1, "meant": [], "two": [], "column": [], "horizont": [], "resolv": 1, "default": [1, 4], "smallest": 1, "enclos": 1, "g": [], "qr": [], "pictur": [], "chart": [], "signatur": [], "logo": [], "etc": [], "artefact_typ": 1, "type": [1, 4], "sever": [], "its": [], "titl": [], "underneath": [], "page_idx": 1, "dimens": [1, 4, 5], "dict": [1, 5], "numpi": [1, 4, 5], "arrai": 5, "uint8": [4, 5], "raw": 1, "pixel": 1, "height": 1, "width": 1, "dictionari": 1, "angl": 1, "degress": 1, "preserve_aspect_ratio": [], "overlai": [], "displai": 5, "matplotlib": 5, "pyplot": 5, "method": [], "high": 1, "convers": 1, "read_pdf": 1, "byte": 4, "scale": [], "rgb_mode": [], "password": [], "pdf": 1, "convert": [1, 4], "render": [], "72dpi": [], "output": 1, "rgb": 1, "bgr": 1, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 1, "shape": [1, 4, 5], "h": 1, "x": [1, 5], "w": [1, 5], "c": [], "read_img_as_numpi": [], "output_s": 1, "rgb_output": 1, "expect": [0, 1, 4], "read_img_as_tensor": [], "img_path": [], "dtype": 4, "float32": 4, "desir": [], "relat": [], "divid": [], "255": [4, 5], "decode_img_as_tensor": [], "img_cont": [], "stream": [], "read_html": [], "url": 0, "yoursit": [], "weasyprint": [], "documentfil": [], "extens": [], "classmethod": [], "from_pdf": [], 
"binari": 4, "from_url": [], "from_imag": [], "page1": [], "png": [], "page2": [], "vgg16_bn_r": [], "vgg": 4, "16": 4, "describ": 4, "veri": [], "deep": 4, "convolut": 4, "larg": [], "modifi": [], "normal": 4, "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 4, "random": [4, 5], "uniform": 4, "512": [], "maxval": 4, "imagenet": 4, "extractor": 4, "resnet18": [], "resnet": 4, "18": [], "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 4, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 4, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": 4, "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 5, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": [], "croporientationpredictor": [], "np": [4, 5], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [4, 5], "600": [4, 5], "800": [4, 5], "astyp": [4, 5], "crop": 4, "dataset": [2, 4], "linknet_resnet18": [], "1024": 4, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 4, "backbon": 4, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": [], "assume_straight_pag": [], "detectionpredictor": 4, "input_pag": [4, 5], "itself": [], "fit": [], "crnn_vgg16_bn": 4, "128": 4, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 4, "31": 4, "64": 4, "256": 4, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [], "recognitionpredictor": 4, "ocr_predictor": [], "det_arch": [], "reco_arch": [], "pretrained_backbon": [], "symmetric_pad": [], 
"export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 4, "up": [], "assum": [], "preserv": [], "ratio": [], "symmetr": [], "bottom": [], "final": [], "potenti": 4, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": 0, "configur": [], "my": [], "procedur": [], "draw": [], "design": [], "torchvis": [], "resiz": 4, "bilinear": 4, "transfo": [], "minval": [], "interpol": 4, "zero": 4, "while": [], "done": [], "mean": 5, "std": [], "gaussian": [], "distribut": [], "485": [], "456": [], "406": [], "229": [], "225": [], "averag": [], "per": [], "standard": [], "deviat": [], "lambdatransform": [], "fn": [], "lambda": [], "tograi": [], "num_output_channel": [], "grayscal": [], "colorinvers": [], "min_val": [], "tranform": [], "color": [], "shift": [], "randomli": [], "invert": [], "6": 3, "rang": [], "randombright": [], "max_delta": [], "adjust": [], "bright": [], "delta": [], "offset": [], "add": [], "pick": [], "p": [], "probabl": [], "randomcontrast": [], "contrast": [], "contrast_factor": [], "factor": [], "randomsatur": [], "satur": [], "hsv": [], "increas": [], "randomhu": [], "hue": [], "randomgamma": [], "min_gamma": [], "max_gamma": [], "min_gain": [], "max_gain": [], "gamma": [], "correct": [], "neg": [], "lower": [], "param": 4, "constant": [], "multipli": [], "randomjpegqu": [], "min_qual": [], "60": [], "max_qual": [], "jpeg": [], "qualiti": [], "dimension": [], "between": 5, "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], 
"max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": [], "consecut": 4, "sequenti": 4, "oneof": [], "jpegqual": [], "randomappli": [], "regroup": 5, "core": [0, 5], "complementari": 5, "sens": 5, "visualize_pag": 5, "words_onli": 5, "display_artefact": [], "add_label": [], "figur": [], "block": [1, 4, 5], "plt": 5, "ocr_db_crnn": 5, "artefact": 1, "figsiz": [], "largest": [], "side": [], "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [4, 5], "assess": 5, "textmatch": [], "match": 5, "accuraci": [4, 5], "aggreg": 5, "foral": 5, "y": 5, "mathcal": 5, "frac": 5, "sum": 5, "limits_": 5, "f_": 5, "y_i": 5, "x_i": 5, "indic": 5, "defin": 5, "f_a": 5, "left": 5, "begin": 5, "ll": 5, "mbox": 5, "strictli": 5, "integ": 5, "updat": 5, "hello": 5, "world": 5, "summari": 5, "gt": [], "pred": [], "groung": [], "truth": 5, "exact": 5, "score": [], "counterpart": [], "unidecod": [], "localizationconfus": 5, "iou_thresh": 5, "mask_shap": [], "use_broadcast": [], "confus": 5, "iou": 5, "recal": [4, 5], "g_": 5, "precis": [4, 5], "meaniou": 5, "j": 5, "y_j": 5, "being": [4, 5], "intersect": 5, "union": 5, "g_x": 5, "assign": 5, "_i": 5, "geq": 5, "ground": 5, "asarrai": 5, "70": 5, "110": 5, "95": 5, "200": 5, "150": 5, "pair": 5, "broadcast": [], "consum": [], "memori": [], "either": 4, "ocrmetr": 5, "l": 5, "hat": 5, "h_": 5, "b_j": 5, "l_j": 5, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 0, "main": [], "produc": 4, "searchabl": [], "don": 
[], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [], "class_nam": [], "total": [], "date": [], "preprocessor": 4, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 4, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 4, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 4, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [0, 4], "predefin": 0, "prefer": 0, "signific": 0, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 4, "mani": [], "sensit": [], 
"abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 0, "underli": [], "defer": [], "dataload": [], "good": [], "achiev": [], "might": 4, "tune": [], "thing": [], "product": [], "readi": [], "help": [], "support": [], "devic": [], "fp16": 4, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": 0, "exchang": [], "interoper": [], "machin": [], "structur": 4, "layer": [], "metadata": [], "util": [2, 4], "export_model_to_onnx": [], "input_shap": 4, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 4, "onc": [0, 4], "separ": 4, "compon": 4, "charg": 4, "usabl": 4, "backend": 4, "along": [], "processor": [], "reusabl": 4, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": [], "86": [], "93": [], "83": [], "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": [], "21": [], "82": [], "20": [], "49": [], "87": [], "63": [], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": 4, "cf": 4, "disclaim": 4, "combin": 4, "199": 4, "second": [], "warmup": [], "phase": [], "measur": [], "1000": [], "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": [], "40": [], "53": [], "most": 4, "print": [], 
"cfg": [], "30595": 4, "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": [], "gvision": [], "59": [], "03": [], "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 4], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": [], "get": [], "typic": [], "layout": [], "340": [], "text_output": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 4, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": 1, "read_img": 1, "seemlessli": 2, "conda": [], "newer": 3, "developp": 3, "fp": 4, "scheme": 4, "deform": 4, "statist": 4, "turn": 4, "easier": 4, "let": 4, 
"db_resnet50_predictor": 4, "sar_vgg16_bn": 4, "rnn": 4, "enhanc": 4, "symbol": 4, "crnn_vgg16_bn_predictor": 4, "sar_vgg16_bn_predictor": 4, "16bn": 4, "convert_to_tflit": 4, "tf_model": 4, "tflite": 4, "conv_sequ": 4, "relu": 4, "kernel_s": 4, "serialized_model": 4, "convert_to_fp16": 4, "half": 4, "serial": 4, "quantize_model": 4, "quantiz": 4, "exclud": 4, "inherit": 0, "abstract": 0, "verifi": 0, "file_nam": 0, "file_hash": 0, "extract_arch": 0, "overwrit": 0, "sha256": 0, "archiv": 0, "disk": 0, "775": 4, "856": 4, "860": 4, "862": 4, "863": 4, "sar_resnet31_predictor": 4, "ocr_db_crnn_vgg": 4, "652": 4, "721": 4, "ocr_db_sar_vgg": 4, "653": 4, "ocr_db_sar_resnet": 4, "665": 4, "735": 4, "595": 4, "625": 4, "781": 4, "830": 4, "exactmatch": 5, "ignore_cas": 5, "ignore_acc": 5, "ignor": 5, "letter": 5, "accent": 5, "error": 5, "max_dist": 5, "levenshtein": 5, "distanc": 5}, "objects": {"": [[2, 0, 0, "-", "doctr"]], "doctr.datasets": [[0, 1, 1, "", "FUNSD"]], "doctr.datasets.core": [[0, 1, 1, "", "VisionDataset"]], "doctr.documents": [[1, 1, 1, "", "Artefact"], [1, 1, 1, "", "Block"], [1, 1, 1, "", "Document"], [1, 1, 1, "", "Line"], [1, 1, 1, "", "Page"], [1, 1, 1, "", "Word"], [1, 2, 1, "", "read_img"], [1, 2, 1, "", "read_pdf"]], "doctr.models.detection": [[4, 2, 1, "", "db_resnet50"], [4, 2, 1, "", "db_resnet50_predictor"]], "doctr.models.export": [[4, 2, 1, "", "convert_to_fp16"], [4, 2, 1, "", "convert_to_tflite"], [4, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[4, 2, 1, "", "crnn_vgg16_bn"], [4, 2, 1, "", "crnn_vgg16_bn_predictor"], [4, 2, 1, "", "sar_resnet31"], [4, 2, 1, "", "sar_resnet31_predictor"], [4, 2, 1, "", "sar_vgg16_bn"], [4, 2, 1, "", "sar_vgg16_bn_predictor"]], "doctr.models.zoo": [[4, 2, 1, "", "ocr_db_crnn_vgg"], [4, 2, 1, "", "ocr_db_sar_resnet"], [4, 2, 1, "", "ocr_db_sar_vgg"]], "doctr.utils.metrics": [[5, 1, 1, "", "ExactMatch"], [5, 1, 1, "", "LocalizationConfusion"], [5, 1, 1, "", "OCRMetric"]], 
"doctr.utils.visualization": [[5, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 2, 4, 5], "codebas": [], "structur": 1, "continu": [], "integr": [], "feedback": [], "featur": [], "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [1, 2], "let": [], "": [], "connect": [], "prerequisit": [], "via": 3, "python": 3, "packag": [2, 3], "git": 3, "text": [2, 4], "recognit": [2, 4], "main": [], "model": 4, "zoo": 4, "detect": 4, "support": 0, "dataset": 0, "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 0, "return": [], "io": [], "word": [], "line": [], "artefact": [], "block": [], "page": [], "file": 1, "read": 1, "classif": [], "factori": [], "transform": [], "compos": [], "util": 5, "visual": 5, "task": 5, "evalu": 5, "notebook": [], "train": [], "your": [], "own": [], "load": [], "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": [], "avail": 0, "object": [], "data": [], "prepar": [], "infer": 
[], "optim": [], "half": [], "precis": [], "export": 4, "onnx": [], "right": [], "architectur": [], "predictor": 4, "end": 4, "ocr": 4, "two": 4, "stage": 4, "approach": 4, "what": [], "should": [], "i": [], "do": [], "output": 4, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 4, "process": 4, "post": 4}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.datasets": [[0, "doctr-datasets"]], "Available Datasets": [[0, "available-datasets"]], "Supported Vocabs": [[0, "supported-vocabs"]], "doctr.documents": [[1, "doctr-documents"]], "Document structure": [[1, "document-structure"]], "File reading": [[1, "file-reading"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Getting Started": [[2, null]], "Package Documentation": [[2, null]], "Installation": [[3, "installation"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.models": [[4, "doctr-models"]], "Text Detection": [[4, "text-detection"]], "Text detection model zoo": [[4, "id2"]], "Pre-processing for detection": [[4, "pre-processing-for-detection"]], "Detection models": [[4, "detection-models"]], "Post-processing detections": [[4, "post-processing-detections"]], "Detection predictors": [[4, "detection-predictors"]], "Text Recognition": [[4, "text-recognition"]], "Text recognition model zoo": [[4, "id3"]], "Pre-processing for recognition": [[4, "pre-processing-for-recognition"]], "Recognition models": [[4, "recognition-models"]], "Post-processing outputs": [[4, "post-processing-outputs"]], "Recognition predictors": [[4, "recognition-predictors"]], "End-to-End OCR": [[4, "end-to-end-ocr"]], "end-to-end 
model zoo": [[4, "id4"]], "Two-stage approaches": [[4, "two-stage-approaches"]], "Model export": [[4, "model-export"]], "doctr.utils": [[5, "doctr-utils"]], "Visualization": [[5, "visualization"]], "Task evaluation": [[5, "task-evaluation"]]}, "indexentries": {"funsd (class in doctr.datasets)": [[0, "doctr.datasets.FUNSD"]], "visiondataset (class in doctr.datasets.core)": [[0, "doctr.datasets.core.VisionDataset"]], "artefact (class in doctr.documents)": [[1, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[1, "doctr.documents.Block"]], "document (class in doctr.documents)": [[1, "doctr.documents.Document"]], "line (class in doctr.documents)": [[1, "doctr.documents.Line"]], "page (class in doctr.documents)": [[1, "doctr.documents.Page"]], "word (class in doctr.documents)": [[1, "doctr.documents.Word"]], "read_img() (in module doctr.documents)": [[1, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[1, "doctr.documents.read_pdf"]], "doctr": [[2, "module-doctr"]], "module": [[2, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.crnn_vgg16_bn"]], "crnn_vgg16_bn_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.crnn_vgg16_bn_predictor"]], "db_resnet50() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50"]], "db_resnet50_predictor() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50_predictor"]], "ocr_db_crnn_vgg() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_crnn_vgg"]], "ocr_db_sar_resnet() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_sar_resnet"]], "ocr_db_sar_vgg() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_db_sar_vgg"]], "quantize_model() 
(in module doctr.models.export)": [[4, "doctr.models.export.quantize_model"]], "sar_resnet31() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31"]], "sar_resnet31_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31_predictor"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn"]], "sar_vgg16_bn_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn_predictor"]], "exactmatch (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.ExactMatch"]], "localizationconfusion (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[5, "doctr.utils.metrics.OCRMetric"]], "visualize_page() (in module doctr.utils.visualization)": [[5, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.1.1/using_doctr/using_models.html b/v0.1.1/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.1.1/using_doctr/using_models.html +++ b/v0.1.1/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.2.0/_sources/using_doctr/using_models.rst.txt b/v0.2.0/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.2.0/_sources/using_doctr/using_models.rst.txt +++ b/v0.2.0/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.2.0/searchindex.js b/v0.2.0/searchindex.js index c6e5948291..1b88c853ef 100644 --- a/v0.2.0/searchindex.js +++ b/v0.2.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["datasets", "documents", "index", "installing", "models", "transforms", "utils"], "filenames": ["datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": 3, "note": [], "we": [1, 2, 4, 5], "member": [], "leader": [], "make": [4, 6], "particip": [], "commun": [], "harass": [], "free": [], "experi": 4, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [0, 1, 4, 5], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 5, "level": [4, 6], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 1, "act": [], "interact": 6, "wai": [0, 2, 4], "contribut": [], "an": [0, 1, 2, 4, 6], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 1, 4, 5, 6], "behavior": [], "posit": 6, "environ": [], "includ": 4, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [1, 4], "affect": [], "mistak": [], "learn": 4, "from": [0, 1, 2, 4, 5, 6], "focus": [], "what": [], "i": [0, 1, 2, 4, 5, 6], "best": [], "just": 4, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 1, 4, 6], "us": [0, 3, 6], "languag": [1, 2], "imageri": [], "attent": [], 
"advanc": [], "ani": [0, 1, 2, 4, 6], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": [], "publish": [], "inform": [0, 2, 4], "physic": 1, "email": [], "address": 1, "without": 4, "explicit": [], "permiss": [], "which": 4, "could": [], "reason": [], "consid": [1, 6], "inappropri": [], "profession": [], "set": [0, 4, 6], "ar": [0, 1, 4, 5, 6], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [0, 4, 6], "right": [4, 6], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 1, "thi": [2, 3, 4, 6], "moder": [], "decis": [], "when": 6, "appli": [0, 5], "within": [], "all": [0, 1, 2, 4, 5, 6], "space": [], "also": [], "offici": [], "repres": [1, 4], "e": [1, 3], "mail": [], "post": [], "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 4, "abus": [], "otherwis": 6, "mai": [], "report": [], "contact": [], "minde": 3, "com": [1, 3], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5, 6], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [0, 1, 2, 4], "written": 1, "provid": [2, 4], "clariti": [], "around": 4, "natur": 2, "explan": 4, "why": [], "wa": [], "apologi": [], "request": [], "through": [0, 5], "singl": [], "seri": [], "continu": [], "No": [], "involv": 4, "unsolicit": [], "specifi": 1, "period": [], "time": [0, 2, 4, 6], "avoid": [], "well": [], "extern": [], "channel": [1, 4, 5], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [0, 1, 5, 6], "adapt": [], "version": 4, "0": [0, 4, 5, 6], "avail": [4, 5], "http": [1, 3], "www": 1, "org": [], "_": [0, 4], "html": [], 
"were": 1, "inspir": 5, "mozilla": [], "": [1, 6], "ladder": [], "For": 4, "answer": [], "common": [5, 6], "question": [], "about": 4, "see": [], "faq": [], "translat": [], "everyth": [], "you": 4, "need": 6, "know": [], "effici": [0, 2, 4], "project": [], "packag": [2, 6], "python": 2, "doc": [1, 4], "librari": 3, "build": [], "script": [], "refer": [], "train": [0, 4, 5], "demo": [], "small": 2, "app": [], "showcas": [], "capabl": 4, "api": [], "minim": [], "templat": 1, "deploi": [], "rest": [5, 6], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 1, "As": [], "contributor": [], "onli": [5, 6], "your": [0, 1, 4, 6], "ad": 5, "whether": [0, 1, 6], "encount": [], "problem": [], "suggest": [], "input": [1, 4, 5], "ha": [0, 6], "valu": [1, 5], "can": [0, 4], "purpos": 4, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 0, "alreadi": [], "cover": [], "close": [], "If": [1, 4], "feel": [], "new": [], "one": [0, 4, 5], "do": [], "so": [], "whenev": [], "possibl": 6, "enough": 4, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 1, 6], "out": [4, 5, 6], "discuss": [], "q": [], "forum": [], "specif": [0, 6], "stackoverflow": [], "addit": [], "depend": 2, "command": [], "m": 6, "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 4, "pleas": [], "googl": [], "eas": [], "process": [1, 2], "later": [], "messag": [], "udac": [], "guid": [], "order": [0, 1, 4], "same": [1, 6], "ci": [], "workflow": [], "unittest": [], "local": [2, 4, 6], "To": [], "togeth": [1, 4], "current": [], "built": [], "sphinx": [], "thank": [], "our": [2, 4], "file": 0, "been": [4, 6], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 4, "clear": [], "web": 1, "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 1, "wish": [], "somewher": [], 
"els": [], "than": 6, "join": [], "slack": [], "where": [1, 6], "find": [], "requir": [3, 5], "3": [1, 2, 3, 4, 5, 6], "8": [4, 5], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": [2, 4, 5], "pytorch": [], "correspond": 4, "page": [4, 6], "2": [2, 4, 5, 6], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [0, 4, 5, 6], "12": 4, "anoth": [0, 4], "linux": [], "few": [], "extra": [], "maco": [], "user": [1, 2], "them": 0, "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": 6, "gtk": [], "latest": [], "over": 6, "here": [0, 5], "last": [0, 3, 4], "stabl": 3, "doctr": 3, "strive": [], "reduc": 5, "framework": 0, "minimum": 6, "necessari": [], "featur": [4, 6], "develop": [], "third": [], "parti": [], "miss": [], "tf": [4, 5], "torch": [], "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": [2, 4], "charact": [0, 1, 2, 4, 6], "made": 2, "seamless": 2, "access": [0, 1, 2], "anyon": 2, "power": 2, "easi": [2, 6], "extract": [0, 2], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [0, 2], "task": [0, 2, 4], "ocr": [0, 2, 6], "predictor": [], "pars": [0, 2], "textual": [0, 1, 2], "identifi": [2, 4], "each": [0, 1, 2, 4, 5], "word": [2, 4, 6], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 4], "speed": [2, 4], "perform": [1, 2, 4, 5, 6], "robust": 2, "stage": 2, "pretrain": [2, 4, 6], "paramet": [0, 1, 4, 5, 6], "friendli": 2, "line": [2, 6], "code": [1, 2], "load": [2, 4], "googlevis": 2, "aw": [2, 4], "textract": [2, 4], "optim": 2, "infer": 5, "both": [2, 4, 5], "cpu": [2, 4], "gpu": 2, "light": 2, "activ": [], "maintain": 2, "integr": [], "deploy": [], "dbnet": 4, "real": [2, 4, 5], "scene": [2, 4], "differenti": [2, 4], "binar": [2, 4], "linknet": [2, 4], "exploit": [2, 4], "encod": [0, 2, 4], "represent": [2, 4], "semant": [2, 4], "segment": [2, 4], "sar": [2, 4], "show": [2, 4, 6], "attend": [2, 4], "read": [2, 4], "simpl": [2, 4], 
"strong": [2, 4], "baselin": [2, 4], "irregular": [2, 4], "crnn": [2, 4], "end": [0, 2, 6], "trainabl": [2, 4], "neural": [2, 4], "network": [2, 4], "imag": [0, 1, 2, 4, 5, 6], "base": [2, 4], "sequenc": [0, 1, 2, 4, 6], "Its": [2, 4], "applic": [2, 4], "master": [], "multi": [], "aspect": 5, "non": [1, 5, 6], "vitstr": [], "vision": [], "transform": [0, 2], "fast": [0, 2], "parseq": [], "permut": [], "autoregress": [], "funsd": [0, 2, 4], "form": [0, 2], "noisi": [0, 2], "scan": [0, 2], "cord": [0, 2, 4], "consolid": [0, 2], "receipt": [0, 2], "forpost": [0, 2], "sroie": 0, "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 1, "group": [], "svhn": [], "digit": 0, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [1, 2, 4, 5], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 1, "dual": [], "modal": [], "graph": 1, "kei": [], "bool": [0, 1, 4, 5, 6], "true": [0, 1, 4, 5, 6], "use_polygon": [], "fals": [0, 4, 5, 6], "recognition_task": [], "kwarg": [0, 1, 4, 6], "sourc": [0, 1, 4, 5, 6], "document": [0, 4, 6], "import": [0, 1, 4, 5, 6], "train_set": 0, "download": 0, "img": [0, 5], "target": [0, 1, 4, 5], "subset": [0, 4], "polygon": [], "rotat": 1, "bound": [1, 4, 5, 6], "box": [1, 4, 6], "instead": 1, "straight": [], "ones": [], "recognit": 6, "keyword": [0, 1], "argument": [0, 1], "visiondataset": 0, "icdar2019": 0, "competit": 0, "iiit5k": [], "bmvc": [], "2012": [], "text": 1, "prior": [], "svt": [], "ucsd": [], "comput": [4, 6], "hous": [], "number": [0, 5, 6], "localis": [], "repositori": 2, "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 1, 4, 5, 6], "label_fold": [], "label": [], "part": 5, "challeng": [], "task2": [], "2015": [], "path": [1, 4], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], 
"test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": 4, "annot": 1, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 6, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 0, "detect": [], "element": [0, 1, 4], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 1, 4, 5, 6], "d": [], "abdef": [], "num_sampl": [], "100": [4, 5, 6], "vocabulari": [], "sampl": 0, "iter": 0, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [0, 2, 4], "sample_transform": 0, "wordgener": [], "min_char": [], "int": [0, 1, 4, 5, 6], "max_char": [], "list": [0, 1, 5], "none": [0, 1], "callabl": [0, 5], "tupl": [1, 4, 5], "32": [0, 4, 5], "maximum": [0, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 0, "label_fil": [], "jpg": 1, "root": [], "shuffl": 0, "batch_siz": 0, "drop_last": 0, "num_work": [], "collate_fn": [], "wrapper": [0, 5], "train_load": 0, "train_it": 0, "next": 0, "befor": 0, "pass": [0, 4], "batch": [0, 4, 5], "drop": 0, "isn": 0, "full": [0, 4, 6], "worker": 0, "function": [4, 5, 6], "merg": [], "sinc": 0, "content": [0, 1], "properli": 0, "model": [0, 6], "interpret": [0, 1], "multipl": [0, 1, 5], "name": [0, 4], "10": [0, 6], "0123456789": 0, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 0, "52": 0, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 0, "punctuat": 0, "currenc": 0, "5": [0, 5, 6], "ancient_greek": [], "48": [], 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 0, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 0, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 0, "french": [0, 4], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 0, "target_s": 0, "eo": 0, "pad": [0, 4, 5], "dynamic_seq_length": [], "ndarrai": [0, 1, 6], "given": [0, 4, 6], "map": [0, 4], "n": [0, 6], "length": 0, "Of": 0, "string": [0, 1, 4], "option": [], "start": [], "case": 6, "upper": 5, "enabl": 1, "dynam": [], "tensor": [0, 4, 5], "modul": [1, 4, 5, 6], "easili": 
[1, 2, 4, 6], "export": [1, 6], "analysi": [1, 4], "format": [1, 4], "organ": 1, "uninterrupt": 1, "confid": 1, "float": [1, 5, 6], "associ": 1, "predict": [1, 6], "xmin": 1, "ymin": 1, "xmax": 1, "ymax": 1, "coordin": 1, "rel": 1, "collect": 1, "meant": 1, "two": 1, "column": 1, "horizont": 1, "resolv": 1, "default": [1, 4], "smallest": 1, "enclos": 1, "g": 1, "qr": 1, "pictur": 1, "chart": 1, "signatur": 1, "logo": 1, "etc": 1, "artefact_typ": 1, "type": [1, 4], "sever": [1, 5], "its": [0, 1], "titl": 1, "underneath": 1, "page_idx": 1, "dimens": [1, 4, 6], "dict": [1, 6], "numpi": [1, 4, 6], "arrai": 6, "uint8": [4, 6], "raw": 1, "pixel": [1, 5], "height": 1, "width": 1, "dictionari": 1, "angl": 1, "degress": 1, "preserve_aspect_ratio": 5, "overlai": [], "displai": 6, "matplotlib": 6, "pyplot": 6, "method": 5, "high": 1, "convers": 1, "read_pdf": 1, "byte": [1, 4], "scale": 6, "rgb_mode": [], "password": [], "pdf": 1, "convert": [1, 4, 5], "render": [], "72dpi": [], "output": [1, 5], "rgb": [1, 5], "bgr": 1, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 1, "shape": [1, 4, 5, 6], "h": 1, "x": [1, 5, 6], "w": [1, 6], "c": [], "read_img_as_numpi": [], "output_s": [1, 5], "rgb_output": 1, "expect": [0, 1, 4, 5], "read_img_as_tensor": [], "img_path": [], "dtype": 4, "float32": 4, "desir": [], "relat": [], "divid": [], "255": [4, 5, 6], "decode_img_as_tensor": [], "img_cont": [], "stream": 1, "read_html": 1, "url": [0, 1], "yoursit": 1, "weasyprint": [], "documentfil": 1, "extens": 1, "classmethod": 1, "from_pdf": 1, "binari": [1, 4], "from_url": 1, "from_imag": 1, "page1": 1, "png": 1, "page2": 1, "vgg16_bn_r": [], "vgg": 4, "16": 4, "describ": 4, "veri": 2, "deep": 4, "convolut": 2, "larg": [], "modifi": [], "normal": [4, 5], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 4, "random": [4, 5, 6], "uniform": [4, 5], "512": [], "maxval": [4, 5], "imagenet": [], "extractor": 4, "resnet18": [], "resnet": 4, "18": [], 
"residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 4, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 4, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [4, 5], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 6, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 4, "croporientationpredictor": [], "np": [4, 6], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [4, 6], "600": [4, 6], "800": [4, 6], "astyp": [4, 6], "crop": 4, "dataset": 4, "linknet_resnet18": [], "1024": 4, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 4, "backbon": 4, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 4, "assume_straight_pag": [], "detectionpredictor": 4, "input_pag": [4, 6], "itself": [], "fit": [], "crnn_vgg16_bn": 4, "128": 4, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 4, "31": 4, "64": [4, 5], "256": 4, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 4, "recognitionpredictor": 4, "ocr_predictor": 4, "det_arch": 4, "reco_arch": 4, "pretrained_backbon": [], "symmetric_pad": [], "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 4, "up": 4, "assum": [], "preserv": 5, "ratio": 5, "symmetr": [], "bottom": [], "final": [], "potenti": 4, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 4, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], 
"huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 4, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [0, 4], "configur": [], "my": [], "procedur": 5, "draw": 5, "design": 5, "torchvis": 5, "resiz": [4, 5], "bilinear": [4, 5], "transfo": 5, "minval": 5, "interpol": [4, 5], "zero": [4, 5], "while": [], "done": [], "mean": [5, 6], "std": 5, "gaussian": 5, "distribut": 5, "485": 5, "456": 5, "406": 5, "229": 5, "225": 5, "averag": [4, 5], "per": [4, 5], "standard": 5, "deviat": 5, "lambdatransform": 5, "fn": 5, "lambda": 5, "tograi": 5, "num_output_channel": [], "grayscal": 5, "colorinvers": 5, "min_val": 5, "tranform": 5, "color": 5, "shift": 5, "randomli": 5, "invert": 5, "6": [3, 4, 5], "rang": 5, "randombright": 5, "max_delta": 5, "adjust": 5, "bright": 5, "delta": 5, "offset": 5, "add": 5, "pick": 5, "p": 5, "probabl": 5, "randomcontrast": 5, "contrast": 5, "contrast_factor": 5, "factor": 5, "randomsatur": 5, "satur": 5, "hsv": 5, "increas": 5, "randomhu": 5, "hue": 5, "randomgamma": 5, "min_gamma": 5, "max_gamma": 5, "min_gain": 5, "max_gain": 5, "gamma": 5, "correct": 5, "neg": 5, "lower": 5, "param": [4, 5], "constant": 5, "multipli": 5, "randomjpegqu": 5, "min_qual": 5, "60": 5, "max_qual": 5, "jpeg": 5, "qualiti": 5, "dimension": 5, "between": [5, 6], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 5, "consecut": [4, 5], "sequenti": [4, 5], "oneof": 5, "jpegqual": 5, "randomappli": 5, "regroup": 6, "core": [0, 6], "complementari": 6, 
"sens": 6, "visualize_pag": 6, "words_onli": 6, "display_artefact": [], "add_label": 6, "figur": 6, "block": [4, 6], "plt": 6, "ocr_db_crnn": 6, "artefact": [], "figsiz": 6, "largest": 6, "side": 6, "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [4, 6], "assess": 6, "textmatch": [], "match": [2, 6], "accuraci": 6, "aggreg": [0, 6], "foral": 6, "y": 6, "mathcal": 6, "frac": 6, "sum": 6, "limits_": 6, "f_": 6, "y_i": 6, "x_i": 6, "indic": 6, "defin": 6, "f_a": 6, "left": 6, "begin": 6, "ll": 6, "mbox": 6, "strictli": 6, "integ": 6, "updat": 6, "hello": 6, "world": 6, "summari": 6, "gt": [], "pred": [], "groung": [], "truth": 6, "exact": 6, "score": [], "counterpart": [], "unidecod": [], "localizationconfus": 6, "iou_thresh": 6, "mask_shap": [], "use_broadcast": [], "confus": 6, "iou": 6, "recal": [4, 6], "g_": 6, "precis": [4, 6], "meaniou": 6, "j": 6, "y_j": 6, "being": [4, 6], "intersect": 6, "union": 6, "g_x": 6, "assign": 6, "_i": 6, "geq": 6, "ground": 6, "asarrai": 6, "70": 6, "110": 6, "95": 6, "200": 6, "150": 6, "pair": 6, "broadcast": [], "consum": [], "memori": [], "either": 4, "ocrmetr": 6, "l": 6, "hat": 6, "h_": 6, "b_j": 6, "l_j": 6, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 0, "main": [], "produc": 4, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 4, "class_nam": [], "total": [], "date": [], "preprocessor": 4, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": 
[], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 4, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": 2, "vgg16": 4, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 4, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [0, 4], "predefin": 0, "prefer": 0, "signific": 0, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 4, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 0, "underli": 0, "defer": 0, "dataload": 0, "good": [], "achiev": [], "might": 4, "tune": 2, "thing": [], "product": [], "readi": [], "help": [], "support": 4, "devic": [], "fp16": 4, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": 0, 
"exchang": [], "interoper": [], "machin": [], "structur": 4, "layer": [], "metadata": [], "util": [2, 4], "export_model_to_onnx": [], "input_shap": 4, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 4, "onc": [0, 4], "separ": 4, "compon": 4, "charg": 4, "usabl": 4, "backend": 4, "along": [], "processor": [], "reusabl": 4, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": 4, "86": [], "93": [], "83": 4, "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": 4, "21": [], "82": [], "20": [], "49": 4, "87": [], "63": [], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": 4, "cf": 4, "disclaim": 4, "combin": 4, "199": 4, "second": 4, "warmup": [], "phase": [], "measur": 4, "1000": 4, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": 0, "40": [], "53": [], "most": 4, "print": [], "cfg": [], "30595": 4, "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": 4, "gvision": 4, "59": [], "03": [], "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [0, 1, 4], "nest": [], "get": 1, "typic": [], "layout": [], "340": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], 
"5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 4, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 1, "seemlessli": 2, "conda": [], "newer": 3, "developp": 3, "fp": 4, "scheme": 4, "deform": 4, "statist": 4, "turn": 4, "easier": 4, "let": 4, "db_resnet50_predictor": [], "sar_vgg16_bn": 4, "rnn": [], "enhanc": [], "symbol": 4, "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 4, "tf_model": 4, "tflite": 4, "conv_sequ": 4, "relu": 4, "kernel_s": 4, "serialized_model": 4, "convert_to_fp16": 4, "half": 4, "serial": 4, "quantize_model": 4, "quantiz": 4, "exclud": 4, "inherit": [0, 4], "abstract": 0, "verifi": 0, "file_nam": 0, "file_hash": 0, "extract_arch": 0, "overwrit": 0, "sha256": 0, "archiv": 0, "disk": 0, "775": [], "856": [], "860": 4, "862": 4, "863": 4, "sar_resnet31_predictor": [], 
"ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": 4, "625": 4, "781": 4, "830": 4, "exactmatch": 6, "ignore_cas": 6, "ignore_acc": 6, "ignor": 6, "letter": 6, "accent": 6, "error": 6, "max_dist": 6, "levenshtein": 6, "distanc": 6, "autoclass": 0, "loader": 0, "154": 0, "as_imag": 1, "convert_page_to_numpi": 1, "get_word": 1, "fitz": 1, "gettextword": 1, "get_artefact": 1, "entir": 1, "fulli": 2, "daili": 2, "mix": 2, "fine": 2, "scratch": 2, "special": 2, "recurr": 2, "733": 4, "817": 4, "745": 4, "875": 4, "frame": 4, "feed": 4, "warm": 4, "c5": 4, "x12larg": 4, "xeon": 4, "platinum": 4, "8275l": 4, "913": 4, "917": 4, "921": 4, "crnn_resnet31": 4, "629": 4, "701": 4, "664": 4, "780": 4, "630": 4, "702": 4, "666": 4, "783": 4, "640": 4, "713": 4, "672": 4, "789": 4, "na": 4, "753": 4, "700": 4, "533": 4, "689": 4, "611": 4, "660": 4, "db_sar_vgg": 4, "db_sar_resnet": 4, "db_crnn_vgg": 4, "db_crnn_resnet": 4, "properti": 4, "input_t": 4, "saved_model": 4, "And": 4, "nestedobject": 5}, "objects": {"": [[2, 0, 0, "-", "doctr"]], "doctr.datasets": [[0, 1, 1, "", "CORD"], [0, 1, 1, "", "FUNSD"], [0, 1, 1, "", "SROIE"], [0, 2, 1, "", "encode_sequences"]], "doctr.datasets.core": [[0, 1, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[0, 1, 1, "", "DataLoader"]], "doctr.documents": [[1, 1, 1, "", "Artefact"], [1, 1, 1, "", "Block"], [1, 1, 1, "", "Document"], [1, 1, 1, "", "DocumentFile"], [1, 1, 1, "", "Line"], [1, 1, 1, "", "PDF"], [1, 1, 1, "", "Page"], [1, 1, 1, "", "Word"], [1, 2, 1, "", "read_html"], [1, 2, 1, "", "read_img"], [1, 2, 1, "", "read_pdf"]], "doctr.documents.DocumentFile": [[1, 3, 1, "", "from_images"], [1, 3, 1, "", "from_pdf"], [1, 3, 1, "", "from_url"]], "doctr.documents.PDF": [[1, 3, 1, "", "as_images"], [1, 3, 1, "", "get_artefacts"], [1, 3, 1, "", "get_words"]], "doctr.models.detection": [[4, 2, 1, "", "db_resnet50"], [4, 2, 1, "", "detection_predictor"], [4, 2, 
1, "", "linknet"]], "doctr.models.export": [[4, 2, 1, "", "convert_to_fp16"], [4, 2, 1, "", "convert_to_tflite"], [4, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[4, 2, 1, "", "crnn_vgg16_bn"], [4, 2, 1, "", "recognition_predictor"], [4, 2, 1, "", "sar_resnet31"], [4, 2, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[4, 2, 1, "", "ocr_predictor"]], "doctr.transforms": [[5, 1, 1, "", "ColorInversion"], [5, 1, 1, "", "Compose"], [5, 1, 1, "", "LambdaTransformation"], [5, 1, 1, "", "Normalize"], [5, 1, 1, "", "OneOf"], [5, 1, 1, "", "RandomApply"], [5, 1, 1, "", "RandomBrightness"], [5, 1, 1, "", "RandomContrast"], [5, 1, 1, "", "RandomGamma"], [5, 1, 1, "", "RandomHue"], [5, 1, 1, "", "RandomJpegQuality"], [5, 1, 1, "", "RandomSaturation"], [5, 1, 1, "", "Resize"], [5, 1, 1, "", "ToGray"]], "doctr.utils.metrics": [[6, 1, 1, "", "ExactMatch"], [6, 1, 1, "", "LocalizationConfusion"], [6, 1, 1, "", "OCRMetric"]], "doctr.utils.visualization": [[6, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function", "3": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"], "3": ["py", "method", "Python method"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 2, 4, 5, 6], "codebas": [], "structur": 1, "continu": [], "integr": 2, "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], 
"question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [1, 2], "let": [], "": [], "connect": [], "prerequisit": [], "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 4], "recognit": [2, 4], "main": [], "model": [2, 4], "zoo": 4, "detect": [2, 4], "support": [0, 5], "dataset": [0, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 0, "return": [], "io": [], "word": 1, "line": 1, "artefact": 1, "block": 1, "page": 1, "file": 1, "read": 1, "classif": [], "factori": [], "transform": 5, "compos": 5, "util": 6, "visual": 6, "task": 6, "evalu": 6, "notebook": [], "train": 2, "your": 2, "own": [], "load": 0, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 4, "avail": 0, "object": [], "data": 0, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 4, "onnx": [], "right": [], "architectur": [], "predictor": [2, 4], "end": 4, "ocr": 4, "two": 4, "stage": 4, "approach": 4, "what": [], "should": [], "i": [], "do": [], "output": 4, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 4, "process": 4, "post": 4, "build": 2, "implement": 2, "content": 2, "compress": 4, "savedmodel": 4}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.datasets": [[0, "doctr-datasets"]], "Available Datasets": [[0, "available-datasets"]], "Data Loading": [[0, "data-loading"]], "Supported Vocabs": [[0, "supported-vocabs"]], "DocTR Vocabs": [[0, "id1"]], 
"doctr.documents": [[1, "doctr-documents"]], "Document structure": [[1, "document-structure"]], "Word": [[1, "word"]], "Line": [[1, "line"]], "Artefact": [[1, "artefact"]], "Block": [[1, "block"]], "Page": [[1, "page"]], "Document": [[1, "document"]], "File reading": [[1, "file-reading"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Features": [[2, "features"]], "\ud83e\uddd1\u200d\ud83d\udd2c Build & train your predictor": [[2, "scientist-build-train-your-predictor"]], "\ud83e\uddf0 Implemented models": [[2, "toolbox-implemented-models"]], "Detection models": [[2, "detection-models"], [4, "detection-models"]], "Recognition models": [[2, "recognition-models"], [4, "recognition-models"]], "\ud83e\uddfe Integrated datasets": [[2, "receipt-integrated-datasets"]], "Getting Started": [[2, "getting-started"]], "Contents": [[2, "contents"]], "Installation": [[3, "installation"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.models": [[4, "doctr-models"]], "Text Detection": [[4, "text-detection"]], "Pre-processing for detection": [[4, "pre-processing-for-detection"]], "Post-processing detections": [[4, "post-processing-detections"]], "Detection predictors": [[4, "detection-predictors"]], "Text Recognition": [[4, "text-recognition"]], "Text recognition model zoo": [[4, "id2"]], "Pre-processing for recognition": [[4, "pre-processing-for-recognition"]], "Post-processing outputs": [[4, "post-processing-outputs"]], "Recognition predictors": [[4, "recognition-predictors"]], "End-to-End OCR": [[4, "end-to-end-ocr"]], "Two-stage approaches": [[4, "two-stage-approaches"]], "Model export": [[4, "model-export"]], "Model compression": [[4, "model-compression"]], "Using SavedModel": [[4, "using-savedmodel"]], "doctr.transforms": [[5, "doctr-transforms"]], "Supported transformations": [[5, "supported-transformations"]], "Composing transformations": [[5, "composing-transformations"]], "doctr.utils": [[6, 
"doctr-utils"]], "Visualization": [[6, "visualization"]], "Task evaluation": [[6, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[0, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[0, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[0, "doctr.datasets.FUNSD"]], "sroie (class in doctr.datasets)": [[0, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.core)": [[0, "doctr.datasets.core.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[0, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[1, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[1, "doctr.documents.Block"]], "document (class in doctr.documents)": [[1, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[1, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[1, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[1, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[1, "doctr.documents.Page"]], "word (class in doctr.documents)": [[1, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.as_images"]], "from_images() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_url"]], "get_artefacts() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[1, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[1, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[1, "doctr.documents.read_pdf"]], "doctr": [[2, "module-doctr"]], 
"module": [[2, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[4, "doctr.models.detection.detection_predictor"]], "linknet() (in module doctr.models.detection)": [[4, "doctr.models.detection.linknet"]], "ocr_predictor() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[4, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[5, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[5, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[5, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[5, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[5, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[5, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[5, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[5, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[5, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[5, "doctr.transforms.RandomHue"]], 
"randomjpegquality (class in doctr.transforms)": [[5, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[5, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[5, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[5, "doctr.transforms.ToGray"]], "exactmatch (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.ExactMatch"]], "localizationconfusion (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.OCRMetric"]], "visualize_page() (in module doctr.utils.visualization)": [[6, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["datasets", "documents", "index", "installing", "models", "transforms", "utils"], "filenames": ["datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": 3, "note": [], "we": [1, 2, 4, 5], "member": [], "leader": [], "make": [4, 6], "particip": [], "commun": [], "harass": [], "free": [], "experi": 4, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [0, 1, 4, 5], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 5, "level": [4, 6], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 1, "act": [], "interact": 6, "wai": [0, 2, 4], "contribut": [], "an": [0, 1, 2, 4, 6], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [0, 1, 4, 5, 6], "behavior": [], "posit": 6, "environ": [], "includ": 4, "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], 
"Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [1, 4], "affect": [], "mistak": [], "learn": 4, "from": [0, 1, 2, 4, 5, 6], "focus": [], "what": [], "i": [0, 1, 2, 4, 5, 6], "best": [], "just": 4, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [0, 1, 4, 6], "us": [0, 3, 6], "languag": [1, 2], "imageri": [], "attent": [], "advanc": [], "ani": [0, 1, 2, 4, 6], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": [], "publish": [], "inform": [0, 2, 4], "physic": 1, "email": [], "address": 1, "without": 4, "explicit": [], "permiss": [], "which": 4, "could": [], "reason": [], "consid": [1, 6], "inappropri": [], "profession": [], "set": [0, 4, 6], "ar": [0, 1, 4, 5, 6], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [0, 4, 6], "right": [4, 6], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 1, "thi": [2, 3, 4, 6], "moder": [], "decis": [], "when": 6, "appli": [0, 5], "within": [], "all": [0, 1, 2, 4, 5, 6], "space": [], "also": [], "offici": [], "repres": [1, 4], "e": [1, 3], "mail": [], "post": [], "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 4, "abus": [], "otherwis": 6, "mai": [], "report": [], "contact": [], "minde": 3, "com": [1, 3], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5, 6], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [0, 1, 2, 4], "written": 1, "provid": [2, 4], "clariti": [], "around": 4, "natur": 2, "explan": 4, "why": [], "wa": [], "apologi": [], "request": [], "through": [0, 5], "singl": [], "seri": 
[], "continu": [], "No": [], "involv": 4, "unsolicit": [], "specifi": 1, "period": [], "time": [0, 2, 4, 6], "avoid": [], "well": [], "extern": [], "channel": [1, 4, 5], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [0, 1, 5, 6], "adapt": [], "version": 4, "0": [0, 4, 5, 6], "avail": [4, 5], "http": [1, 3], "www": 1, "org": [], "_": [0, 4], "html": [], "were": 1, "inspir": 5, "mozilla": [], "": [1, 6], "ladder": [], "For": 4, "answer": [], "common": [5, 6], "question": [], "about": 4, "see": [], "faq": [], "translat": [], "everyth": [], "you": 4, "need": 6, "know": [], "effici": [0, 2, 4], "project": [], "packag": [2, 6], "python": 2, "doc": [1, 4], "librari": 3, "build": [], "script": [], "refer": [], "train": [0, 4, 5], "demo": [], "small": 2, "app": [], "showcas": [], "capabl": 4, "api": [], "minim": [], "templat": 1, "deploi": [], "rest": [5, 6], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": [], "job": [], "coverag": [], "codecov": [], "back": [], "result": 1, "As": [], "contributor": [], "onli": [5, 6], "your": [0, 1, 4, 6], "ad": 5, "whether": [0, 1, 6], "encount": [], "problem": [], "suggest": [], "input": [1, 4, 5], "ha": [0, 6], "valu": [1, 5], "can": [0, 4], "purpos": 4, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 0, "alreadi": [], "cover": [], "close": [], "If": [1, 4], "feel": [], "new": [], "one": [0, 4, 5], "do": [], "so": [], "whenev": [], "possibl": 6, "enough": 4, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [0, 1, 6], "out": [4, 5, 6], "discuss": [], "q": [], "forum": [], "specif": [0, 6], "stackoverflow": [], "addit": [], "depend": 2, "command": [], "m": 6, "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 4, "pleas": [], "googl": [], "eas": [], "process": [1, 2], "later": [], "messag": [], "udac": [], "guid": [], "order": [0, 
1, 4], "same": [1, 6], "ci": [], "workflow": [], "unittest": [], "local": [2, 4, 6], "To": [], "togeth": [1, 4], "current": [], "built": [], "sphinx": [], "thank": [], "our": [2, 4], "file": 0, "been": [4, 6], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 4, "clear": [], "web": 1, "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 1, "wish": [], "somewher": [], "els": [], "than": 6, "join": [], "slack": [], "where": [1, 6], "find": [], "requir": [3, 5], "3": [1, 2, 3, 4, 5, 6], "8": [4, 5], "higher": [], "whichev": [], "o": [], "least": [], "tensorflow": [2, 4, 5], "pytorch": [], "correspond": 4, "page": [4, 6], "2": [2, 4, 5, 6], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [0, 4, 5, 6], "12": 4, "anoth": [0, 4], "linux": [], "few": [], "extra": [], "maco": [], "user": [1, 2], "them": 0, "homebrew": [], "brew": [], "cairo": [], "pango": [], "gdk": [], "pixbuf": [], "libffi": [], "window": 6, "gtk": [], "latest": [], "over": 6, "here": [0, 5], "last": [0, 3, 4], "stabl": 3, "doctr": 3, "strive": [], "reduc": 5, "framework": 0, "minimum": 6, "necessari": [], "featur": [4, 6], "develop": [], "third": [], "parti": [], "miss": [], "tf": [4, 5], "torch": [], "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": [2, 4], "charact": [0, 1, 2, 4, 6], "made": 2, "seamless": 2, "access": [0, 1, 2], "anyon": 2, "power": 2, "easi": [2, 6], "extract": [0, 2], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [0, 2], "task": [0, 2, 4], "ocr": [0, 2, 6], "predictor": [], "pars": [0, 2], "textual": [0, 1, 2], "identifi": [2, 4], "each": [0, 1, 2, 4, 5], "word": [2, 4, 6], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 4], "speed": [2, 4], "perform": [1, 2, 4, 5, 6], "robust": 2, "stage": 2, "pretrain": [2, 4, 6], "paramet": [0, 1, 4, 5, 6], "friendli": 2, "line": [2, 6], "code": [1, 2], "load": [2, 4], "googlevis": 
2, "aw": [2, 4], "textract": [2, 4], "optim": 2, "infer": 5, "both": [2, 4, 5], "cpu": [2, 4], "gpu": 2, "light": 2, "activ": [], "maintain": 2, "integr": [], "deploy": [], "dbnet": 4, "real": [2, 4, 5], "scene": [2, 4], "differenti": [2, 4], "binar": [2, 4], "linknet": [2, 4], "exploit": [2, 4], "encod": [0, 2, 4], "represent": [2, 4], "semant": [2, 4], "segment": [2, 4], "sar": [2, 4], "show": [2, 4, 6], "attend": [2, 4], "read": [2, 4], "simpl": [2, 4], "strong": [2, 4], "baselin": [2, 4], "irregular": [2, 4], "crnn": [2, 4], "end": [0, 2, 6], "trainabl": [2, 4], "neural": [2, 4], "network": [2, 4], "imag": [0, 1, 2, 4, 5, 6], "base": [2, 4], "sequenc": [0, 1, 2, 4, 6], "Its": [2, 4], "applic": [2, 4], "master": [], "multi": [], "aspect": 5, "non": [1, 5, 6], "vitstr": [], "vision": [], "transform": [0, 2], "fast": [0, 2], "parseq": [], "permut": [], "autoregress": [], "funsd": [0, 2, 4], "form": [0, 2], "noisi": [0, 2], "scan": [0, 2], "cord": [0, 2, 4], "consolid": [0, 2], "receipt": [0, 2], "forpost": [0, 2], "sroie": 0, "icdar": [], "2019": [], "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": 1, "group": [], "svhn": [], "digit": 0, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [1, 2, 4, 5], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 1, "dual": [], "modal": [], "graph": 1, "kei": [], "bool": [0, 1, 4, 5, 6], "true": [0, 1, 4, 5, 6], "use_polygon": [], "fals": [0, 4, 5, 6], "recognition_task": [], "kwarg": [0, 1, 4, 6], "sourc": [0, 1, 4, 5, 6], "document": [0, 4, 6], "import": [0, 1, 4, 5, 6], "train_set": 0, "download": 0, "img": [0, 5], "target": [0, 1, 4, 5], "subset": [0, 4], "polygon": [], "rotat": 1, "bound": [1, 4, 5, 6], "box": [1, 4, 6], "instead": 1, "straight": [], "ones": [], "recognit": 6, "keyword": [0, 1], "argument": [0, 1], "visiondataset": 0, 
"icdar2019": 0, "competit": 0, "iiit5k": [], "bmvc": [], "2012": [], "text": 1, "prior": [], "svt": [], "ucsd": [], "comput": [4, 6], "hous": [], "number": [0, 5, 6], "localis": [], "repositori": 2, "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": [], "str": [0, 1, 4, 5, 6], "label_fold": [], "label": [], "part": 5, "challeng": [], "task2": [], "2015": [], "path": [1, 4], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": 4, "annot": 1, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 6, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 0, "detect": [], "element": [0, 1, 4], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [0, 1, 4, 5, 6], "d": [], "abdef": [], "num_sampl": [], "100": [4, 5, 6], "vocabulari": [], "sampl": 0, "iter": 0, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [0, 2, 4], "sample_transform": 0, "wordgener": [], "min_char": [], "int": [0, 1, 4, 5, 6], "max_char": [], "list": [0, 1, 5], "none": [0, 1], "callabl": [0, 5], "tupl": [1, 4, 5], "32": [0, 4, 5], "maximum": [0, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 0, "label_fil": [], "jpg": 1, "root": [], "shuffl": 0, "batch_siz": 0, "drop_last": 0, "num_work": [], "collate_fn": [], "wrapper": [0, 5], "train_load": 0, "train_it": 0, "next": 0, "befor": 0, "pass": [0, 4], "batch": [0, 4, 5], "drop": 0, "isn": 0, "full": [0, 4, 6], "worker": 0, "function": [4, 5, 6], "merg": [], "sinc": 0, "content": [0, 1], "properli": 0, "model": [0, 6], "interpret": [0, 1], "multipl": [0, 1, 5], "name": [0, 4], "10": [0, 6], "0123456789": 0, 
"hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 0, "52": 0, "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 0, "punctuat": 0, "currenc": 0, "5": [0, 5, 6], "ancient_greek": [], "48": [], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 0, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 0, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 0, "french": [0, 4], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 0, "target_s": 0, "eo": 0, "pad": [0, 4, 5], 
"dynamic_seq_length": [], "ndarrai": [0, 1, 6], "given": [0, 4, 6], "map": [0, 4], "n": [0, 6], "length": 0, "Of": 0, "string": [0, 1, 4], "option": [], "start": [], "case": 6, "upper": 5, "enabl": 1, "dynam": [], "tensor": [0, 4, 5], "modul": [1, 4, 5, 6], "easili": [1, 2, 4, 6], "export": [1, 6], "analysi": [1, 4], "format": [1, 4], "organ": 1, "uninterrupt": 1, "confid": 1, "float": [1, 5, 6], "associ": 1, "predict": [1, 6], "xmin": 1, "ymin": 1, "xmax": 1, "ymax": 1, "coordin": 1, "rel": 1, "collect": 1, "meant": 1, "two": 1, "column": 1, "horizont": 1, "resolv": 1, "default": [1, 4], "smallest": 1, "enclos": 1, "g": 1, "qr": 1, "pictur": 1, "chart": 1, "signatur": 1, "logo": 1, "etc": 1, "artefact_typ": 1, "type": [1, 4], "sever": [1, 5], "its": [0, 1], "titl": 1, "underneath": 1, "page_idx": 1, "dimens": [1, 4, 6], "dict": [1, 6], "numpi": [1, 4, 6], "arrai": 6, "uint8": [4, 6], "raw": 1, "pixel": [1, 5], "height": 1, "width": 1, "dictionari": 1, "angl": 1, "degress": 1, "preserve_aspect_ratio": 5, "overlai": [], "displai": 6, "matplotlib": 6, "pyplot": 6, "method": 5, "high": 1, "convers": 1, "read_pdf": 1, "byte": [1, 4], "scale": 6, "rgb_mode": [], "password": [], "pdf": 1, "convert": [1, 4, 5], "render": [], "72dpi": [], "output": [1, 5], "rgb": [1, 5], "bgr": 1, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 1, "shape": [1, 4, 5, 6], "h": 1, "x": [1, 5, 6], "w": [1, 6], "c": [], "read_img_as_numpi": [], "output_s": [1, 5], "rgb_output": 1, "expect": [0, 1, 4, 5], "read_img_as_tensor": [], "img_path": [], "dtype": 4, "float32": 4, "desir": [], "relat": [], "divid": [], "255": [4, 5, 6], "decode_img_as_tensor": [], "img_cont": [], "stream": 1, "read_html": 1, "url": [0, 1], "yoursit": 1, "weasyprint": [], "documentfil": 1, "extens": 1, "classmethod": 1, "from_pdf": 1, "binari": [1, 4], "from_url": 1, "from_imag": 1, "page1": 1, "png": 1, "page2": 1, "vgg16_bn_r": [], "vgg": 4, "16": 4, "describ": 4, "veri": 2, "deep": 4, "convolut": 
2, "larg": [], "modifi": [], "normal": [4, 5], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 4, "random": [4, 5, 6], "uniform": [4, 5], "512": [], "maxval": [4, 5], "imagenet": [], "extractor": 4, "resnet18": [], "resnet": 4, "18": [], "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 4, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 4, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [4, 5], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 6, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 4, "croporientationpredictor": [], "np": [4, 6], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [4, 6], "600": [4, 6], "800": [4, 6], "astyp": [4, 6], "crop": 4, "dataset": 4, "linknet_resnet18": [], "1024": 4, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 4, "backbon": 4, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 4, "assume_straight_pag": [], "detectionpredictor": 4, "input_pag": [4, 6], "itself": [], "fit": [], "crnn_vgg16_bn": 4, "128": 4, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 4, "31": 4, "64": [4, 5], "256": 4, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 4, "recognitionpredictor": 4, "ocr_predictor": 4, "det_arch": 4, "reco_arch": 4, "pretrained_backbon": [], "symmetric_pad": [], "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 4, "up": 4, "assum": [], "preserv": 5, 
"ratio": 5, "symmetr": [], "bottom": [], "final": [], "potenti": 4, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 4, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 4, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [0, 4], "configur": [], "my": [], "procedur": 5, "draw": 5, "design": 5, "torchvis": 5, "resiz": [4, 5], "bilinear": [4, 5], "transfo": 5, "minval": 5, "interpol": [4, 5], "zero": [4, 5], "while": [], "done": [], "mean": [5, 6], "std": 5, "gaussian": 5, "distribut": 5, "485": 5, "456": 5, "406": 5, "229": 5, "225": 5, "averag": [4, 5], "per": [4, 5], "standard": 5, "deviat": 5, "lambdatransform": 5, "fn": 5, "lambda": 5, "tograi": 5, "num_output_channel": [], "grayscal": 5, "colorinvers": 5, "min_val": 5, "tranform": 5, "color": 5, "shift": 5, "randomli": 5, "invert": 5, "6": [3, 4, 5], "rang": 5, "randombright": 5, "max_delta": 5, "adjust": 5, "bright": 5, "delta": 5, "offset": 5, "add": 5, "pick": 5, "p": 5, "probabl": 5, "randomcontrast": 5, "contrast": 5, "contrast_factor": 5, "factor": 5, "randomsatur": 5, "satur": 5, "hsv": 5, "increas": 5, "randomhu": 5, "hue": 5, "randomgamma": 5, "min_gamma": 5, "max_gamma": 5, "min_gain": 5, "max_gain": 5, "gamma": 5, "correct": 5, "neg": 5, "lower": 5, "param": [4, 5], "constant": 5, "multipli": 5, "randomjpegqu": 5, "min_qual": 5, "60": 5, "max_qual": 5, "jpeg": 5, "qualiti": 5, "dimension": 5, "between": [5, 6], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": [], "75": [], "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], 
"randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 5, "consecut": [4, 5], "sequenti": [4, 5], "oneof": 5, "jpegqual": 5, "randomappli": 5, "regroup": 6, "core": [0, 6], "complementari": 6, "sens": 6, "visualize_pag": 6, "words_onli": 6, "display_artefact": [], "add_label": 6, "figur": 6, "block": [4, 6], "plt": 6, "ocr_db_crnn": 6, "artefact": [], "figsiz": 6, "largest": 6, "side": 6, "plot": [], "static": [], "top": [], "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": [], "famili": [], "synthes": [], "metric": [4, 6], "assess": 6, "textmatch": [], "match": [2, 6], "accuraci": 6, "aggreg": [0, 6], "foral": 6, "y": 6, "mathcal": 6, "frac": 6, "sum": 6, "limits_": 6, "f_": 6, "y_i": 6, "x_i": 6, "indic": 6, "defin": 6, "f_a": 6, "left": 6, "begin": 6, "ll": 6, "mbox": 6, "strictli": 6, "integ": 6, "updat": 6, "hello": 6, "world": 6, "summari": 6, "gt": [], "pred": [], "groung": [], "truth": 6, "exact": 6, "score": [], "counterpart": [], "unidecod": [], "localizationconfus": 6, "iou_thresh": 6, "mask_shap": [], "use_broadcast": [], "confus": 6, "iou": 6, "recal": [4, 6], "g_": 6, "precis": [4, 6], "meaniou": 6, "j": 6, "y_j": 6, "being": [4, 6], "intersect": 6, "union": 6, "g_x": 6, "assign": 6, "_i": 6, "geq": 6, "ground": 6, "asarrai": 6, "70": 6, "110": 6, "95": 6, "200": 6, "150": 6, "pair": 6, "broadcast": [], "consum": [], "memori": [], "either": 4, "ocrmetr": 6, "l": 6, "hat": 6, "h_": 6, "b_j": 6, "l_j": 6, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": [], "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 0, "main": [], "produc": 4, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], 
"det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 4, "class_nam": [], "total": [], "date": [], "preprocessor": 4, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 4, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": 2, "vgg16": 4, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 4, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [0, 4], "predefin": 0, "prefer": 0, "signific": 0, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 4, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 0, "underli": 0, "defer": 0, "dataload": 0, "good": [], "achiev": [], 
"might": 4, "tune": 2, "thing": [], "product": [], "readi": [], "help": [], "support": 4, "devic": [], "fp16": 4, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": 0, "exchang": [], "interoper": [], "machin": [], "structur": 4, "layer": [], "metadata": [], "util": [2, 4], "export_model_to_onnx": [], "input_shap": 4, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 4, "onc": [0, 4], "separ": 4, "compon": 4, "charg": 4, "usabl": 4, "backend": 4, "along": [], "processor": [], "reusabl": 4, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [], "84": [], "39": [], "85": 4, "86": [], "93": [], "83": 4, "24": [], "80": [], "29": [], "90": [], "67": [], "76": [], "11": [], "81": [], "71": [], "7": 4, "21": [], "82": [], "20": [], "49": 4, "87": [], "63": [], "17": [], "28": [], "51": [], "46": [], "db_resnet34": [], "22": [], "89": [], "74": [], "56": [], "68": [], "92": [], "61": [], "41": [], "00": [], "79": [], "38": [], "88": [], "62": [], "26": [], "06": [], "78": [], "47": [], "54": [], "abov": 4, "cf": 4, "disclaim": 4, "combin": 4, "199": 4, "second": 4, "warmup": [], "phase": [], "measur": 4, "1000": 4, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": [], "9": [], "73": [], "44": [], "14": [], "55": [], "58": [], "57": [], "66": [], "01": [], "98": [], "23": [], "69": [], "99": [], "91": [], "05": [], "09": [], "96": 0, "40": [], "53": [], "most": 4, "print": [], "cfg": [], "30595": 4, "45": [], "72": [], "43": [], "65": [], "77": [], "30": [], "07": [], "27": 4, "gvision": 4, "59": [], "03": [], "azur": [], "recogn": [], "42": [], 
"go": [], "mention": [], "still": [], "return": [0, 1, 4], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": [], "get": 1, "typic": [], "layout": [], "340": [], "text_output": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": [], "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": 4, "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 1, "seemlessli": 2, "conda": [], "newer": 3, "developp": 3, "fp": 4, "scheme": 4, "deform": 4, "statist": 4, "turn": 4, "easier": 4, "let": 4, "db_resnet50_predictor": [], "sar_vgg16_bn": 4, "rnn": [], "enhanc": [], "symbol": 4, "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 4, 
"tf_model": 4, "tflite": 4, "conv_sequ": 4, "relu": 4, "kernel_s": 4, "serialized_model": 4, "convert_to_fp16": 4, "half": 4, "serial": 4, "quantize_model": 4, "quantiz": 4, "exclud": 4, "inherit": [0, 4], "abstract": 0, "verifi": 0, "file_nam": 0, "file_hash": 0, "extract_arch": 0, "overwrit": 0, "sha256": 0, "archiv": 0, "disk": 0, "775": [], "856": [], "860": 4, "862": 4, "863": 4, "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": 4, "625": 4, "781": 4, "830": 4, "exactmatch": 6, "ignore_cas": 6, "ignore_acc": 6, "ignor": 6, "letter": 6, "accent": 6, "error": 6, "max_dist": 6, "levenshtein": 6, "distanc": 6, "autoclass": 0, "loader": 0, "154": 0, "as_imag": 1, "convert_page_to_numpi": 1, "get_word": 1, "fitz": 1, "gettextword": 1, "get_artefact": 1, "entir": 1, "fulli": 2, "daili": 2, "mix": 2, "fine": 2, "scratch": 2, "special": 2, "recurr": 2, "733": 4, "817": 4, "745": 4, "875": 4, "frame": 4, "feed": 4, "warm": 4, "c5": 4, "x12larg": 4, "xeon": 4, "platinum": 4, "8275l": 4, "913": 4, "917": 4, "921": 4, "crnn_resnet31": 4, "629": 4, "701": 4, "664": 4, "780": 4, "630": 4, "702": 4, "666": 4, "783": 4, "640": 4, "713": 4, "672": 4, "789": 4, "na": 4, "753": 4, "700": 4, "533": 4, "689": 4, "611": 4, "660": 4, "db_sar_vgg": 4, "db_sar_resnet": 4, "db_crnn_vgg": 4, "db_crnn_resnet": 4, "properti": 4, "input_t": 4, "saved_model": 4, "And": 4, "nestedobject": 5}, "objects": {"": [[2, 0, 0, "-", "doctr"]], "doctr.datasets": [[0, 1, 1, "", "CORD"], [0, 1, 1, "", "FUNSD"], [0, 1, 1, "", "SROIE"], [0, 2, 1, "", "encode_sequences"]], "doctr.datasets.core": [[0, 1, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[0, 1, 1, "", "DataLoader"]], "doctr.documents": [[1, 1, 1, "", "Artefact"], [1, 1, 1, "", "Block"], [1, 1, 1, "", "Document"], [1, 1, 1, "", "DocumentFile"], [1, 1, 1, "", "Line"], [1, 1, 1, "", "PDF"], [1, 1, 1, "", "Page"], [1, 1, 1, "", 
"Word"], [1, 2, 1, "", "read_html"], [1, 2, 1, "", "read_img"], [1, 2, 1, "", "read_pdf"]], "doctr.documents.DocumentFile": [[1, 3, 1, "", "from_images"], [1, 3, 1, "", "from_pdf"], [1, 3, 1, "", "from_url"]], "doctr.documents.PDF": [[1, 3, 1, "", "as_images"], [1, 3, 1, "", "get_artefacts"], [1, 3, 1, "", "get_words"]], "doctr.models.detection": [[4, 2, 1, "", "db_resnet50"], [4, 2, 1, "", "detection_predictor"], [4, 2, 1, "", "linknet"]], "doctr.models.export": [[4, 2, 1, "", "convert_to_fp16"], [4, 2, 1, "", "convert_to_tflite"], [4, 2, 1, "", "quantize_model"]], "doctr.models.recognition": [[4, 2, 1, "", "crnn_vgg16_bn"], [4, 2, 1, "", "recognition_predictor"], [4, 2, 1, "", "sar_resnet31"], [4, 2, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[4, 2, 1, "", "ocr_predictor"]], "doctr.transforms": [[5, 1, 1, "", "ColorInversion"], [5, 1, 1, "", "Compose"], [5, 1, 1, "", "LambdaTransformation"], [5, 1, 1, "", "Normalize"], [5, 1, 1, "", "OneOf"], [5, 1, 1, "", "RandomApply"], [5, 1, 1, "", "RandomBrightness"], [5, 1, 1, "", "RandomContrast"], [5, 1, 1, "", "RandomGamma"], [5, 1, 1, "", "RandomHue"], [5, 1, 1, "", "RandomJpegQuality"], [5, 1, 1, "", "RandomSaturation"], [5, 1, 1, "", "Resize"], [5, 1, 1, "", "ToGray"]], "doctr.utils.metrics": [[6, 1, 1, "", "ExactMatch"], [6, 1, 1, "", "LocalizationConfusion"], [6, 1, 1, "", "OCRMetric"]], "doctr.utils.visualization": [[6, 2, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:function", "3": "py:method"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "function", "Python function"], "3": ["py", "method", "Python method"]}, "titleterms": {"changelog": [], "v0": [], "7": [], "0": [], "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": [], "03": [], "22": [], "2021": [], "12": [], "31": [], "4": [], "11": [], "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": [], "05": [], "28": [], "18": [], 
"contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [0, 1, 2, 4, 5, 6], "codebas": [], "structur": 1, "continu": [], "integr": 2, "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [1, 2], "let": [], "": [], "connect": [], "prerequisit": [], "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 4], "recognit": [2, 4], "main": [], "model": [2, 4], "zoo": 4, "detect": [2, 4], "support": [0, 5], "dataset": [0, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 0, "return": [], "io": [], "word": 1, "line": 1, "artefact": 1, "block": 1, "page": 1, "file": 1, "read": 1, "classif": [], "factori": [], "transform": 5, "compos": 5, "util": 6, "visual": 6, "task": 6, "evalu": 6, "notebook": [], "train": 2, "your": 2, "own": [], "load": 0, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 4, "avail": 0, "object": [], "data": 0, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 4, "onnx": [], "right": [], "architectur": [], "predictor": [2, 4], "end": 4, "ocr": 4, "two": 4, "stage": 4, "approach": 4, "what": [], "should": [], "i": [], "do": [], "output": 4, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 4, "process": 4, "post": 4, "build": 2, "implement": 2, "content": 2, "compress": 4, "savedmodel": 4}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, 
"sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"doctr.datasets": [[0, "doctr-datasets"]], "Available Datasets": [[0, "available-datasets"]], "Data Loading": [[0, "data-loading"]], "Supported Vocabs": [[0, "supported-vocabs"]], "DocTR Vocabs": [[0, "id1"]], "doctr.documents": [[1, "doctr-documents"]], "Document structure": [[1, "document-structure"]], "Word": [[1, "word"]], "Line": [[1, "line"]], "Artefact": [[1, "artefact"]], "Block": [[1, "block"]], "Page": [[1, "page"]], "Document": [[1, "document"]], "File reading": [[1, "file-reading"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Features": [[2, "features"]], "\ud83e\uddd1\u200d\ud83d\udd2c Build & train your predictor": [[2, "scientist-build-train-your-predictor"]], "\ud83e\uddf0 Implemented models": [[2, "toolbox-implemented-models"]], "Detection models": [[2, "detection-models"], [4, "detection-models"]], "Recognition models": [[2, "recognition-models"], [4, "recognition-models"]], "\ud83e\uddfe Integrated datasets": [[2, "receipt-integrated-datasets"]], "Getting Started": [[2, "getting-started"]], "Contents": [[2, "contents"]], "Installation": [[3, "installation"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.models": [[4, "doctr-models"]], "Text Detection": [[4, "text-detection"]], "Pre-processing for detection": [[4, "pre-processing-for-detection"]], "Post-processing detections": [[4, "post-processing-detections"]], "Detection predictors": [[4, "detection-predictors"]], "Text Recognition": [[4, "text-recognition"]], "Text recognition model zoo": [[4, "id2"]], "Pre-processing for recognition": [[4, "pre-processing-for-recognition"]], "Post-processing outputs": [[4, "post-processing-outputs"]], "Recognition predictors": [[4, "recognition-predictors"]], 
"End-to-End OCR": [[4, "end-to-end-ocr"]], "Two-stage approaches": [[4, "two-stage-approaches"]], "Model export": [[4, "model-export"]], "Model compression": [[4, "model-compression"]], "Using SavedModel": [[4, "using-savedmodel"]], "doctr.transforms": [[5, "doctr-transforms"]], "Supported transformations": [[5, "supported-transformations"]], "Composing transformations": [[5, "composing-transformations"]], "doctr.utils": [[6, "doctr-utils"]], "Visualization": [[6, "visualization"]], "Task evaluation": [[6, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[0, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[0, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[0, "doctr.datasets.FUNSD"]], "sroie (class in doctr.datasets)": [[0, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.core)": [[0, "doctr.datasets.core.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[0, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[1, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[1, "doctr.documents.Block"]], "document (class in doctr.documents)": [[1, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[1, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[1, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[1, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[1, "doctr.documents.Page"]], "word (class in doctr.documents)": [[1, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.as_images"]], "from_images() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[1, "doctr.documents.DocumentFile.from_url"]], 
"get_artefacts() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[1, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[1, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[1, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[1, "doctr.documents.read_pdf"]], "doctr": [[2, "module-doctr"]], "module": [[2, "module-doctr"]], "convert_to_fp16() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[4, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[4, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[4, "doctr.models.detection.detection_predictor"]], "linknet() (in module doctr.models.detection)": [[4, "doctr.models.detection.linknet"]], "ocr_predictor() (in module doctr.models.zoo)": [[4, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[4, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[4, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[5, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[5, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[5, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[5, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[5, 
"doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[5, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[5, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[5, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[5, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[5, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[5, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[5, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[5, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[5, "doctr.transforms.ToGray"]], "exactmatch (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.ExactMatch"]], "localizationconfusion (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[6, "doctr.utils.metrics.OCRMetric"]], "visualize_page() (in module doctr.utils.visualization)": [[6, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.2.0/using_doctr/using_models.html b/v0.2.0/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.2.0/using_doctr/using_models.html +++ b/v0.2.0/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.2.1/_sources/using_doctr/using_models.rst.txt b/v0.2.1/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.2.1/_sources/using_doctr/using_models.rst.txt +++ b/v0.2.1/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.2.1/searchindex.js b/v0.2.1/searchindex.js index 39c93f0a9f..2b717e2a9e 100644 --- a/v0.2.1/searchindex.js +++ b/v0.2.1/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "documents", "index", "installing", "models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 4], "note": 0, "we": [2, 3, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 2, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 2, "act": [], "interact": [2, 7], "wai": [1, 3, 5], "contribut": [], "an": [1, 2, 3, 5, 7], "open": [], "welcom": 3, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 2, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [4, 5], "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [2, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 3, 5, 6, 7], "focus": [], "what": [], "i": [1, 2, 5, 6, 7], "best": [], "just": 5, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [1, 2, 5, 7], "us": [1, 4, 
7], "languag": [2, 3], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 3, 5, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 3, "privat": 5, "publish": [], "inform": [1, 3, 5], "physic": 2, "email": [], "address": 2, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [2, 7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 2, 4, 5, 6, 7], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 2, "thi": [4, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 3, 5, 6, 7], "space": [], "also": [], "offici": [], "repres": [2, 5], "e": [2, 4], "mail": [], "post": [], "via": 3, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 4, "com": [2, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 3, 5], "written": 2, "provid": [3, 5], "clariti": [], "around": 5, "natur": 3, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": [], "involv": 5, "unsolicit": [], "specifi": 2, "period": [], "time": [1, 5, 7], "avoid": [], "well": [], "extern": [], "channel": [2, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 2, 6, 7], "adapt": [], "version": 5, "0": [1, 3, 5, 6, 7], "avail": [3, 5, 6], 
"http": [2, 4], "www": 2, "org": [], "_": [1, 5], "html": [], "were": 2, "inspir": 6, "mozilla": [], "": [2, 7], "ladder": [], "For": [4, 5], "answer": [], "common": [6, 7], "question": [], "about": 5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [4, 5], "need": [4, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 3, "doc": [2, 5], "librari": 4, "build": [], "script": [], "refer": [], "train": [1, 5, 6], "demo": [], "small": 3, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 2, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 4, "worklow": [], "run": 4, "job": [], "coverag": [], "codecov": [], "back": [], "result": [2, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 2, 5, 7], "ad": 6, "whether": [1, 2, 7], "encount": [], "problem": [], "suggest": [], "input": [2, 5, 6], "ha": [1, 7], "valu": [2, 6], "can": [1, 4, 5], "purpos": 5, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [2, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": [], "so": [], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [1, 2, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 5, 7], "stackoverflow": [], "addit": [], "depend": [3, 4], "command": [], "m": [5, 7], "pip": 4, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 5, "pleas": [], "googl": [], "eas": [], "process": [2, 3], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 2, 5], "same": [2, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 3, 5, 7], "To": [], "togeth": [2, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 3], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": [], "web": 2, 
"browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 2, "wish": [], "somewher": [], "els": [], "than": [4, 7], "join": [], "slack": [], "where": [2, 7], "find": 4, "requir": [4, 6], "3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 4, "whichev": [], "o": 4, "least": [], "tensorflow": [3, 5, 6], "pytorch": [], "correspond": 5, "page": [5, 7], "2": [3, 5, 6], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [1, 3, 5, 6, 7], "12": 5, "anoth": [1, 4, 5], "linux": 4, "few": 4, "extra": 4, "maco": 4, "user": [2, 3, 4], "them": [1, 4], "homebrew": [], "brew": 4, "cairo": 4, "pango": 4, "gdk": 4, "pixbuf": 4, "libffi": 4, "window": [4, 7], "gtk": 4, "latest": 4, "over": [4, 7], "here": [1, 4, 6], "last": [1, 4, 5], "stabl": 4, "doctr": 4, "strive": [], "reduc": 6, "framework": 1, "minimum": 7, "necessari": [], "featur": [5, 7], "develop": [], "third": [], "parti": [], "miss": [], "tf": [5, 6], "torch": [], "mode": 4, "clone": 4, "state": 3, "art": 3, "optic": [3, 5], "charact": [1, 2, 3, 5, 7], "made": 3, "seamless": 3, "access": [1, 2, 3], "anyon": 3, "power": 3, "easi": [3, 7], "extract": [1, 3], "valuabl": 3, "autom": 3, "seamlessli": [], "understand": [1, 3], "task": [1, 3, 5], "ocr": [1, 3, 7], "predictor": [], "pars": [1, 3], "textual": [1, 2, 3], "identifi": [3, 5], "each": [1, 2, 3, 5, 6, 7], "word": [3, 5, 7], "research": 3, "quickli": 3, "compar": 3, "own": 3, "architectur": [3, 5], "speed": [3, 5], "perform": [2, 3, 5, 6, 7], "robust": 3, "stage": 3, "pretrain": [3, 5, 7], "paramet": [1, 2, 3, 5, 6, 7], "friendli": 3, "line": [3, 7], "code": [2, 3], "load": [3, 5], "googlevis": 3, "aw": [3, 5], "textract": [3, 5], "optim": 3, "infer": [3, 6], "both": [3, 5, 6], "cpu": [3, 5], "gpu": 3, "light": 3, "activ": [], "maintain": 3, "integr": 3, "deploy": [], "dbnet": [3, 5], "real": [5, 6], "scene": 5, "differenti": [3, 5], "binar": [3, 5], "linknet": [3, 5], "exploit": 5, "encod": [1, 2, 5], "represent": 5, "semant": 
5, "segment": 5, "sar": [3, 5], "show": [2, 3, 5, 7], "attend": [3, 5], "read": [3, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [3, 5], "end": [1, 3, 7], "trainabl": 5, "neural": [3, 5], "network": [3, 5], "imag": [1, 2, 5, 6, 7], "base": 5, "sequenc": [1, 2, 5, 7], "Its": 5, "applic": 5, "master": [], "multi": [], "aspect": 6, "non": [2, 6, 7], "vitstr": [], "vision": [], "transform": [1, 3], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 3, 5], "form": [1, 3], "noisi": [1, 3], "scan": [1, 3], "cord": [1, 3, 5], "consolid": [1, 3], "receipt": [1, 3, 5], "forpost": [1, 3], "sroie": [1, 3], "icdar": 3, "2019": 3, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 3, "geometri": 2, "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 3, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 2, "dual": [], "modal": [], "graph": 2, "kei": [], "bool": [1, 2, 5, 6, 7], "true": [1, 2, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6], "recognition_task": [], "kwarg": [1, 2, 5, 7], "sourc": [1, 2, 5, 6, 7], "document": [1, 5, 7], "import": [1, 2, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 2, 5, 6], "subset": [1, 5], "polygon": [], "rotat": 2, "bound": [2, 5, 6, 7], "box": [2, 5, 7], "instead": 2, "straight": [], "ones": [], "recognit": 7, "keyword": [1, 2], "argument": [1, 2], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [2, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 2, 5, 6, 7], "label_fold": [], "label": [1, 7], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 2, 5], 
"challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 5], "annot": 2, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 1, "detect": [], "element": [1, 2, 5], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [1, 2, 5, 6, 7], "d": [], "abdef": [], "num_sampl": [], "100": [5, 6, 7], "vocabulari": [], "sampl": 1, "iter": 1, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [1, 3, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 2, 5, 6], "max_char": [], "list": [1, 2, 6], "none": [1, 2], "callabl": [1, 6], "tupl": [2, 5, 6, 7], "32": [1, 5, 6], "maximum": 1, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 2], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": [], "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": 1, "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 2], "properli": 1, "model": [1, 7], "interpret": [1, 2], "multipl": [1, 2, 6], "name": [1, 5], "10": [1, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": [], "ndarrai": [1, 2, 7], "given": [1, 2, 5, 7], "map": [1, 5], "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 2, 5, 7], "option": [], "start": [], "case": 7, "upper": 6, "enabl": 2, "dynam": [], "tensor": [1, 5, 6], "modul": [2, 5, 6, 7], 
"easili": [2, 5, 7], "export": [2, 3, 7], "analysi": [2, 5], "format": [2, 5], "organ": 2, "uninterrupt": 2, "confid": 2, "float": [2, 6, 7], "associ": 2, "predict": [2, 7], "xmin": 2, "ymin": 2, "xmax": 2, "ymax": 2, "coordin": 2, "rel": 2, "collect": 2, "meant": 2, "two": 2, "column": 2, "horizont": 2, "resolv": 2, "default": [2, 5], "smallest": 2, "enclos": 2, "g": 2, "qr": 2, "pictur": 2, "chart": 2, "signatur": 2, "logo": 2, "etc": 2, "artefact_typ": 2, "type": [2, 5], "sever": [2, 6], "its": [1, 2, 5, 7], "titl": 2, "underneath": 2, "page_idx": 2, "dimens": [2, 5, 7], "dict": [2, 7], "numpi": [2, 5, 7], "arrai": [2, 7], "uint8": [2, 5, 7], "raw": [2, 7], "pixel": [2, 6], "height": 2, "width": 2, "dictionari": [2, 7], "angl": 2, "degress": 2, "preserve_aspect_ratio": 6, "overlai": 2, "displai": [2, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 2, "convers": 2, "read_pdf": 2, "byte": [2, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": 2, "convert": [2, 5, 6], "render": [], "72dpi": [], "output": [2, 6], "rgb": [2, 6], "bgr": 2, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 2, "shape": [2, 5, 6, 7], "h": 2, "x": [2, 6, 7], "w": [2, 7], "c": [], "read_img_as_numpi": [], "output_s": [2, 6], "rgb_output": 2, "expect": [1, 2, 5, 6], "read_img_as_tensor": [], "img_path": [], "dtype": 5, "float32": 5, "desir": [], "relat": [], "divid": [], "255": [5, 6, 7], "decode_img_as_tensor": [], "img_cont": [], "stream": 2, "read_html": 2, "url": [1, 2], "yoursit": 2, "weasyprint": [], "documentfil": 2, "extens": 2, "classmethod": 2, "from_pdf": 2, "binari": [2, 5], "from_url": 2, "from_imag": 2, "page1": 2, "png": 2, "page2": 2, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 3, "larg": [], "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], "imagenet": [], "extractor": 5, 
"resnet18": [], "resnet": 5, "18": 3, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": 5, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": 
[], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": [], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [4, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": 5, "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 
7, "core": [1, 7], "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": 7, "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [3, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": [], "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 1, "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [3, 5], "class_nam": [], "total": [], "date": [], "preprocessor": 5, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], 
"2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 3, "creat": [], "co": [], "instal": 3, "git": 3, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 3, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 5, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 3, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": 5, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], 
"mixed_float16": [], "cuda": [], "re": 1, "exchang": [], "interoper": [], "machin": [], "structur": [3, 5], "layer": [], "metadata": [], "util": [3, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": [1, 5], "separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": 5, "84": [], "39": 5, "85": 5, "86": 5, "93": [], "83": 5, "24": [], "80": [], "29": 5, "90": 5, "67": 5, "76": [], "11": 3, "81": 5, "71": [], "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": [], "28": [], "51": [], "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": 5, "56": [], "68": 5, "92": 5, "61": [], "41": [], "00": [], "79": 5, "38": [], "88": [], "62": [], "26": [], "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": [], "44": [], "14": 5, "55": [], "58": [], "57": [], "66": 5, "01": 5, "98": [], "23": [], "69": 5, "99": [], "91": 5, "05": 3, "09": [], "96": 1, "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": [], "43": 5, "65": 5, "77": 5, "30": [], "07": [], "27": 5, "gvision": 5, "59": 5, "03": 3, "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 2, 5, 7], "nest": [], "get": 2, "typic": [], "layout": [], "340": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], 
"06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 2, "seemlessli": 3, "conda": [], "newer": [], "developp": 4, "fp": 5, "scheme": 5, "deform": 5, "statist": 5, "turn": 5, "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": 5, "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, "extract_arch": 1, "overwrit": 1, "sha256": 1, "archiv": 1, "disk": 1, "775": [], "856": [], "860": [], "862": [], "863": [], 
"sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": 5, "625": 5, "781": 5, "830": 5, "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 2, "convert_page_to_numpi": 2, "get_word": 2, "fitz": 2, "gettextword": 2, "get_artefact": 2, "entir": 2, "fulli": [], "daili": 3, "mix": 3, "fine": 3, "scratch": 3, "special": 3, "recurr": 3, "733": [], "817": [], "745": [], "875": 5, "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": 5, "713": [], "672": [], "789": [], "na": [], "753": 5, "700": 5, "533": 5, "689": 5, "611": 5, "660": 5, "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": 6, "changelog": 3, "v0": 3, "2021": 3, "8m": 5, "02": 5, "5m": 5, "1m": 5, "19": 5, "invoic": 5, "flexibl": 7}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.core": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.documents": [[2, 0, 1, "", "Artefact"], [2, 0, 1, "", "Block"], [2, 0, 1, "", "Document"], [2, 0, 1, "", "DocumentFile"], [2, 0, 1, "", "Line"], [2, 0, 1, "", "PDF"], [2, 0, 1, "", "Page"], [2, 0, 1, "", "Word"], [2, 1, 1, "", "read_html"], [2, 1, 1, "", "read_img"], [2, 1, 1, "", "read_pdf"]], "doctr.documents.Document": [[2, 2, 1, "", "show"]], "doctr.documents.DocumentFile": [[2, 2, 1, "", "from_images"], [2, 2, 1, "", "from_pdf"], [2, 2, 1, "", 
"from_url"]], "doctr.documents.PDF": [[2, 2, 1, "", "as_images"], [2, 2, 1, "", "get_artefacts"], [2, 2, 1, "", "get_words"]], "doctr.documents.Page": [[2, 2, 1, "", "show"]], "doctr.models.detection": [[5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": 0, "05": 0, "28": [], "18": 0, "contributor": [], 
"coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 3, 5, 6, 7], "codebas": [], "structur": 2, "continu": [], "integr": [], "feedback": [], "featur": 3, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 4, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 3], "let": [], "": [], "connect": [], "prerequisit": 4, "via": 4, "python": 4, "packag": [3, 4], "git": 4, "text": [3, 5], "recognit": [3, 5], "main": 3, "model": [3, 5], "zoo": [3, 5], "detect": [3, 5], "support": [1, 3, 6], "dataset": [1, 3], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": [], "word": 2, "line": 2, "artefact": 2, "block": 2, "page": 2, "file": 2, "read": 2, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 3, "your": 3, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [3, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], "do": [], "output": 5, "advanc": [], "option": [], "get": 3, "start": 3, "conda": [], "pre": 5, "process": 5, "post": 5, "build": 3, "implement": [], "content": [], "compress": 5, "savedmodel": 5, "note": 3, "refer": 3}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 
1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "doctr.documents": [[2, "doctr-documents"]], "Document structure": [[2, "document-structure"]], "Word": [[2, "word"]], "Line": [[2, "line"]], "Artefact": [[2, "artefact"]], "Block": [[2, "block"]], "Page": [[2, "page"]], "Document": [[2, "document"]], "File reading": [[2, "file-reading"]], "DocTR: Document Text Recognition": [[3, "doctr-document-text-recognition"]], "Main Features": [[3, "main-features"]], "Getting Started": [[3, "getting-started"]], "Build & train your predictor": [[3, "build-train-your-predictor"]], "Model zoo": [[3, "model-zoo"]], "Text detection models": [[3, "text-detection-models"]], "Text recognition models": [[3, "text-recognition-models"]], "Supported datasets": [[3, "supported-datasets"]], "Notes": [[3, null]], "Package Reference": [[3, null]], "Installation": [[4, "installation"]], "Prerequisites": [[4, "prerequisites"]], "Via Python Package": [[4, "via-python-package"]], "Via Git": [[4, "via-git"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection models": [[5, "detection-models"]], "Post-processing detections": [[5, "post-processing-detections"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": [[5, "text-recognition"]], "Text recognition model zoo": [[5, "id2"]], "Pre-processing for 
recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Post-processing outputs": [[5, "post-processing-outputs"]], "Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.core)": [[1, "doctr.datasets.core.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[2, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[2, "doctr.documents.Block"]], "document (class in doctr.documents)": [[2, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[2, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[2, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[2, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[2, "doctr.documents.Page"]], "word (class in doctr.documents)": [[2, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.as_images"]], "from_images() 
(doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_url"]], "get_artefacts() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[2, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[2, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[2, "doctr.documents.read_pdf"]], "show() (doctr.documents.document method)": [[2, "doctr.documents.Document.show"]], "show() (doctr.documents.page method)": [[2, "doctr.documents.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], 
"colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, "doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "documents", "index", "installing", 
"models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 4], "note": 0, "we": [2, 3, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 2, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 2, "act": [], "interact": [2, 7], "wai": [1, 3, 5], "contribut": [], "an": [1, 2, 3, 5, 7], "open": [], "welcom": 3, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 2, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [4, 5], "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [2, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 3, 5, 6, 7], "focus": [], "what": [], "i": [1, 2, 5, 6, 7], "best": [], "just": 5, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [1, 2, 5, 7], "us": [1, 4, 7], "languag": [2, 3], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 3, 5, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 3, "privat": 5, "publish": [], "inform": [1, 3, 5], "physic": 2, "email": [], "address": 2, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [2, 
7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 2, 4, 5, 6, 7], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 2, "thi": [4, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 3, 5, 6, 7], "space": [], "also": [], "offici": [], "repres": [2, 5], "e": [2, 4], "mail": [], "post": [], "via": 3, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 4, "com": [2, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [4, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 3, 5], "written": 2, "provid": [3, 5], "clariti": [], "around": 5, "natur": 3, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": [], "involv": 5, "unsolicit": [], "specifi": 2, "period": [], "time": [1, 5, 7], "avoid": [], "well": [], "extern": [], "channel": [2, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 2, 6, 7], "adapt": [], "version": 5, "0": [1, 3, 5, 6, 7], "avail": [3, 5, 6], "http": [2, 4], "www": 2, "org": [], "_": [1, 5], "html": [], "were": 2, "inspir": 6, "mozilla": [], "": [2, 7], "ladder": [], "For": [4, 5], "answer": [], "common": [6, 7], "question": [], "about": 5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [4, 5], "need": [4, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 3, "doc": [2, 5], 
"librari": 4, "build": [], "script": [], "refer": [], "train": [1, 5, 6], "demo": [], "small": 3, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 2, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 4, "worklow": [], "run": 4, "job": [], "coverag": [], "codecov": [], "back": [], "result": [2, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 2, 5, 7], "ad": 6, "whether": [1, 2, 7], "encount": [], "problem": [], "suggest": [], "input": [2, 5, 6], "ha": [1, 7], "valu": [2, 6], "can": [1, 4, 5], "purpos": 5, "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [2, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": [], "so": [], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [1, 2, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 5, 7], "stackoverflow": [], "addit": [], "depend": [3, 4], "command": [], "m": [5, 7], "pip": 4, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": 5, "pleas": [], "googl": [], "eas": [], "process": [2, 3], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 2, 5], "same": [2, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 3, 5, 7], "To": [], "togeth": [2, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 3], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": [], "web": 2, "browser": [], "cach": [], "modif": [], "now": [], "locat": [], "index": 2, "wish": [], "somewher": [], "els": [], "than": [4, 7], "join": [], "slack": [], "where": [2, 7], "find": 4, "requir": [4, 6], "3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 4, "whichev": [], "o": 4, "least": [], "tensorflow": [3, 5, 6], "pytorch": [], "correspond": 5, "page": [5, 7], "2": [3, 5, 6], 
"macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [1, 3, 5, 6, 7], "12": 5, "anoth": [1, 4, 5], "linux": 4, "few": 4, "extra": 4, "maco": 4, "user": [2, 3, 4], "them": [1, 4], "homebrew": [], "brew": 4, "cairo": 4, "pango": 4, "gdk": 4, "pixbuf": 4, "libffi": 4, "window": [4, 7], "gtk": 4, "latest": 4, "over": [4, 7], "here": [1, 4, 6], "last": [1, 4, 5], "stabl": 4, "doctr": 4, "strive": [], "reduc": 6, "framework": 1, "minimum": 7, "necessari": [], "featur": [5, 7], "develop": [], "third": [], "parti": [], "miss": [], "tf": [5, 6], "torch": [], "mode": 4, "clone": 4, "state": 3, "art": 3, "optic": [3, 5], "charact": [1, 2, 3, 5, 7], "made": 3, "seamless": 3, "access": [1, 2, 3], "anyon": 3, "power": 3, "easi": [3, 7], "extract": [1, 3], "valuabl": 3, "autom": 3, "seamlessli": [], "understand": [1, 3], "task": [1, 3, 5], "ocr": [1, 3, 7], "predictor": [], "pars": [1, 3], "textual": [1, 2, 3], "identifi": [3, 5], "each": [1, 2, 3, 5, 6, 7], "word": [3, 5, 7], "research": 3, "quickli": 3, "compar": 3, "own": 3, "architectur": [3, 5], "speed": [3, 5], "perform": [2, 3, 5, 6, 7], "robust": 3, "stage": 3, "pretrain": [3, 5, 7], "paramet": [1, 2, 3, 5, 6, 7], "friendli": 3, "line": [3, 7], "code": [2, 3], "load": [3, 5], "googlevis": 3, "aw": [3, 5], "textract": [3, 5], "optim": 3, "infer": [3, 6], "both": [3, 5, 6], "cpu": [3, 5], "gpu": 3, "light": 3, "activ": [], "maintain": 3, "integr": 3, "deploy": [], "dbnet": [3, 5], "real": [5, 6], "scene": 5, "differenti": [3, 5], "binar": [3, 5], "linknet": [3, 5], "exploit": 5, "encod": [1, 2, 5], "represent": 5, "semant": 5, "segment": 5, "sar": [3, 5], "show": [2, 3, 5, 7], "attend": [3, 5], "read": [3, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [3, 5], "end": [1, 3, 7], "trainabl": 5, "neural": [3, 5], "network": [3, 5], "imag": [1, 2, 5, 6, 7], "base": 5, "sequenc": [1, 2, 5, 7], "Its": 5, "applic": 5, "master": [], "multi": [], "aspect": 6, "non": [2, 6, 7], 
"vitstr": [], "vision": [], "transform": [1, 3], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 3, 5], "form": [1, 3], "noisi": [1, 3], "scan": [1, 3], "cord": [1, 3, 5], "consolid": [1, 3], "receipt": [1, 3, 5], "forpost": [1, 3], "sroie": [1, 3], "icdar": 3, "2019": 3, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 3, "geometri": 2, "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 3, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 2, "dual": [], "modal": [], "graph": 2, "kei": [], "bool": [1, 2, 5, 6, 7], "true": [1, 2, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6], "recognition_task": [], "kwarg": [1, 2, 5, 7], "sourc": [1, 2, 5, 6, 7], "document": [1, 5, 7], "import": [1, 2, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 2, 5, 6], "subset": [1, 5], "polygon": [], "rotat": 2, "bound": [2, 5, 6, 7], "box": [2, 5, 7], "instead": 2, "straight": [], "ones": [], "recognit": 7, "keyword": [1, 2], "argument": [1, 2], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [2, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 2, 5, 6, 7], "label_fold": [], "label": [1, 7], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 2, 5], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 5], "annot": 2, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": 
[], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 1, "detect": [], "element": [1, 2, 5], "varieti": [], "arxiv": [], "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [1, 2, 5, 6, 7], "d": [], "abdef": [], "num_sampl": [], "100": [5, 6, 7], "vocabulari": [], "sampl": 1, "iter": 1, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [1, 3, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 2, 5, 6], "max_char": [], "list": [1, 2, 6], "none": [1, 2], "callabl": [1, 6], "tupl": [2, 5, 6, 7], "32": [1, 5, 6], "maximum": 1, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 2], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": [], "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": 1, "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 2], "properli": 1, "model": [1, 7], "interpret": [1, 2], "multipl": [1, 2, 6], "name": [1, 5], "10": [1, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], 
"\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": [], "ndarrai": [1, 2, 7], "given": [1, 2, 5, 7], "map": [1, 5], "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 2, 5, 7], "option": [], "start": [], "case": 7, "upper": 6, "enabl": 2, "dynam": [], "tensor": [1, 5, 6], "modul": [2, 5, 6, 7], "easili": [2, 5, 7], "export": [2, 3, 7], "analysi": [2, 5], "format": [2, 5], "organ": 2, "uninterrupt": 2, "confid": 2, "float": [2, 6, 7], "associ": 2, "predict": [2, 7], "xmin": 2, "ymin": 2, "xmax": 2, "ymax": 2, "coordin": 2, "rel": 2, "collect": 2, "meant": 2, "two": 2, "column": 2, "horizont": 2, "resolv": 
2, "default": [2, 5], "smallest": 2, "enclos": 2, "g": 2, "qr": 2, "pictur": 2, "chart": 2, "signatur": 2, "logo": 2, "etc": 2, "artefact_typ": 2, "type": [2, 5], "sever": [2, 6], "its": [1, 2, 5, 7], "titl": 2, "underneath": 2, "page_idx": 2, "dimens": [2, 5, 7], "dict": [2, 7], "numpi": [2, 5, 7], "arrai": [2, 7], "uint8": [2, 5, 7], "raw": [2, 7], "pixel": [2, 6], "height": 2, "width": 2, "dictionari": [2, 7], "angl": 2, "degress": 2, "preserve_aspect_ratio": 6, "overlai": 2, "displai": [2, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 2, "convers": 2, "read_pdf": 2, "byte": [2, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": 2, "convert": [2, 5, 6], "render": [], "72dpi": [], "output": [2, 6], "rgb": [2, 6], "bgr": 2, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 2, "shape": [2, 5, 6, 7], "h": 2, "x": [2, 6, 7], "w": [2, 7], "c": [], "read_img_as_numpi": [], "output_s": [2, 6], "rgb_output": 2, "expect": [1, 2, 5, 6], "read_img_as_tensor": [], "img_path": [], "dtype": 5, "float32": 5, "desir": [], "relat": [], "divid": [], "255": [5, 6, 7], "decode_img_as_tensor": [], "img_cont": [], "stream": 2, "read_html": 2, "url": [1, 2], "yoursit": 2, "weasyprint": [], "documentfil": 2, "extens": 2, "classmethod": 2, "from_pdf": 2, "binari": [2, 5], "from_url": 2, "from_imag": 2, "page1": 2, "png": 2, "page2": 2, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 3, "larg": [], "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], "imagenet": [], "extractor": 5, "resnet18": [], "resnet": 5, "18": 3, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], 
"mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": 5, "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": [], "1910": [], "02562": [], "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": 
[], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [4, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": 5, "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 7, "core": [1, 7], "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": 7, "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": 
[], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [3, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": [], "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": 1, "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [3, 5], "class_nam": [], "total": [], "date": [], "preprocessor": 5, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], 
"share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 3, "creat": [], "co": [], "instal": 3, "git": 3, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 3, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": 5, "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 3, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": 5, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": 1, "exchang": [], "interoper": [], "machin": [], "structur": [3, 5], "layer": [], "metadata": [], "util": [3, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": [1, 5], 
"separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": 5, "84": [], "39": 5, "85": 5, "86": 5, "93": [], "83": 5, "24": [], "80": [], "29": 5, "90": 5, "67": 5, "76": [], "11": 3, "81": 5, "71": [], "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": [], "28": [], "51": [], "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": 5, "56": [], "68": 5, "92": 5, "61": [], "41": [], "00": [], "79": 5, "38": [], "88": [], "62": [], "26": [], "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": [], "44": [], "14": 5, "55": [], "58": [], "57": [], "66": 5, "01": 5, "98": [], "23": [], "69": 5, "99": [], "91": 5, "05": 3, "09": [], "96": 1, "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": [], "43": 5, "65": 5, "77": 5, "30": [], "07": [], "27": 5, "gvision": 5, "59": 5, "03": 3, "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 2, 5, 7], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": [], "get": 2, "typic": [], "layout": [], "340": [], "text_output": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], 
"3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 2, "seemlessli": 3, "conda": [], "newer": [], "developp": 4, "fp": 5, "scheme": 5, "deform": 5, "statist": 5, "turn": 5, "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": 5, "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, "extract_arch": 1, "overwrit": 1, "sha256": 1, "archiv": 1, "disk": 1, "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": 
5, "625": 5, "781": 5, "830": 5, "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 2, "convert_page_to_numpi": 2, "get_word": 2, "fitz": 2, "gettextword": 2, "get_artefact": 2, "entir": 2, "fulli": [], "daili": 3, "mix": 3, "fine": 3, "scratch": 3, "special": 3, "recurr": 3, "733": [], "817": [], "745": [], "875": 5, "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": 5, "713": [], "672": [], "789": [], "na": [], "753": 5, "700": 5, "533": 5, "689": 5, "611": 5, "660": 5, "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": 6, "changelog": 3, "v0": 3, "2021": 3, "8m": 5, "02": 5, "5m": 5, "1m": 5, "19": 5, "invoic": 5, "flexibl": 7}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.core": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.documents": [[2, 0, 1, "", "Artefact"], [2, 0, 1, "", "Block"], [2, 0, 1, "", "Document"], [2, 0, 1, "", "DocumentFile"], [2, 0, 1, "", "Line"], [2, 0, 1, "", "PDF"], [2, 0, 1, "", "Page"], [2, 0, 1, "", "Word"], [2, 1, 1, "", "read_html"], [2, 1, 1, "", "read_img"], [2, 1, 1, "", "read_pdf"]], "doctr.documents.Document": [[2, 2, 1, "", "show"]], "doctr.documents.DocumentFile": [[2, 2, 1, "", "from_images"], [2, 2, 1, "", "from_pdf"], [2, 2, 1, "", "from_url"]], "doctr.documents.PDF": [[2, 2, 1, "", "as_images"], [2, 2, 1, "", "get_artefacts"], [2, 2, 1, "", "get_words"]], "doctr.documents.Page": [[2, 2, 1, "", 
"show"]], "doctr.models.detection": [[5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": 0, "05": 0, "28": [], "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], 
"temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 3, 5, 6, 7], "codebas": [], "structur": 2, "continu": [], "integr": [], "feedback": [], "featur": 3, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 4, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 3], "let": [], "": [], "connect": [], "prerequisit": 4, "via": 4, "python": 4, "packag": [3, 4], "git": 4, "text": [3, 5], "recognit": [3, 5], "main": 3, "model": [3, 5], "zoo": [3, 5], "detect": [3, 5], "support": [1, 3, 6], "dataset": [1, 3], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": [], "word": 2, "line": 2, "artefact": 2, "block": 2, "page": 2, "file": 2, "read": 2, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 3, "your": 3, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [3, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], "do": [], "output": 5, "advanc": [], "option": [], "get": 3, "start": 3, "conda": [], "pre": 5, "process": 5, "post": 5, "build": 3, "implement": [], "content": [], "compress": 5, "savedmodel": 5, "note": 3, "refer": 3}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 
1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "doctr.documents": [[2, "doctr-documents"]], "Document structure": [[2, "document-structure"]], "Word": [[2, "word"]], "Line": [[2, "line"]], "Artefact": [[2, "artefact"]], "Block": [[2, "block"]], "Page": [[2, "page"]], "Document": [[2, "document"]], "File reading": [[2, "file-reading"]], "DocTR: Document Text Recognition": [[3, "doctr-document-text-recognition"]], "Main Features": [[3, "main-features"]], "Getting Started": [[3, "getting-started"]], "Build & train your predictor": [[3, "build-train-your-predictor"]], "Model zoo": [[3, "model-zoo"]], "Text detection models": [[3, "text-detection-models"]], "Text recognition models": [[3, "text-recognition-models"]], "Supported datasets": [[3, "supported-datasets"]], "Notes": [[3, null]], "Package Reference": [[3, null]], "Installation": [[4, "installation"]], "Prerequisites": [[4, "prerequisites"]], "Via Python Package": [[4, "via-python-package"]], "Via Git": [[4, "via-git"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection models": [[5, "detection-models"]], "Post-processing detections": [[5, "post-processing-detections"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": [[5, "text-recognition"]], "Text recognition model zoo": [[5, "id2"]], "Pre-processing for recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Post-processing outputs": [[5, "post-processing-outputs"]], 
"Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.core)": [[1, "doctr.datasets.core.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[2, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[2, "doctr.documents.Block"]], "document (class in doctr.documents)": [[2, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[2, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[2, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[2, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[2, "doctr.documents.Page"]], "word (class in doctr.documents)": [[2, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.as_images"]], "from_images() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[2, 
"doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_url"]], "get_artefacts() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[2, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[2, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[2, "doctr.documents.read_pdf"]], "show() (doctr.documents.document method)": [[2, "doctr.documents.Document.show"]], "show() (doctr.documents.page method)": [[2, "doctr.documents.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], 
"lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, "doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.2.1/using_doctr/using_models.html b/v0.2.1/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.2.1/using_doctr/using_models.html +++ b/v0.2.1/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure, you can pass the following arguments to the predictor, which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example, to disable the automatic grouping of lines into blocks:

+
from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.3.0/_sources/using_doctr/using_models.rst.txt b/v0.3.0/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.3.0/_sources/using_doctr/using_models.rst.txt +++ b/v0.3.0/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.3.0/searchindex.js b/v0.3.0/searchindex.js index 311fdf497e..3aaea9c430 100644 --- a/v0.3.0/searchindex.js +++ b/v0.3.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "documents", "index", "installing", "models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 4], "note": 0, "we": [2, 3, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 2, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 2, "act": [], "interact": [2, 7], "wai": [1, 3, 5], "contribut": [], "an": [1, 2, 3, 5, 7], "open": [], "welcom": 3, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 2, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [4, 5], "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [2, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 3, 5, 6, 7], "focus": [], "what": [], "i": [1, 2, 5, 6, 7], "best": [], "just": 5, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [1, 2, 5, 7], "us": [1, 4, 
7], "languag": [2, 3], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 3, 5, 6, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 3, "privat": 5, "publish": [], "inform": [1, 3, 5], "physic": 2, "email": [], "address": 2, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [1, 2, 7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 2, 4, 5, 6, 7], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 2, "thi": [4, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 3, 5, 6, 7], "space": [], "also": [], "offici": [], "repres": [2, 5], "e": [2, 4], "mail": [], "post": 5, "via": 3, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 4, "com": [2, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 4, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 3, 5], "written": 2, "provid": [3, 5], "clariti": [], "around": 5, "natur": 3, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": [], "involv": 5, "unsolicit": [], "specifi": 2, "period": [], "time": [1, 5, 7], "avoid": [], "well": [], "extern": [], "channel": [2, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 2, 6, 7], "adapt": [], "version": 5, "0": [1, 3, 5, 6, 7], "avail": [3, 5, 
6], "http": [2, 4, 5], "www": 2, "org": 5, "_": [1, 5], "html": [], "were": 2, "inspir": 6, "mozilla": [], "": [2, 7], "ladder": [], "For": [4, 5], "answer": [], "common": [6, 7], "question": [], "about": 5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [4, 5], "need": [4, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 3, "doc": [2, 5], "librari": 4, "build": [], "script": [], "refer": 4, "train": [1, 5, 6], "demo": [], "small": 3, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 2, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 4, "worklow": [], "run": 4, "job": [], "coverag": [], "codecov": [], "back": [], "result": [2, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 2, 5, 7], "ad": 6, "whether": [1, 2, 7], "encount": [], "problem": [], "suggest": [], "input": [2, 5, 6], "ha": [1, 7], "valu": [2, 6], "can": [1, 4, 5], "purpos": [], "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [2, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": 4, "so": [1, 4], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [1, 2, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 5, 7], "stackoverflow": [], "addit": [], "depend": [3, 4], "command": [], "m": [5, 7], "pip": 4, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": [1, 5], "pleas": [], "googl": [], "eas": [], "process": [2, 3], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 2, 5], "same": [2, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 3, 5, 7], "To": [], "togeth": [2, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 3], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": [], 
"web": 2, "browser": [], "cach": [], "modif": [], "now": 3, "locat": [], "index": 2, "wish": [], "somewher": [], "els": [], "than": [4, 7], "join": [], "slack": [], "where": [2, 7], "find": 4, "requir": [4, 6], "3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 4, "whichev": 4, "o": 4, "least": 4, "tensorflow": [3, 4, 5, 6], "pytorch": [3, 4], "correspond": [4, 5], "page": [4, 5, 7], "2": [3, 5, 6], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [1, 3, 5, 6, 7], "12": 5, "anoth": [1, 4, 5], "linux": 4, "few": 4, "extra": 4, "maco": 4, "user": [2, 3, 4], "them": [1, 4], "homebrew": [], "brew": 4, "cairo": 4, "pango": 4, "gdk": 4, "pixbuf": 4, "libffi": 4, "window": [4, 7], "gtk": 4, "latest": 4, "over": [4, 7], "here": [1, 4, 6], "last": [1, 4, 5], "stabl": 4, "doctr": 4, "strive": [], "reduc": 6, "framework": 1, "minimum": 7, "necessari": [], "featur": [5, 7], "develop": [], "third": [], "parti": [], "miss": [], "tf": [5, 6], "torch": [], "mode": 4, "clone": 4, "state": 3, "art": 3, "optic": [3, 5], "charact": [1, 2, 3, 5, 7], "made": 3, "seamless": 3, "access": [1, 2, 3], "anyon": 3, "power": 3, "easi": [3, 7], "extract": [1, 3], "valuabl": 3, "autom": 3, "seamlessli": [], "understand": [1, 3], "task": [1, 3, 5], "ocr": [1, 3, 7], "predictor": [], "pars": [1, 3], "textual": [1, 2, 3], "identifi": [3, 5], "each": [1, 2, 3, 5, 6, 7], "word": [3, 5, 7], "research": 3, "quickli": 3, "compar": 3, "own": 3, "architectur": [3, 5], "speed": [3, 5], "perform": [2, 3, 5, 6, 7], "robust": 3, "stage": 3, "pretrain": [3, 5, 7], "paramet": [1, 2, 3, 5, 6, 7], "friendli": 3, "line": [3, 7], "code": [2, 3], "load": [3, 5], "googlevis": 3, "aw": [3, 5], "textract": [3, 5], "optim": 3, "infer": [3, 6], "both": [3, 5, 6], "cpu": [3, 5], "gpu": 3, "light": 3, "activ": [], "maintain": 3, "integr": 3, "deploy": [], "dbnet": [3, 5], "real": [5, 6], "scene": [3, 5], "differenti": [3, 5], "binar": [3, 5], "linknet": [3, 5], "exploit": 5, "encod": [1, 2, 5], 
"represent": 5, "semant": 5, "segment": 5, "sar": [3, 5], "show": [2, 3, 5, 7], "attend": [3, 5], "read": [3, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [3, 5], "end": [1, 3, 7], "trainabl": 5, "neural": [3, 5], "network": [3, 5], "imag": [1, 2, 5, 6, 7], "base": 5, "sequenc": [1, 2, 5, 7], "Its": 5, "applic": 5, "master": [3, 5], "multi": 3, "aspect": [3, 6], "non": [2, 3, 6, 7], "vitstr": [], "vision": [], "transform": [1, 3], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 3, 5], "form": [1, 3], "noisi": [1, 3], "scan": [1, 3], "cord": [1, 3, 5], "consolid": [1, 3], "receipt": [1, 3, 5], "forpost": [1, 3], "sroie": [1, 3], "icdar": 3, "2019": 3, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 3, "geometri": 2, "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 3, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 2, "dual": [], "modal": [], "graph": 2, "kei": [], "bool": [1, 2, 5, 6, 7], "true": [1, 2, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6, 7], "recognition_task": [], "kwarg": [1, 2, 5, 7], "sourc": [1, 2, 5, 6, 7], "document": [1, 5, 7], "import": [1, 2, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 2, 5, 6], "subset": [1, 5], "polygon": 1, "rotat": [1, 2], "bound": [1, 2, 6, 7], "box": [1, 2, 7], "instead": [1, 2], "straight": 1, "ones": 1, "recognit": 7, "keyword": [1, 2], "argument": [1, 2], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [2, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 2, 5, 6, 7], "label_fold": [], "label": [1, 7], "part": 6, "challeng": [], 
"task2": [], "2015": [], "path": [1, 2, 5], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 5], "annot": 2, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 1, "detect": [], "element": [1, 2, 5], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [1, 2, 5, 6, 7], "d": [], "abdef": [], "num_sampl": [], "100": [5, 6, 7], "vocabulari": [], "sampl": 1, "iter": 1, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [1, 3, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 2, 5, 6, 7], "max_char": [], "list": [1, 2, 6], "none": [1, 2, 7], "callabl": [1, 6], "tupl": [2, 5, 6, 7], "32": [1, 5, 6], "maximum": 1, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 2], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": [], "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": 1, "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 2], "properli": 1, "model": [1, 7], "interpret": [1, 2], "multipl": [1, 2, 6], "name": [1, 5], "10": [1, 5, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": [], "ndarrai": [1, 2, 7], "given": [1, 2, 5, 7], "map": 1, "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 2, 5, 7], "option": 1, "start": 1, "case": [1, 7], "upper": 6, "enabl": 2, "dynam": [], "tensor": [1, 5, 6], "modul": [2, 5, 6, 7], 
"easili": [2, 5, 7], "export": [2, 3, 7], "analysi": [2, 5], "format": [2, 5], "organ": 2, "uninterrupt": 2, "confid": 2, "float": [2, 6, 7], "associ": 2, "predict": [2, 7], "xmin": 2, "ymin": 2, "xmax": 2, "ymax": 2, "coordin": 2, "rel": 2, "collect": 2, "meant": 2, "two": 2, "column": 2, "horizont": 2, "resolv": 2, "default": [2, 5], "smallest": 2, "enclos": 2, "g": 2, "qr": 2, "pictur": 2, "chart": 2, "signatur": 2, "logo": 2, "etc": 2, "artefact_typ": 2, "type": [2, 5], "sever": [2, 6], "its": [1, 2, 5, 7], "titl": 2, "underneath": 2, "page_idx": 2, "dimens": [2, 5, 7], "dict": [2, 7], "numpi": [2, 5, 7], "arrai": [2, 7], "uint8": [2, 5, 7], "raw": [2, 7], "pixel": [2, 6], "height": 2, "width": 2, "dictionari": [2, 7], "angl": 2, "degress": 2, "preserve_aspect_ratio": 6, "overlai": 2, "displai": [2, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 2, "convers": 2, "read_pdf": 2, "byte": [2, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": [2, 5], "convert": [2, 5, 6], "render": [], "72dpi": [], "output": [2, 5, 6], "rgb": [2, 6], "bgr": 2, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 2, "shape": [2, 5, 6, 7], "h": 2, "x": [2, 6, 7], "w": [2, 7], "c": [], "read_img_as_numpi": [], "output_s": [2, 6], "rgb_output": 2, "expect": [2, 5, 6], "read_img_as_tensor": [], "img_path": [], "dtype": 5, "float32": 5, "desir": [], "relat": [], "divid": [], "255": [5, 6, 7], "decode_img_as_tensor": [], "img_cont": [], "stream": 2, "read_html": 2, "url": [1, 2], "yoursit": 2, "weasyprint": [], "documentfil": 2, "extens": 2, "classmethod": 2, "from_pdf": 2, "binari": 2, "from_url": 2, "from_imag": 2, "page1": 2, "png": 2, "page2": 2, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 3, "larg": [], "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], "imagenet": [], "extractor": 5, 
"resnet18": [], "resnet": 5, "18": 3, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": [5, 7], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": 
[], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": [], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [4, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": 5, "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 
7, "core": 7, "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": 7, "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [3, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": 7, "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [3, 5], "class_nam": [], "total": [], "date": [], "preprocessor": 5, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": 
[], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 3, "creat": [], "co": [], "instal": 3, "git": 3, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 3, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 3, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": 5, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], 
"mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": [3, 5], "layer": [], "metadata": [], "util": [3, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": 5, "separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": 5, "84": [], "39": 5, "85": 5, "86": 5, "93": [], "83": 5, "24": [], "80": [], "29": 5, "90": 5, "67": 5, "76": [], "11": 3, "81": 5, "71": [], "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": [], "28": 3, "51": [], "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": 5, "56": [], "68": 5, "92": 5, "61": 5, "41": [], "00": 5, "79": 5, "38": [], "88": [], "62": 5, "26": [], "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": [], "44": [], "14": 5, "55": [], "58": [], "57": [], "66": 5, "01": 5, "98": [], "23": [], "69": 5, "99": [], "91": 5, "05": 3, "09": [], "96": 1, "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": [], "43": 5, "65": 5, "77": 5, "30": 5, "07": [], "27": 5, "gvision": 5, "59": 5, "03": 3, "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 2, 5, 7], "nest": [], "get": 2, "typic": [], "layout": [], "340": [], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": 
[], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 2, "seemlessli": 3, "conda": [], "newer": [], "developp": 4, "fp": 5, "scheme": 5, "deform": 5, "statist": 5, "turn": [], "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, "extract_arch": 1, "overwrit": 1, "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], 
"sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 2, "convert_page_to_numpi": 2, "get_word": 2, "fitz": 2, "gettextword": 2, "get_artefact": 2, "entir": 2, "fulli": [], "daili": 3, "mix": 3, "fine": 3, "scratch": 3, "special": 3, "recurr": 3, "733": [], "817": [], "745": [], "875": [], "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": [], "changelog": 3, "v0": 3, "2021": 3, "8m": 5, "02": 5, "5m": 5, "1m": 5, "19": 5, "invoic": 5, "flexibl": 7, "rotated_bbox": [1, 7], "beta": 3, "linknet16": 5, "160": 5}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.datasets": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.documents": [[2, 0, 1, "", "Artefact"], [2, 0, 1, "", "Block"], [2, 0, 1, "", "Document"], [2, 0, 1, "", "DocumentFile"], [2, 0, 1, "", "Line"], [2, 0, 1, "", "PDF"], [2, 0, 1, "", "Page"], [2, 0, 1, "", "Word"], [2, 1, 1, "", "read_html"], [2, 1, 1, "", "read_img"], [2, 1, 1, "", "read_pdf"]], "doctr.documents.Document": [[2, 2, 1, "", "show"]], "doctr.documents.DocumentFile": 
[[2, 2, 1, "", "from_images"], [2, 2, 1, "", "from_pdf"], [2, 2, 1, "", "from_url"]], "doctr.documents.PDF": [[2, 2, 1, "", "as_images"], [2, 2, 1, "", "get_artefacts"], [2, 2, 1, "", "get_words"]], "doctr.documents.Page": [[2, 2, 1, "", "show"]], "doctr.models.detection": [[5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": 
[], "08": [], "27": [], "07": [], "02": [], "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 3, 5, 6, 7], "codebas": [], "structur": 2, "continu": [], "integr": [], "feedback": [], "featur": 3, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 4, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 3], "let": [], "": [], "connect": [], "prerequisit": 4, "via": 4, "python": 4, "packag": [3, 4], "git": 4, "text": [3, 5], "recognit": [3, 5], "main": 3, "model": [3, 5], "zoo": [3, 5], "detect": [3, 5], "support": [1, 3, 6], "dataset": [1, 3], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": [], "word": 2, "line": 2, "artefact": 2, "block": 2, "page": 2, "file": 2, "read": 2, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 3, "your": 3, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [3, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], "do": [], "output": [], "advanc": [], "option": [], "get": 3, "start": 3, "conda": [], "pre": 5, "process": 5, "post": [], "build": 3, "implement": [], "content": [], "compress": 5, "savedmodel": 5, "note": 3, "refer": 3}, "envversion": {"sphinx.domains.c": 3, 
"sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "doctr.documents": [[2, "doctr-documents"]], "Document structure": [[2, "document-structure"]], "Word": [[2, "word"]], "Line": [[2, "line"]], "Artefact": [[2, "artefact"]], "Block": [[2, "block"]], "Page": [[2, "page"]], "Document": [[2, "document"]], "File reading": [[2, "file-reading"]], "DocTR: Document Text Recognition": [[3, "doctr-document-text-recognition"]], "Main Features": [[3, "main-features"]], "Getting Started": [[3, "getting-started"]], "Build & train your predictor": [[3, "build-train-your-predictor"]], "Model zoo": [[3, "model-zoo"]], "Text detection models": [[3, "text-detection-models"]], "Text recognition models": [[3, "text-recognition-models"]], "Supported datasets": [[3, "supported-datasets"]], "Notes": [[3, null]], "Package Reference": [[3, null]], "Installation": [[4, "installation"]], "Prerequisites": [[4, "prerequisites"]], "Via Python Package": [[4, "via-python-package"]], "Via Git": [[4, "via-git"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection models": [[5, "detection-models"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": 
[[5, "text-recognition"]], "Text recognition model zoo": [[5, "id2"]], "Pre-processing for recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.datasets)": [[1, "doctr.datasets.datasets.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[2, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[2, "doctr.documents.Block"]], "document (class in doctr.documents)": [[2, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[2, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[2, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[2, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[2, "doctr.documents.Page"]], "word (class in doctr.documents)": [[2, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[2, 
"doctr.documents.PDF.as_images"]], "from_images() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_url"]], "get_artefacts() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[2, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[2, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[2, "doctr.documents.read_pdf"]], "show() (doctr.documents.document method)": [[2, "doctr.documents.Document.show"]], "show() (doctr.documents.page method)": [[2, "doctr.documents.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, 
"doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, "doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, 
"doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "documents", "index", "installing", "models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "documents.rst", "index.rst", "installing.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "doctr.documents", "DocTR: Document Text Recognition", "Installation", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 4], "note": 0, "we": [2, 3, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 2, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": 2, "act": [], "interact": [2, 7], "wai": [1, 3, 5], "contribut": [], "an": [1, 2, 3, 5, 7], "open": [], "welcom": 3, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 2, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [4, 5], "demonstr": [], "empathi": [], "kind": [], "toward": [], "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [2, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 3, 5, 6, 7], "focus": [], "what": [], "i": [1, 2, 5, 6, 7], "best": [], "just": 5, "u": [], "individu": [], "overal": [], "unaccept": [], "The": [1, 2, 5, 7], "us": [1, 4, 7], "languag": [2, 3], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 3, 5, 6, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 3, "privat": 5, 
"publish": [], "inform": [1, 3, 5], "physic": 2, "email": [], "address": 2, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [1, 2, 7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 2, 4, 5, 6, 7], "clarifi": [], "take": [], "appropri": [], "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 2, "thi": [4, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 3, 5, 6, 7], "space": [], "also": [], "offici": [], "repres": [2, 5], "e": [2, 4], "mail": [], "post": 5, "via": 3, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 4, "com": [2, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 4, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 3, 5], "written": 2, "provid": [3, 5], "clariti": [], "around": 5, "natur": 3, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": [], "involv": 5, "unsolicit": [], "specifi": 2, "period": [], "time": [1, 5, 7], "avoid": [], "well": [], "extern": [], "channel": [2, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 2, 6, 7], "adapt": [], "version": 5, "0": [1, 3, 5, 6, 7], "avail": [3, 5, 6], "http": [2, 4, 5], "www": 2, "org": 5, "_": [1, 5], "html": [], "were": 2, "inspir": 6, "mozilla": [], "": [2, 7], "ladder": [], "For": [4, 5], "answer": [], "common": [6, 7], "question": [], "about": 
5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [4, 5], "need": [4, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 3, "doc": [2, 5], "librari": 4, "build": [], "script": [], "refer": 4, "train": [1, 5, 6], "demo": [], "small": 3, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 2, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 4, "worklow": [], "run": 4, "job": [], "coverag": [], "codecov": [], "back": [], "result": [2, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 2, 5, 7], "ad": 6, "whether": [1, 2, 7], "encount": [], "problem": [], "suggest": [], "input": [2, 5, 6], "ha": [1, 7], "valu": [2, 6], "can": [1, 4, 5], "purpos": [], "advis": [], "first": [], "check": [], "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [2, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": 4, "so": [1, 4], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": [], "gener": [], "should": [1, 2, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 5, 7], "stackoverflow": [], "addit": [], "depend": [3, 4], "command": [], "m": [5, 7], "pip": 4, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": [1, 5], "pleas": [], "googl": [], "eas": [], "process": [2, 3], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 2, 5], "same": [2, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 3, 5, 7], "To": [], "togeth": [2, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 3], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": [], "web": 2, "browser": [], "cach": [], "modif": [], "now": 3, "locat": [], "index": 2, "wish": [], "somewher": [], "els": [], "than": [4, 7], "join": [], "slack": [], "where": [2, 7], "find": 4, "requir": [4, 6], 
"3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 4, "whichev": 4, "o": 4, "least": 4, "tensorflow": [3, 4, 5, 6], "pytorch": [3, 4], "correspond": [4, 5], "page": [4, 5, 7], "2": [3, 5, 6], "macbook": [], "m1": [], "chip": [], "some": [], "metal": [], "plugin": [], "1": [1, 3, 5, 6, 7], "12": 5, "anoth": [1, 4, 5], "linux": 4, "few": 4, "extra": 4, "maco": 4, "user": [2, 3, 4], "them": [1, 4], "homebrew": [], "brew": 4, "cairo": 4, "pango": 4, "gdk": 4, "pixbuf": 4, "libffi": 4, "window": [4, 7], "gtk": 4, "latest": 4, "over": [4, 7], "here": [1, 4, 6], "last": [1, 4, 5], "stabl": 4, "doctr": 4, "strive": [], "reduc": 6, "framework": 1, "minimum": 7, "necessari": [], "featur": [5, 7], "develop": [], "third": [], "parti": [], "miss": [], "tf": [5, 6], "torch": [], "mode": 4, "clone": 4, "state": 3, "art": 3, "optic": [3, 5], "charact": [1, 2, 3, 5, 7], "made": 3, "seamless": 3, "access": [1, 2, 3], "anyon": 3, "power": 3, "easi": [3, 7], "extract": [1, 3], "valuabl": 3, "autom": 3, "seamlessli": [], "understand": [1, 3], "task": [1, 3, 5], "ocr": [1, 3, 7], "predictor": [], "pars": [1, 3], "textual": [1, 2, 3], "identifi": [3, 5], "each": [1, 2, 3, 5, 6, 7], "word": [3, 5, 7], "research": 3, "quickli": 3, "compar": 3, "own": 3, "architectur": [3, 5], "speed": [3, 5], "perform": [2, 3, 5, 6, 7], "robust": 3, "stage": 3, "pretrain": [3, 5, 7], "paramet": [1, 2, 3, 5, 6, 7], "friendli": 3, "line": [3, 7], "code": [2, 3], "load": [3, 5], "googlevis": 3, "aw": [3, 5], "textract": [3, 5], "optim": 3, "infer": [3, 6], "both": [3, 5, 6], "cpu": [3, 5], "gpu": 3, "light": 3, "activ": [], "maintain": 3, "integr": 3, "deploy": [], "dbnet": [3, 5], "real": [5, 6], "scene": [3, 5], "differenti": [3, 5], "binar": [3, 5], "linknet": [3, 5], "exploit": 5, "encod": [1, 2, 5], "represent": 5, "semant": 5, "segment": 5, "sar": [3, 5], "show": [2, 3, 5, 7], "attend": [3, 5], "read": [3, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [3, 5], "end": [1, 3, 7], 
"trainabl": 5, "neural": [3, 5], "network": [3, 5], "imag": [1, 2, 5, 6, 7], "base": 5, "sequenc": [1, 2, 5, 7], "Its": 5, "applic": 5, "master": [3, 5], "multi": 3, "aspect": [3, 6], "non": [2, 3, 6, 7], "vitstr": [], "vision": [], "transform": [1, 3], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 3, 5], "form": [1, 3], "noisi": [1, 3], "scan": [1, 3], "cord": [1, 3, 5], "consolid": [1, 3], "receipt": [1, 3, 5], "forpost": [1, 3], "sroie": [1, 3], "icdar": 3, "2019": 3, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 3, "geometri": 2, "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 3, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 2, "dual": [], "modal": [], "graph": 2, "kei": [], "bool": [1, 2, 5, 6, 7], "true": [1, 2, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6, 7], "recognition_task": [], "kwarg": [1, 2, 5, 7], "sourc": [1, 2, 5, 6, 7], "document": [1, 5, 7], "import": [1, 2, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 2, 5, 6], "subset": [1, 5], "polygon": 1, "rotat": [1, 2], "bound": [1, 2, 6, 7], "box": [1, 2, 7], "instead": [1, 2], "straight": 1, "ones": 1, "recognit": 7, "keyword": [1, 2], "argument": [1, 2], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [2, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 2, 5, 6, 7], "label_fold": [], "label": [1, 7], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 2, 5], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], 
"folder": [1, 5], "annot": 2, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [], "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": 1, "detect": [], "element": [1, 2, 5], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": [], "implement": [1, 2, 5, 6, 7], "d": [], "abdef": [], "num_sampl": [], "100": [5, 6, 7], "vocabulari": [], "sampl": 1, "iter": 1, "cache_sampl": [], "firsthand": [], "font_famili": [], "font": [], "img_transform": [], "compos": [1, 3, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 2, 5, 6, 7], "max_char": [], "list": [1, 2, 6], "none": [1, 2, 7], "callabl": [1, 6], "tupl": [2, 5, 6, 7], "32": [1, 5, 6], "maximum": 1, "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 2], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": [], "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": 1, "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 2], "properli": 1, "model": [1, 7], "interpret": [1, 2], "multipl": [1, 2, 6], "name": [1, 5], "10": [1, 5, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": [], "ndarrai": [1, 2, 7], "given": [1, 2, 5, 7], "map": 1, "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 2, 5, 7], "option": 1, "start": 1, "case": [1, 7], "upper": 6, "enabl": 2, "dynam": [], "tensor": [1, 5, 6], "modul": [2, 5, 6, 7], 
"easili": [2, 5, 7], "export": [2, 3, 7], "analysi": [2, 5], "format": [2, 5], "organ": 2, "uninterrupt": 2, "confid": 2, "float": [2, 6, 7], "associ": 2, "predict": [2, 7], "xmin": 2, "ymin": 2, "xmax": 2, "ymax": 2, "coordin": 2, "rel": 2, "collect": 2, "meant": 2, "two": 2, "column": 2, "horizont": 2, "resolv": 2, "default": [2, 5], "smallest": 2, "enclos": 2, "g": 2, "qr": 2, "pictur": 2, "chart": 2, "signatur": 2, "logo": 2, "etc": 2, "artefact_typ": 2, "type": [2, 5], "sever": [2, 6], "its": [1, 2, 5, 7], "titl": 2, "underneath": 2, "page_idx": 2, "dimens": [2, 5, 7], "dict": [2, 7], "numpi": [2, 5, 7], "arrai": [2, 7], "uint8": [2, 5, 7], "raw": [2, 7], "pixel": [2, 6], "height": 2, "width": 2, "dictionari": [2, 7], "angl": 2, "degress": 2, "preserve_aspect_ratio": 6, "overlai": 2, "displai": [2, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 2, "convers": 2, "read_pdf": 2, "byte": [2, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": [2, 5], "convert": [2, 5, 6], "render": [], "72dpi": [], "output": [2, 5, 6], "rgb": [2, 6], "bgr": 2, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 2, "shape": [2, 5, 6, 7], "h": 2, "x": [2, 6, 7], "w": [2, 7], "c": [], "read_img_as_numpi": [], "output_s": [2, 6], "rgb_output": 2, "expect": [2, 5, 6], "read_img_as_tensor": [], "img_path": [], "dtype": 5, "float32": 5, "desir": [], "relat": [], "divid": [], "255": [5, 6, 7], "decode_img_as_tensor": [], "img_cont": [], "stream": 2, "read_html": 2, "url": [1, 2], "yoursit": 2, "weasyprint": [], "documentfil": 2, "extens": 2, "classmethod": 2, "from_pdf": 2, "binari": 2, "from_url": 2, "from_imag": 2, "page1": 2, "png": 2, "page2": 2, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 3, "larg": [], "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], "imagenet": [], "extractor": 5, 
"resnet18": [], "resnet": 5, "18": 3, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": [5, 7], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": [], "mobilenet": [], "v3": [], "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": [], "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": 
[], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": [], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [4, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], "randomrot": [], "max_angl": [], "expand": [], "degre": [], "uniformli": [], "randomcrop": [], "08": 5, "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 
7, "core": 7, "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": 7, "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [3, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": 7, "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], "better": [], "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": [], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [3, 5], "class_nam": [], "total": [], "date": [], "preprocessor": 5, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": 
[], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 3, "creat": [], "co": [], "instal": 3, "git": 3, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 3, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 3, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": 5, "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], 
"mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": [3, 5], "layer": [], "metadata": [], "util": [3, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": 5, "separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": 5, "84": [], "39": 5, "85": 5, "86": 5, "93": [], "83": 5, "24": [], "80": [], "29": 5, "90": 5, "67": 5, "76": [], "11": 3, "81": 5, "71": [], "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": [], "28": 3, "51": [], "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": 5, "56": [], "68": 5, "92": 5, "61": 5, "41": [], "00": 5, "79": 5, "38": [], "88": [], "62": 5, "26": [], "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": [], "44": [], "14": 5, "55": [], "58": [], "57": [], "66": 5, "01": 5, "98": [], "23": [], "69": 5, "99": [], "91": 5, "05": 3, "09": [], "96": 1, "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": [], "43": 5, "65": 5, "77": 5, "30": 5, "07": [], "27": 5, "gvision": 5, "59": 5, "03": 3, "azur": [], "recogn": [], "42": [], "go": [], "mention": [], "still": [], "return": [1, 2, 5, 7], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": [], "get": 2, "typic": [], "layout": [], "340": [], "text_output": 
[], "json_output": [], "1357421875": [], "0361328125": [], "8564453125": [], "8603515625": [], "914085328578949": [], "5478515625": [], "06640625": [], "5810546875": [], "0966796875": [], "9949972033500671": [], "51171875": [], "1630859375": [], "9578408598899841": [], "1396484375": [], "3232421875": [], "185546875": [], "3515625": [], "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 2, "seemlessli": 3, "conda": [], "newer": [], "developp": 4, "fp": 5, "scheme": 5, "deform": 5, "statist": 5, "turn": [], "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, 
"extract_arch": 1, "overwrit": 1, "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 2, "convert_page_to_numpi": 2, "get_word": 2, "fitz": 2, "gettextword": 2, "get_artefact": 2, "entir": 2, "fulli": [], "daili": 3, "mix": 3, "fine": 3, "scratch": 3, "special": 3, "recurr": 3, "733": [], "817": [], "745": [], "875": [], "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": [], "changelog": 3, "v0": 3, "2021": 3, "8m": 5, "02": 5, "5m": 5, "1m": 5, "19": 5, "invoic": 5, "flexibl": 7, "rotated_bbox": [1, 7], "beta": 3, "linknet16": 5, "160": 5}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.datasets": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.documents": [[2, 0, 1, "", "Artefact"], [2, 0, 1, "", "Block"], [2, 0, 1, "", "Document"], [2, 0, 1, "", "DocumentFile"], [2, 0, 1, "", "Line"], [2, 0, 1, "", "PDF"], [2, 0, 1, "", "Page"], [2, 0, 1, "", "Word"], [2, 1, 1, "", "read_html"], [2, 1, 1, "", 
"read_img"], [2, 1, 1, "", "read_pdf"]], "doctr.documents.Document": [[2, 2, 1, "", "show"]], "doctr.documents.DocumentFile": [[2, 2, 1, "", "from_images"], [2, 2, 1, "", "from_pdf"], [2, 2, 1, "", "from_url"]], "doctr.documents.PDF": [[2, 2, 1, "", "as_images"], [2, 2, 1, "", "get_artefacts"], [2, 2, 1, "", "get_words"]], "doctr.documents.Page": [[2, 2, 1, "", "show"]], "doctr.models.detection": [[5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": 
[], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": [], "08": [], "27": [], "07": [], "02": [], "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 3, 5, 6, 7], "codebas": [], "structur": 2, "continu": [], "integr": [], "feedback": [], "featur": 3, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 4, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 3], "let": [], "": [], "connect": [], "prerequisit": 4, "via": 4, "python": 4, "packag": [3, 4], "git": 4, "text": [3, 5], "recognit": [3, 5], "main": 3, "model": [3, 5], "zoo": [3, 5], "detect": [3, 5], "support": [1, 3, 6], "dataset": [1, 3], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": [], "word": 2, "line": 2, "artefact": 2, "block": 2, "page": 2, "file": 2, "read": 2, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 3, "your": 3, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [3, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], "do": [], "output": [], "advanc": [], "option": [], "get": 3, "start": 3, "conda": [], "pre": 5, "process": 5, "post": [], "build": 3, "implement": 
[], "content": [], "compress": 5, "savedmodel": 5, "note": 3, "refer": 3}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "doctr.documents": [[2, "doctr-documents"]], "Document structure": [[2, "document-structure"]], "Word": [[2, "word"]], "Line": [[2, "line"]], "Artefact": [[2, "artefact"]], "Block": [[2, "block"]], "Page": [[2, "page"]], "Document": [[2, "document"]], "File reading": [[2, "file-reading"]], "DocTR: Document Text Recognition": [[3, "doctr-document-text-recognition"]], "Main Features": [[3, "main-features"]], "Getting Started": [[3, "getting-started"]], "Build & train your predictor": [[3, "build-train-your-predictor"]], "Model zoo": [[3, "model-zoo"]], "Text detection models": [[3, "text-detection-models"]], "Text recognition models": [[3, "text-recognition-models"]], "Supported datasets": [[3, "supported-datasets"]], "Notes": [[3, null]], "Package Reference": [[3, null]], "Installation": [[4, "installation"]], "Prerequisites": [[4, "prerequisites"]], "Via Python Package": [[4, "via-python-package"]], "Via Git": [[4, "via-git"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection 
models": [[5, "detection-models"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": [[5, "text-recognition"]], "Text recognition model zoo": [[5, "id2"]], "Pre-processing for recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.datasets)": [[1, "doctr.datasets.datasets.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.documents)": [[2, "doctr.documents.Artefact"]], "block (class in doctr.documents)": [[2, "doctr.documents.Block"]], "document (class in doctr.documents)": [[2, "doctr.documents.Document"]], "documentfile (class in doctr.documents)": [[2, "doctr.documents.DocumentFile"]], "line (class in doctr.documents)": [[2, "doctr.documents.Line"]], "pdf (class in doctr.documents)": [[2, "doctr.documents.PDF"]], "page (class in doctr.documents)": [[2, "doctr.documents.Page"]], "word (class in doctr.documents)": 
[[2, "doctr.documents.Word"]], "as_images() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.as_images"]], "from_images() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_images"]], "from_pdf() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_pdf"]], "from_url() (doctr.documents.documentfile class method)": [[2, "doctr.documents.DocumentFile.from_url"]], "get_artefacts() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_artefacts"]], "get_words() (doctr.documents.pdf method)": [[2, "doctr.documents.PDF.get_words"]], "read_html() (in module doctr.documents)": [[2, "doctr.documents.read_html"]], "read_img() (in module doctr.documents)": [[2, "doctr.documents.read_img"]], "read_pdf() (in module doctr.documents)": [[2, "doctr.documents.read_pdf"]], "show() (doctr.documents.document method)": [[2, "doctr.documents.Document.show"]], "show() (doctr.documents.page method)": [[2, "doctr.documents.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, 
"doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, 
"doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.3.0/using_doctr/using_models.html b/v0.3.0/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.3.0/using_doctr/using_models.html +++ b/v0.3.0/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure, you can pass the following arguments to the predictor, which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example, to disable the automatic grouping of lines into blocks:

+
from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.3.1/_sources/using_doctr/using_models.rst.txt b/v0.3.1/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.3.1/_sources/using_doctr/using_models.rst.txt +++ b/v0.3.1/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.3.1/searchindex.js b/v0.3.1/searchindex.js index 774e05f2ee..0241f23ad0 100644 --- a/v0.3.1/searchindex.js +++ b/v0.3.1/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "DocTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 3], "note": 0, "we": [2, 3, 4, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 5], "act": [], "interact": [4, 7], "wai": [1, 2, 5], "contribut": [], "an": [1, 2, 4, 5, 7], "open": [], "welcom": 2, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [3, 5], "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 4, 5, 6, 7], "focus": [], "what": [], "i": [1, 4, 5, 6, 7], "best": [], "just": 5, "u": 5, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 5, 7], "us": [1, 3, 7], "languag": 
[2, 4, 5], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 4, 5, 6, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 5, "publish": [], "inform": [1, 2, 5], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [1, 4, 7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 3, 4, 5, 6, 7], "clarifi": [], "take": 1, "appropri": 5, "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [3, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 4, 5, 6, 7], "space": [], "also": 5, "offici": [], "repres": [4, 5], "e": [3, 4], "mail": [], "post": 5, "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 3, "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5], "written": 4, "provid": [2, 5], "clariti": [], "around": 5, "natur": 2, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": 5, "involv": 5, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 5, 7], "avoid": 3, "well": [], "extern": [], "channel": [4, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 6, 7], "adapt": [], "version": 5, "0": [1, 2, 5, 6, 7], "avail": [2, 5, 6], "http": 
[3, 4, 5], "www": 4, "org": 5, "_": [1, 5], "html": [], "were": 4, "inspir": 6, "mozilla": [], "": [4, 7], "ladder": [], "For": [3, 5], "answer": [], "common": [6, 7], "question": [], "about": 5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [3, 5], "need": [3, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 2, "doc": [4, 5], "librari": 3, "build": 3, "script": [], "refer": [3, 5], "train": [1, 5, 6], "demo": [], "small": 2, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 4, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 4, 5, 7], "ad": 6, "whether": [1, 4, 6, 7], "encount": [], "problem": [], "suggest": [], "input": [4, 5, 6], "ha": [1, 7], "valu": [4, 5, 6], "can": [1, 3, 5], "purpos": [], "advis": [], "first": [], "check": 5, "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": 3, "so": [1, 3], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": 5, "gener": 1, "should": [1, 4, 6, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 5, 7], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [5, 7], "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": [1, 5], "pleas": [], "googl": [], "eas": [], "process": [2, 4], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4, 5], "same": [4, 5, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 7], "To": [3, 5], "togeth": [4, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 2], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": 
[], "web": 4, "browser": [], "cach": 1, "modif": [], "now": 2, "locat": 4, "index": 4, "wish": [], "somewher": [], "els": [], "than": [3, 7], "join": [], "slack": [], "where": [4, 7], "find": 3, "requir": [3, 6], "3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 3, "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 6], "pytorch": [2, 3], "correspond": [3, 5], "page": [3, 5, 7], "2": [2, 5, 6], "macbook": [], "m1": [], "chip": [], "some": 3, "metal": [], "plugin": [], "1": [1, 2, 5, 6, 7], "12": 5, "anoth": [1, 3, 5], "linux": 3, "few": 3, "extra": 3, "maco": 3, "user": [2, 3, 4], "them": [1, 3, 5], "homebrew": [], "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7], "gtk": 3, "latest": 3, "over": [1, 3, 7], "here": [1, 3, 5, 6], "last": [1, 3, 5], "stabl": 3, "doctr": 3, "strive": 3, "reduc": [3, 6], "framework": [1, 3], "minimum": [3, 7], "necessari": 3, "featur": [3, 5, 7], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 6], "torch": 3, "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": [2, 5], "charact": [1, 2, 4, 5, 7], "made": 2, "seamless": 2, "access": [1, 2, 4], "anyon": 2, "power": 2, "easi": [2, 7], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [1, 2, 5], "task": [1, 2, 5], "ocr": [1, 2, 7], "predictor": [], "pars": [1, 2], "textual": [1, 2, 4], "identifi": [2, 5], "each": [1, 2, 4, 5, 6, 7], "word": [2, 5, 7], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 5], "speed": [2, 5], "perform": [2, 4, 5, 6, 7], "robust": 2, "stage": 2, "pretrain": [2, 5, 7], "paramet": [1, 2, 4, 5, 6, 7], "friendli": 2, "line": [2, 5, 7], "code": [2, 3, 4], "load": [2, 5], "googlevis": 2, "aw": [2, 5], "textract": [2, 5], "optim": 2, "infer": [2, 6], "both": [2, 5, 6], "cpu": [2, 5], "gpu": 2, "light": 2, "activ": [], "maintain": 2, "integr": 2, "deploy": [], "dbnet": [2, 5], "real": [5, 6], "scene": [2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 
5], "exploit": 5, "encod": [1, 4, 5], "represent": 5, "semant": 5, "segment": 5, "sar": [2, 5], "show": [2, 4, 5, 7], "attend": [2, 5], "read": [2, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [2, 5], "end": [1, 2, 7], "trainabl": 5, "neural": [2, 5], "network": [2, 5], "imag": [1, 4, 5, 6, 7], "base": 5, "sequenc": [1, 4, 5, 7], "Its": 5, "applic": 5, "master": [2, 5], "multi": 2, "aspect": [2, 6], "non": [2, 4, 6, 7], "vitstr": [], "vision": [], "transform": [1, 2], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 5], "form": [1, 2, 5], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 5], "consolid": [1, 2], "receipt": [1, 2, 5], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, "2019": 2, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 2, "geometri": [4, 5], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 4, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 6, 7], "true": [1, 4, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6, 7], "recognition_task": [], "kwarg": [1, 4, 5, 7], "sourc": [1, 4, 5, 6, 7], "document": [1, 5, 7], "import": [1, 4, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 4, 5, 6], "subset": [1, 5], "polygon": 1, "rotat": [1, 4, 6], "bound": [1, 4, 6, 7], "box": [1, 4, 7], "instead": [1, 4], "straight": 1, "ones": 1, "recognit": 7, "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [4, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 6, 7], "label_fold": 
[], "label": [1, 7], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 5], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 5], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 5, "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [1, 5], "detect": [], "element": [1, 4, 5], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 5, 6, 7], "d": 1, "abdef": 1, "num_sampl": 1, "100": [5, 6, 7], "vocabulari": 1, "sampl": 1, "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": [], "font": [], "img_transform": [], "compos": [1, 2, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 5, 6, 7], "max_char": [], "list": [1, 4, 6], "none": [1, 4, 5, 7], "callabl": [1, 6], "tupl": [4, 5, 6, 7], "32": [1, 5, 6], "maximum": [1, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 6], "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 4], "properli": 1, "model": [1, 7], "interpret": [1, 4], "multipl": [1, 4, 6], "name": [1, 5], "10": [1, 5, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": 5, "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": 1, "ndarrai": [1, 4, 7], "given": [1, 4, 5, 7], "map": 1, "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 4, 5, 7], "option": 1, "start": 1, "case": [1, 7], "upper": [1, 6], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 5, 6], "modul": [4, 5, 
6, 7], "easili": [4, 5, 7], "export": [2, 4, 7], "analysi": [4, 5], "format": [4, 5], "organ": 4, "uninterrupt": 4, "confid": [4, 5], "float": [4, 6, 7], "associ": 4, "predict": [4, 7], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": 4, "rel": 4, "collect": 4, "meant": 4, "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 5], "smallest": 4, "enclos": 4, "g": 4, "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 5], "sever": [4, 6], "its": [1, 4, 5, 7], "titl": 4, "underneath": 4, "page_idx": [4, 5], "dimens": [4, 5, 7], "dict": [4, 5, 7], "numpi": [4, 5, 7], "arrai": [4, 7], "uint8": [4, 5, 7], "raw": [4, 7], "pixel": [4, 6], "height": 4, "width": 4, "dictionari": [4, 7], "angl": [4, 6], "degress": 4, "preserve_aspect_ratio": 6, "overlai": 4, "displai": [4, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 5, 6], "render": [], "72dpi": [], "output": [4, 6], "rgb": [4, 6], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 6, 7], "h": 4, "x": [4, 6, 7], "w": [4, 7], "c": [], "read_img_as_numpi": 4, "output_s": [4, 6], "rgb_output": 4, "expect": [4, 5, 6], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5], "float32": [4, 5], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 6, 7], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": [1, 4], "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": 4, "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 2, "larg": 5, "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], 
"imagenet": [], "extractor": 5, "resnet18": [], "resnet": 5, "18": 2, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": [5, 7], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": 5, "mobilenet": 5, "v3": 5, "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": 5, "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": 3, "improv": [], "kie_predictor": [], 
"kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": [], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [3, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], "randomrot": 6, "max_angl": 6, "expand": 6, "degre": 6, "uniformli": 6, "randomcrop": [], "08": [], "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, 
"randomappli": 6, "regroup": 7, "core": 7, "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": [5, 7], "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [2, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": 7, "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], "better": 5, "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": 5, "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [2, 5], "class_nam": [], "total": [], "date": 5, "preprocessor": 5, "det_predictor": [], "798": [], "785": [], 
"772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": 2, "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 2, "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 2, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 2, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": [1, 5], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": 
[], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": [2, 5], "layer": [], "metadata": [], "util": [2, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": 5, "separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [5, 6], "84": 5, "39": 5, "85": 5, "86": 5, "93": 5, "83": 5, "24": [], "80": 5, "29": 5, "90": 5, "67": 5, "76": 5, "11": [2, 5], "81": 5, "71": 5, "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": 5, "28": [2, 5], "51": 5, "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": [], "56": 5, "68": 5, "92": 5, "61": 5, "41": 5, "00": 5, "79": 5, "38": [], "88": [], "62": 5, "26": 5, "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": 5, "44": [], "14": 5, "55": [], "58": [], "57": 5, "66": 5, "01": [], "98": 5, "23": 5, "69": 5, "99": 5, "91": 5, "05": 2, "09": [], "96": [1, 5], "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": 5, "43": 5, "65": 5, "77": 5, "30": 5, "07": [2, 5], "27": 5, "gvision": 5, "59": 5, "03": [2, 5], "azur": [], "recogn": [], "42": 5, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 7], "nest": 5, "get": [4, 5], "typic": 5, "layout": 5, "340": 5, "json_output": 5, "1357421875": 5, "0361328125": 5, "8564453125": 5, "8603515625": 5, 
"914085328578949": 5, "5478515625": 5, "06640625": 5, "5810546875": 5, "0966796875": 5, "9949972033500671": 5, "51171875": 5, "1630859375": 5, "9578408598899841": 5, "1396484375": 5, "3232421875": 5, "185546875": 5, "3515625": 5, "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 5, "scheme": 5, "deform": 5, "statist": 5, "turn": [], "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, "extract_arch": 1, "overwrit": 1, "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], 
"862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": 2, "mix": 2, "fine": 2, "scratch": 2, "special": 2, "recurr": 2, "733": [], "817": [], "745": [], "875": [], "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": [], "changelog": 2, "v0": 2, "2021": 2, "8m": 5, "02": [2, 5], "5m": 5, "1m": 5, "19": [], "invoic": 5, "flexibl": 7, "rotated_bbox": [1, 7], "beta": 2, "linknet16": 5, "160": 5, "arg": 1, "bash": 3, "tax": 5, "35": 5}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.datasets": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 
1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomRotate"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python 
function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": 0, "08": [], "27": [], "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": [2, 3], "git": 3, "text": [2, 5], "recognit": [2, 5], "main": 2, "model": [2, 5], "zoo": [2, 5], "detect": [2, 5], "support": [1, 2, 6], "dataset": [1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 2, "your": 2, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [2, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], 
"do": [], "output": 5, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 5, "process": 5, "post": [], "build": 2, "implement": [], "content": [], "compress": 5, "savedmodel": 5, "note": 2, "refer": 2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Getting Started": [[2, "getting-started"]], "Build & train your predictor": [[2, "build-train-your-predictor"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Notes": [[2, null]], "Package Reference": [[2, null]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": 
[[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection models": [[5, "detection-models"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": [[5, "text-recognition"]], "Text recognition model zoo": [[5, "id4"]], "Pre-processing for recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Export model output": [[5, "export-model-output"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.datasets)": [[1, "doctr.datasets.datasets.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class 
in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], "convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, 
"doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[6, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], 
"localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, "doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "transforms", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "transforms.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "DocTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils"], "terms": {"releas": [0, 3], "note": 0, "we": [2, 3, 4, 5, 6], "member": [], "leader": [], "make": [5, 7], "particip": [], "commun": [], "harass": [], "free": [], "experi": 5, "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4, 5, 6], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [5, 7], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 5], "act": [], "interact": [4, 7], "wai": [1, 2, 5], "contribut": [], "an": [1, 2, 4, 5, 7], "open": [], "welcom": 2, "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 6, 7], "behavior": [], "posit": 7, "environ": [], "includ": [3, 5], "demonstr": [], "empathi": [], "kind": [], "toward": 
3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 5], "affect": [], "mistak": [], "learn": 5, "from": [1, 2, 4, 5, 6, 7], "focus": [], "what": [], "i": [1, 4, 5, 6, 7], "best": [], "just": 5, "u": 5, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 5, 7], "us": [1, 3, 7], "languag": [2, 4, 5], "imageri": [], "attent": [], "advanc": [], "ani": [1, 2, 4, 5, 6, 7], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 5, "publish": [], "inform": [1, 2, 5], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 5, "could": [], "reason": [], "consid": [1, 4, 7], "inappropri": [], "profession": [], "set": [1, 5, 7], "ar": [1, 3, 4, 5, 6, 7], "clarifi": [], "take": 1, "appropri": 5, "fair": [], "action": [], "thei": [], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 5, 7], "right": [5, 7], "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [3, 5, 7], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 2, 4, 5, 6, 7], "space": [], "also": 5, "offici": [], "repres": [4, 5], "e": [3, 4], "mail": [], "post": 5, "via": 2, "social": [], "media": [], "account": [], "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 5, "abus": [], "otherwis": 7, "mai": [], "report": [], "contact": [], "minde": 3, "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 5, 6, 7], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5], "written": 4, "provid": [2, 5], "clariti": [], "around": 5, "natur": 2, "explan": 5, "why": [], "wa": [], "apologi": [], "request": [], 
"through": [1, 6], "singl": [], "seri": [], "continu": [], "No": 5, "involv": 5, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 5, 7], "avoid": 3, "well": [], "extern": [], "channel": [4, 5, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 6, 7], "adapt": [], "version": 5, "0": [1, 2, 5, 6, 7], "avail": [2, 5, 6], "http": [3, 4, 5], "www": 4, "org": 5, "_": [1, 5], "html": [], "were": 4, "inspir": 6, "mozilla": [], "": [4, 7], "ladder": [], "For": [3, 5], "answer": [], "common": [6, 7], "question": [], "about": 5, "see": [], "faq": [], "translat": [], "everyth": [], "you": [3, 5], "need": [3, 7], "know": [], "effici": [1, 5], "project": [], "packag": 7, "python": 2, "doc": [4, 5], "librari": 3, "build": 3, "script": [], "refer": [3, 5], "train": [1, 5, 6], "demo": [], "small": 2, "app": [], "showcas": [], "capabl": 5, "api": [], "minim": [], "templat": 4, "deploi": [], "rest": [6, 7], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 5], "As": [], "contributor": [], "onli": [6, 7], "your": [1, 4, 5, 7], "ad": 6, "whether": [1, 4, 6, 7], "encount": [], "problem": [], "suggest": [], "input": [4, 5, 6], "ha": [1, 7], "valu": [4, 5, 6], "can": [1, 3, 5], "purpos": [], "advis": [], "first": [], "check": 5, "topic": [], "wasn": [], "t": 1, "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5], "feel": [], "new": [], "one": [1, 5, 6], "do": 3, "so": [1, 3], "whenev": [], "possibl": 7, "enough": 5, "jump": [], "wonder": [], "how": [], "someth": [], "more": 5, "gener": 1, "should": [1, 4, 6, 7], "out": [5, 6, 7], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 5, 7], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [5, 7], "pip": 3, "upgrad": [], "dev": [], "pre": [], "docstr": [], "In": [1, 5], "pleas": [], 
"googl": [], "eas": [], "process": [2, 4], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4, 5], "same": [4, 5, 7], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 7], "To": [3, 5], "togeth": [4, 5], "current": [], "built": [], "sphinx": [], "thank": [], "our": 5, "file": [1, 2], "been": [5, 7], "rebuilt": [], "want": [], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": 5, "clear": [], "web": 4, "browser": [], "cach": 1, "modif": [], "now": 2, "locat": 4, "index": 4, "wish": [], "somewher": [], "els": [], "than": [3, 7], "join": [], "slack": [], "where": [4, 7], "find": 3, "requir": [3, 6], "3": [2, 3, 4, 5, 6, 7], "8": [5, 6], "higher": 3, "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 6], "pytorch": [2, 3], "correspond": [3, 5], "page": [3, 5, 7], "2": [2, 5, 6], "macbook": [], "m1": [], "chip": [], "some": 3, "metal": [], "plugin": [], "1": [1, 2, 5, 6, 7], "12": 5, "anoth": [1, 3, 5], "linux": 3, "few": 3, "extra": 3, "maco": 3, "user": [2, 3, 4], "them": [1, 3, 5], "homebrew": [], "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7], "gtk": 3, "latest": 3, "over": [1, 3, 7], "here": [1, 3, 5, 6], "last": [1, 3, 5], "stabl": 3, "doctr": 3, "strive": 3, "reduc": [3, 6], "framework": [1, 3], "minimum": [3, 7], "necessari": 3, "featur": [3, 5, 7], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 6], "torch": 3, "mode": 3, "clone": 3, "state": 2, "art": 2, "optic": [2, 5], "charact": [1, 2, 4, 5, 7], "made": 2, "seamless": 2, "access": [1, 2, 4], "anyon": 2, "power": 2, "easi": [2, 7], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": [], "understand": [1, 2, 5], "task": [1, 2, 5], "ocr": [1, 2, 7], "predictor": [], "pars": [1, 2], "textual": [1, 2, 4], "identifi": [2, 5], "each": [1, 2, 4, 5, 6, 7], "word": [2, 5, 7], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 5], "speed": [2, 5], 
"perform": [2, 4, 5, 6, 7], "robust": 2, "stage": 2, "pretrain": [2, 5, 7], "paramet": [1, 2, 4, 5, 6, 7], "friendli": 2, "line": [2, 5, 7], "code": [2, 3, 4], "load": [2, 5], "googlevis": 2, "aw": [2, 5], "textract": [2, 5], "optim": 2, "infer": [2, 6], "both": [2, 5, 6], "cpu": [2, 5], "gpu": 2, "light": 2, "activ": [], "maintain": 2, "integr": 2, "deploy": [], "dbnet": [2, 5], "real": [5, 6], "scene": [2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": 5, "encod": [1, 4, 5], "represent": 5, "semant": 5, "segment": 5, "sar": [2, 5], "show": [2, 4, 5, 7], "attend": [2, 5], "read": [2, 5], "simpl": 5, "strong": 5, "baselin": 5, "irregular": 5, "crnn": [2, 5], "end": [1, 2, 7], "trainabl": 5, "neural": [2, 5], "network": [2, 5], "imag": [1, 4, 5, 6, 7], "base": 5, "sequenc": [1, 4, 5, 7], "Its": 5, "applic": 5, "master": [2, 5], "multi": 2, "aspect": [2, 6], "non": [2, 4, 6, 7], "vitstr": [], "vision": [], "transform": [1, 2], "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 5], "form": [1, 2, 5], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 5], "consolid": [1, 2], "receipt": [1, 2, 5], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, "2019": 2, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": 2, "geometri": [4, 5], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [2, 4, 5, 6, 7], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 6, 7], "true": [1, 4, 5, 6, 7], "use_polygon": [], "fals": [1, 5, 6, 7], "recognition_task": [], "kwarg": [1, 4, 5, 7], "sourc": [1, 4, 5, 6, 7], "document": [1, 5, 7], "import": [1, 4, 5, 6, 7], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 4, 5, 6], "subset": [1, 5], "polygon": 1, "rotat": [1, 4, 6], 
"bound": [1, 4, 6, 7], "box": [1, 4, 7], "instead": [1, 4], "straight": 1, "ones": 1, "recognit": 7, "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [4, 7], "prior": [], "svt": [], "ucsd": [], "comput": [5, 7], "hous": [], "number": [1, 6, 7], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 6, 7], "label_fold": [], "label": [1, 7], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 5], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 5], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 5, "pure": [], "mnt": [], "ramdisk": [], "max": 7, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [1, 5], "detect": [], "element": [1, 4, 5], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 5, 6, 7], "d": 1, "abdef": 1, "num_sampl": 1, "100": [5, 6, 7], "vocabulari": 1, "sampl": 1, "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": [], "font": [], "img_transform": [], "compos": [1, 2, 5], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 5, 6, 7], "max_char": [], "list": [1, 4, 6], "none": [1, 4, 5, 7], "callabl": [1, 6], "tupl": [4, 5, 6, 7], "32": [1, 5, 6], "maximum": [1, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": [], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 6], "pass": [1, 5], "batch": [1, 5, 6], "drop": 1, "isn": 1, "full": [1, 5, 7], "worker": 
1, "function": [5, 6, 7], "merg": [], "sinc": 1, "content": [1, 4], "properli": 1, "model": [1, 7], "interpret": [1, 4], "multipl": [1, 4, 6], "name": [1, 5], "10": [1, 5, 7], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 5], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 7], "ancient_greek": [], "48": 5, "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": 5, "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": [], "legacy_french": [], "123": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 5], "126": [], "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": [], "portugues": [], "131": [], "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": [], "spanish": [], "116": [], "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": [], "italian": [], "120": [], 
"\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": [], "108": [], "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": [], "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], 
"\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 6], "dynamic_seq_length": 1, "ndarrai": [1, 4, 7], "given": [1, 4, 5, 7], "map": 1, "n": [1, 5, 7], "length": 1, "Of": 1, "string": [1, 4, 5, 7], "option": 1, "start": 1, "case": [1, 7], "upper": [1, 6], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 5, 6], "modul": [4, 5, 6, 7], "easili": [4, 5, 7], "export": [2, 4, 7], "analysi": [4, 5], "format": [4, 5], "organ": 4, "uninterrupt": 4, "confid": [4, 5], "float": [4, 6, 7], "associ": 4, "predict": [4, 7], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": 4, "rel": 4, "collect": 4, "meant": 4, "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 5], "smallest": 4, "enclos": 4, "g": 4, "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 5], "sever": [4, 6], "its": [1, 4, 5, 7], "titl": 4, "underneath": 4, "page_idx": [4, 5], "dimens": [4, 5, 7], "dict": [4, 5, 7], "numpi": [4, 5, 7], "arrai": [4, 7], "uint8": [4, 5, 7], "raw": [4, 7], "pixel": [4, 6], "height": 4, "width": 4, "dictionari": [4, 7], "angl": [4, 6], "degress": 4, "preserve_aspect_ratio": 6, "overlai": 4, "displai": [4, 7], "matplotlib": 7, "pyplot": 7, "method": 6, "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 5], "scale": 7, "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 5, 6], "render": [], "72dpi": [], "output": [4, 6], "rgb": [4, 6], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 6, 7], "h": 4, "x": [4, 6, 7], "w": [4, 7], "c": [], "read_img_as_numpi": 4, "output_s": [4, 6], "rgb_output": 4, "expect": [4, 5, 6], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5], "float32": [4, 5], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 6, 7], "decode_img_as_tensor": 4, "img_cont": 4, 
"stream": 4, "read_html": 4, "url": [1, 4], "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": 4, "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": [], "deep": 5, "convolut": 2, "larg": 5, "modifi": [], "normal": [5, 6], "rectangular": [], "pool": [], "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7], "uniform": [5, 6], "512": [], "maxval": [5, 6], "imagenet": [], "extractor": 5, "resnet18": [], "resnet": 5, "18": 2, "residu": [], "boolean": [], "resnet34": [], "34": [], "resnet50": [], "50": 5, "resnet31": [], "downsiz": [], "4": [], "mobilenet_v3_smal": [], "mobilenetv3": [], "search": [], "kera": 5, "mobilenet_v3_larg": [], "mobilenet_v3_small_r": [], "mobilenet_v3_large_r": [], "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 7, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7], "600": [5, 7], "800": [5, 7], "astyp": [5, 7], "crop": 5, "dataset": 5, "linknet_resnet18": [], "1024": [5, 7], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": 5, "backbon": 5, "db_mobilenet_v3_larg": 5, "mobilenet": 5, "v3": 5, "detection_predictor": 5, "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 7], "itself": [], "fit": [], "crnn_vgg16_bn": 5, "128": 5, "crnn_mobilenet_v3_smal": [], "crnn_mobilenet_v3_larg": 5, "sar_resnet31": 5, "31": 5, "64": [5, 6], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], 
"recognition_predictor": 5, "recognitionpredictor": 5, "ocr_predictor": 5, "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 5, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": [], "final": [], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": 5, "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 5, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 5], "configur": [], "my": [], "procedur": 6, "draw": 6, "design": 6, "torchvis": 6, "resiz": [5, 6], "bilinear": [5, 6], "transfo": 6, "minval": 6, "interpol": [5, 6], "zero": [5, 6], "while": 6, "done": 6, "mean": [6, 7], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [5, 6], "per": [5, 6], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": 6, "shift": 6, "randomli": 6, "invert": 6, "6": [3, 5, 6], "rang": 6, "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 7], "pick": 6, "p": 6, "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 7], "param": [5, 6], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 7], 
"randomrot": 6, "max_angl": 6, "expand": 6, "degre": 6, "uniformli": 6, "randomcrop": [], "08": [], "75": 5, "33": [], "min_area": [], "max_area": [], "min_ratio": [], "max_ratio": [], "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [5, 6], "sequenti": [5, 6], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 7, "core": 7, "complementari": 7, "sens": 7, "visualize_pag": 7, "words_onli": 7, "display_artefact": 7, "add_label": 7, "figur": 7, "block": [5, 7], "plt": 7, "ocr_db_crnn": 7, "artefact": [5, 7], "figsiz": 7, "largest": 7, "side": 7, "plot": 7, "static": 7, "top": 7, "synthesize_pag": [], "draw_proba": [], "respons": [], "blank": [], "blue": [], "red": [], "font_siz": [], "13": 5, "famili": [], "synthes": [], "metric": [5, 7], "assess": 7, "textmatch": 7, "match": [2, 7], "accuraci": 7, "aggreg": [1, 7], "foral": 7, "y": 7, "mathcal": 7, "frac": 7, "sum": 7, "limits_": 7, "f_": 7, "y_i": 7, "x_i": 7, "indic": 7, "defin": 7, "f_a": 7, "left": 7, "begin": 7, "ll": 7, "mbox": 7, "strictli": 7, "integ": 7, "updat": 7, "hello": 7, "world": 7, "summari": 7, "gt": [], "pred": [], "groung": [], "truth": 7, "exact": 7, "score": 7, "counterpart": 7, "unidecod": 7, "localizationconfus": 7, "iou_thresh": 7, "mask_shap": 7, "use_broadcast": [], "confus": 7, "iou": 7, "recal": [5, 7], "g_": 7, "precis": [5, 7], "meaniou": 7, "j": 7, "y_j": 7, "being": [5, 7], "intersect": 7, "union": 7, "g_x": 7, "assign": 7, "_i": 7, "geq": 7, "ground": 7, "asarrai": 7, "70": [5, 7], "110": 7, "95": 7, "200": 7, "150": 7, "pair": 7, "broadcast": [], "consum": [], "memori": [], "either": 5, "ocrmetr": 7, "l": 7, "hat": 7, "h_": 7, "b_j": 7, "l_j": 7, "gt_box": [], "pred_box": [], "gt_label": [], "pred_label": [], "comparison": 7, "detectionmetr": [], "c_j": [], "compil": [], 
"better": 5, "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 5, "searchabl": [], "don": [], "meet": [], "detail": [], "link": [], "section": 5, "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [2, 5], "class_nam": [], "total": [], "date": 5, "preprocessor": 5, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": 5, "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": 2, "custom": [], "felix92": [], "db": [], "vgg16": 5, "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": 2, "creat": [], "co": [], "instal": 2, "git": 2, "lf": [], "my_awesome_model": [], "v1": [], "directli": 5, "after": [], "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 2, 5], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": [], "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], 
"regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [], "sensit": [], "abl": [], "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": [], "achiev": [], "might": 5, "tune": 2, "thing": [], "product": [], "readi": [], "help": [], "support": 5, "devic": [], "fp16": [1, 5], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": [2, 5], "layer": [], "metadata": [], "util": [2, 5], "export_model_to_onnx": [], "input_shap": 5, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 5, "onc": 5, "separ": 5, "compon": 5, "charg": 5, "usabl": 5, "backend": 5, "along": 5, "processor": 5, "reusabl": 5, "consist": [], "delimit": [], "2d": [], "corner": [], "flag": [], "belong": [], "skew": [], "comprehens": [], "benchmark": [], "publicli": [], "sec": [], "25": [5, 6], "84": 5, "39": 5, "85": 5, "86": 5, "93": 5, "83": 5, "24": [], "80": 5, "29": 5, "90": 5, "67": 5, "76": 5, "11": [2, 5], "81": 5, "71": 5, "7": 5, "21": 5, "82": 5, "20": 5, "49": 5, "87": 5, "63": 5, "17": 5, "28": [2, 5], "51": 5, "46": 5, "db_resnet34": [], "22": [], "89": 5, "74": [], "56": 5, "68": 5, "92": 5, "61": 5, "41": 5, "00": 5, "79": 5, "38": [], "88": [], "62": 5, "26": 5, "06": [], "78": 5, "47": 5, "54": [], "abov": 5, "cf": 5, "disclaim": 5, "combin": 5, "199": 5, "second": 5, "warmup": [], "phase": [], "measur": 5, "1000": 5, "obtain": [], "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": [], "useabl": [], "favorit": [], "dummy_img": [], "area": [], "send": [], "snippet": [], "transcrib": [], "partial": [], "15": 5, "9": [], "73": 5, "44": [], "14": 5, "55": [], "58": [], "57": 5, "66": 5, "01": [], "98": 5, "23": 5, 
"69": 5, "99": 5, "91": 5, "05": 2, "09": [], "96": [1, 5], "40": [], "53": 5, "most": 5, "print": [], "cfg": [], "30595": 5, "45": [], "72": 5, "43": 5, "65": 5, "77": 5, "30": 5, "07": [2, 5], "27": 5, "gvision": 5, "59": 5, "03": [2, 5], "azur": [], "recogn": [], "42": 5, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 7], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 5, "get": [4, 5], "typic": 5, "layout": 5, "340": 5, "text_output": [], "json_output": 5, "1357421875": 5, "0361328125": 5, "8564453125": 5, "8603515625": 5, "914085328578949": 5, "5478515625": 5, "06640625": 5, "5810546875": 5, "0966796875": 5, "9949972033500671": 5, "51171875": 5, "1630859375": 5, "9578408598899841": 5, "1396484375": 5, "3232421875": 5, "185546875": 5, "3515625": 5, "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 5, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": [], "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 5, 
"scheme": 5, "deform": 5, "statist": 5, "turn": [], "easier": 5, "let": 5, "db_resnet50_predictor": [], "sar_vgg16_bn": 5, "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": 5, "tf_model": 5, "tflite": 5, "conv_sequ": 5, "relu": 5, "kernel_s": 5, "serialized_model": 5, "convert_to_fp16": 5, "half": 5, "serial": 5, "quantize_model": 5, "quantiz": 5, "exclud": 5, "inherit": [1, 5], "abstract": 1, "verifi": 1, "file_nam": 1, "file_hash": 1, "extract_arch": 1, "overwrit": 1, "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": 1, "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": 2, "mix": 2, "fine": 2, "scratch": 2, "special": 2, "recurr": 2, "733": [], "817": [], "745": [], "875": [], "frame": 5, "feed": 5, "warm": 5, "c5": 5, "x12larg": 5, "xeon": 5, "platinum": 5, "8275l": 5, "913": [], "917": [], "921": [], "crnn_resnet31": 5, "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 5, "input_t": 5, "saved_model": 5, "And": 5, "nestedobject": [], "changelog": 2, "v0": 2, "2021": 2, "8m": 5, "02": [2, 5], "5m": 5, "1m": 5, "19": [], "invoic": 5, "flexibl": 7, "rotated_bbox": [1, 7], "beta": 2, "linknet16": 5, "160": 5, "arg": 1, "bash": 
3, "tax": 5, "35": 5}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.datasets": [[1, 0, 1, "", "VisionDataset"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models.export": [[5, 1, 1, "", "convert_to_fp16"], [5, 1, 1, "", "convert_to_tflite"], [5, 1, 1, "", "quantize_model"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"], [5, 1, 1, "", "sar_vgg16_bn"]], "doctr.models.zoo": [[5, 1, 1, "", "ocr_predictor"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", 
"RandomRotate"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[7, 0, 1, "", "LocalizationConfusion"], [7, 0, 1, "", "OCRMetric"], [7, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.OCRMetric": [[7, 2, 1, "", "summary"]], "doctr.utils.metrics.TextMatch": [[7, 2, 1, "", "summary"]], "doctr.utils.visualization": [[7, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": 0, "08": [], "27": [], "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": [2, 3], "git": 3, "text": [2, 5], "recognit": [2, 5], "main": 2, "model": [2, 5], "zoo": [2, 5], "detect": [2, 5], "support": [1, 2, 6], "dataset": [1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, 
"read": 4, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 7, "visual": 7, "task": 7, "evalu": 7, "notebook": [], "train": 2, "your": 2, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": [], "readi": [], "us": 5, "avail": 1, "object": [], "data": 1, "prepar": [], "infer": [], "optim": [], "half": [], "precis": [], "export": 5, "onnx": [], "right": [], "architectur": [], "predictor": [2, 5], "end": 5, "ocr": 5, "two": 5, "stage": 5, "approach": 5, "what": [], "should": [], "i": [], "do": [], "output": 5, "advanc": [], "option": [], "get": 2, "start": 2, "conda": [], "pre": 5, "process": 5, "post": [], "build": 2, "implement": [], "content": [], "compress": 5, "savedmodel": 5, "note": 2, "refer": 2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Getting Started": [[2, "getting-started"]], "Build & train your predictor": [[2, "build-train-your-predictor"]], "Model zoo": [[2, "model-zoo"]], "Text 
detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Notes": [[2, null]], "Package Reference": [[2, null]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "Text Detection": [[5, "text-detection"]], "Pre-processing for detection": [[5, "pre-processing-for-detection"]], "Detection models": [[5, "detection-models"]], "Detection predictors": [[5, "detection-predictors"]], "Text Recognition": [[5, "text-recognition"]], "Text recognition model zoo": [[5, "id4"]], "Pre-processing for recognition": [[5, "pre-processing-for-recognition"]], "Recognition models": [[5, "recognition-models"]], "Recognition predictors": [[5, "recognition-predictors"]], "End-to-End OCR": [[5, "end-to-end-ocr"]], "Two-stage approaches": [[5, "two-stage-approaches"]], "Export model output": [[5, "export-model-output"]], "Model export": [[5, "model-export"]], "Model compression": [[5, "model-compression"]], "Using SavedModel": [[5, "using-savedmodel"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "doctr.utils": [[7, "doctr-utils"]], "Visualization": [[7, "visualization"]], "Task evaluation": [[7, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, 
"doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "visiondataset (class in doctr.datasets.datasets)": [[1, "doctr.datasets.datasets.VisionDataset"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "convert_to_fp16() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_fp16"]], 
"convert_to_tflite() (in module doctr.models.export)": [[5, "doctr.models.export.convert_to_tflite"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "ocr_predictor() (in module doctr.models.zoo)": [[5, "doctr.models.zoo.ocr_predictor"]], "quantize_model() (in module doctr.models.export)": [[5, "doctr.models.export.quantize_model"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "sar_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, 
"doctr.transforms.RandomContrast"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[6, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[7, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[7, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[7, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[7, "doctr.utils.metrics.TextMatch.summary"]], "visualize_page() (in module doctr.utils.visualization)": [[7, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.3.1/using_doctr/using_models.html b/v0.3.1/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.3.1/using_doctr/using_models.html +++ b/v0.3.1/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure, you can pass the following arguments to the predictor; they will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example, to disable the automatic grouping of lines into blocks:

+
from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.4.0/_sources/using_doctr/using_models.rst.txt b/v0.4.0/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.4.0/_sources/using_doctr/using_models.rst.txt +++ b/v0.4.0/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.4.0/searchindex.js b/v0.4.0/searchindex.js index 31d7378710..bcef1e9d32 100644 --- a/v0.4.0/searchindex.js +++ b/v0.4.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "DocTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": 0, "we": [2, 3, 4, 6, 8], "member": [], "leader": [], "make": [7, 8, 9], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4, 6, 8, 9], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [8, 9], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 8], "act": [], "interact": [4, 9], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 7, 9], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 6, 9], "behavior": [], "posit": 9, "environ": [], "includ": 3, "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 8], "affect": [], "mistak": [], "learn": 8, "from": [1, 2, 4, 5, 6, 7, 8, 9], "focus": [], 
"what": [], "i": [1, 4, 5, 6, 7, 9], "best": [], "just": 7, "u": 8, "individu": [], "overal": [], "unaccept": [], "The": [4, 8, 9], "us": [1, 3, 5, 8, 9], "languag": [2, 4, 8], "imageri": [], "attent": [], "advanc": [], "ani": [1, 4, 5, 6, 7, 8, 9], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 8, "publish": [], "inform": [1, 2, 8], "physic": 4, "email": [], "address": 4, "without": [], "explicit": [], "permiss": [], "which": 8, "could": [], "reason": [], "consid": [1, 4, 8, 9], "inappropri": [], "profession": [], "set": [1, 8, 9], "ar": [1, 3, 4, 6, 8, 9], "clarifi": [], "take": [1, 7, 8], "appropri": 8, "fair": [], "action": [], "thei": 8, "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 7, 8, 9], "right": 9, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [3, 7, 8, 9], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 4, 6, 8, 9], "space": [], "also": 8, "offici": [], "repres": [4, 8, 9], "e": [3, 4, 5], "mail": [], "post": 8, "via": [], "social": [], "media": [], "account": 7, "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 8, "abus": [], "otherwis": 9, "mai": 8, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 6, 7, 8, 9], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 7], "written": 4, "provid": [2, 7, 8], "clariti": [], "around": [], "natur": 2, "explan": 8, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": 8, "involv": 8, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 2, 5, 9], "avoid": 3, "well": 7, "extern": [], "channel": [4, 6], "like": [], "term": [], "lead": [], "seriou": [], 
"sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 6, 9], "adapt": [], "version": 7, "0": [1, 6, 8, 9], "avail": [2, 6], "http": [3, 4, 5], "www": 4, "org": 5, "_": [1, 5, 7], "html": [], "were": [4, 8], "inspir": 6, "mozilla": [], "": [4, 9], "ladder": [], "For": [3, 8], "answer": [], "common": [6, 9], "question": [], "about": 8, "see": [], "faq": [], "translat": [], "everyth": 8, "you": [3, 7, 8], "need": [3, 9], "know": [], "effici": [1, 2, 5], "project": [], "packag": [2, 7, 9], "python": [], "doc": [4, 8], "librari": 3, "build": 3, "script": [], "refer": [3, 8], "train": [1, 5, 6, 8], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": 8, "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [6, 9], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 8], "As": [], "contributor": [], "onli": [6, 9], "your": [1, 2, 4, 8, 9], "ad": [5, 6], "whether": [1, 4, 6, 9], "encount": [], "problem": [], "suggest": [], "input": [4, 6, 8], "ha": [1, 9], "valu": [4, 6, 8], "can": [1, 3, 7, 8], "purpos": [], "advis": [], "first": [], "check": 8, "topic": [], "wasn": [], "t": [1, 8], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 7], "feel": [], "new": 9, "one": [1, 5, 6, 8], "do": [3, 7], "so": [1, 3], "whenev": [], "possibl": 9, "enough": 8, "jump": [], "wonder": [], "how": [], "someth": [], "more": 8, "gener": 1, "should": [1, 4, 6, 9], "out": [5, 6, 8, 9], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 8, 9], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [8, 9], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], "eas": [], "process": [2, 4, 8], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4], "same": [4, 8, 9], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 8, 9], "To": [3, 8], 
"togeth": 4, "current": 8, "built": [], "sphinx": [], "thank": [], "our": [5, 8], "file": 1, "been": [8, 9], "rebuilt": [], "want": 7, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [7, 8], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 7, "somewher": [], "els": [], "than": [3, 9], "join": [], "slack": [], "where": [4, 6, 8, 9], "find": 3, "requir": [3, 6], "3": [2, 3, 4, 5, 6, 7, 8, 9], "8": [5, 6, 8], "higher": 3, "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 6, 8], "pytorch": [2, 3, 8], "correspond": [3, 8], "page": [3, 8, 9], "2": [2, 3, 6, 7, 8], "macbook": [], "m1": [], "chip": [], "some": 3, "metal": [], "plugin": [], "1": [1, 5, 6, 7, 8, 9], "12": 8, "anoth": [1, 3, 5], "linux": 3, "few": [3, 7], "extra": 3, "maco": 3, "user": [2, 3, 4], "them": [1, 3, 8], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 9], "gtk": 3, "latest": 3, "over": [1, 3, 8, 9], "here": [1, 3, 6, 8], "last": [1, 3], "stabl": 3, "doctr": [3, 7, 8], "strive": 3, "reduc": [3, 6], "framework": [1, 3, 8], "minimum": [3, 9], "necessari": 3, "featur": [3, 5, 9], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 6, 7], "torch": 3, "mode": 3, "clone": 3, "state": [2, 9], "art": 2, "optic": [2, 8], "charact": [1, 2, 4, 8, 9], "made": 2, "seamless": 2, "access": [1, 2, 4, 8], "anyon": 2, "power": 2, "easi": [2, 9], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 8, "understand": [1, 2, 8], "task": [1, 2, 8], "ocr": [1, 2, 5, 9], "predictor": [2, 5], "pars": [1, 2], "textual": [1, 2, 4, 8], "identifi": 2, "each": [1, 2, 4, 6, 8, 9], "word": [2, 8, 9], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 5], "speed": 2, "perform": [2, 4, 6, 7, 8, 9], "robust": 2, "stage": 2, "pretrain": [2, 5, 7, 8, 9], "paramet": [1, 2, 4, 5, 6, 9], "friendli": 2, "line": [2, 8, 9], "code": [2, 
4], "load": [2, 7], "googlevis": 2, "aw": [2, 8], "textract": [2, 8], "optim": [2, 7], "infer": [2, 6], "both": [2, 6, 8], "cpu": [2, 8], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 6], "scene": [2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 8], "sar": [2, 5], "show": [2, 4, 5, 9], "attend": [2, 5], "read": [2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 9], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 6, 8, 9], "base": [2, 5], "sequenc": [1, 2, 4, 5, 8, 9], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 8], "multi": 2, "aspect": [2, 6], "non": [2, 4, 6, 9], "vitstr": [], "vision": [], "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 8], "form": [1, 2, 8], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 8], "consolid": [1, 2], "receipt": [1, 2, 8], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, "2019": 2, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": [4, 8], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [4, 6, 9], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 6, 9], "true": [1, 4, 5, 6, 7, 8, 9], "use_polygon": [], "fals": [1, 5, 6, 7, 9], "recognition_task": [], "kwarg": [1, 4, 5, 9], "sourc": [1, 4, 5, 6, 9], "document": [1, 8, 9], "import": [1, 4, 5, 6, 7, 8, 9], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 4, 6], "subset": [1, 8], "polygon": [1, 8], "rotat": [1, 4, 6], "bound": [1, 4, 6, 8, 9], "box": [1, 4, 6, 8, 9], "instead": 
[1, 4], "straight": [1, 8], "ones": 1, "recognit": 9, "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [4, 5, 9], "prior": [], "svt": [], "ucsd": [], "comput": [8, 9], "hous": [], "number": [1, 6, 8, 9], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 6, 9], "label_fold": [], "label": [1, 9], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 7], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 7], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 8, "pure": [], "mnt": [], "ramdisk": [], "max": 9, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [1, 8, 9], "detect": [], "element": [1, 4, 8, 9], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 6, 7, 8, 9], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 6, 7, 8, 9], "vocabulari": 1, "sampl": [1, 8], "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": 9, "font": 9, "img_transform": [], "compos": [1, 8], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 6, 9], "max_char": [], "list": [1, 4, 6, 9], "none": [1, 4, 8, 9], "callabl": [1, 6], "tupl": [4, 6, 9], "32": [1, 5, 6, 7, 8], "maximum": [1, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": 8, "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 6], "pass": [1, 8], "batch": [1, 5, 6, 8], "drop": 1, "isn": 1, "full": [1, 8, 9], "worker": 1, "function": [6, 9], 
"merg": [], "sinc": [1, 8], "content": [1, 4, 9], "properli": 1, "model": [1, 9], "interpret": [1, 4], "multipl": [1, 4, 6], "name": [1, 5], "10": [1, 8, 9], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 8], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 9], "ancient_greek": [], "48": 5, "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 8, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 8], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 8], "126": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], 
"\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], 
"\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 6], "dynamic_seq_length": 1, "ndarrai": [1, 4, 9], "given": [1, 4, 8, 9], "map": 1, "n": [1, 9], "length": 1, "Of": 1, "string": [1, 4, 9], "option": 1, "start": 1, "case": [1, 8, 9], "upper": [1, 6], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 6, 8], "modul": [4, 6, 8, 9], "easili": [4, 7, 8, 9], "export": [4, 7, 8, 9], "analysi": 4, "format": [4, 7, 8], "organ": 4, "uninterrupt": [4, 8], "confid": [4, 8, 9], "float": [4, 6, 7, 9], "associ": 4, "predict": [4, 9], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 8], "rel": [4, 6], "collect": 4, "meant": [4, 7], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 7, 9], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 8], "sever": [4, 6, 8], "its": [1, 4, 6, 8, 9], "titl": 4, "underneath": 4, "page_idx": [4, 8], "dimens": [4, 8, 9], "dict": [4, 8, 9], "numpi": [4, 5, 8, 9], "arrai": [4, 9], "uint8": [4, 5, 8, 9], "raw": [4, 9], "pixel": [4, 6, 8], "height": 4, "width": 4, "dictionari": [4, 9], "angl": [4, 6], "degress": 4, "preserve_aspect_ratio": 6, "overlai": 4, "displai": [4, 9], "matplotlib": 9, "pyplot": 9, "method": 6, "high": 4, "convers": 4, "read_pdf": 4, "byte": 4, "scale": [5, 6, 9], "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 6, 7], "render": [], "72dpi": [], "output": [4, 6], "rgb": [4, 6], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 6, 7, 8, 9], "h": [4, 5, 6], "x": [4, 6, 9], "w": [4, 5, 6, 9], "c": [], "read_img_as_numpi": 4, "output_s": [4, 6], "rgb_output": 4, "expect": [4, 6], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 7], "float32": [4, 5, 7], "desir": 4, "relat": 4, "divid": 
4, "255": [4, 5, 6, 8, 9], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 8], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": 5, "deep": [5, 8], "convolut": 5, "larg": 5, "modifi": 5, "normal": [5, 6], "rectangular": 5, "pool": 5, "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7, 8, 9], "uniform": [5, 6, 7], "512": 5, "maxval": [5, 6, 7], "imagenet": 5, "extractor": 5, "resnet18": [], "resnet": 5, "18": [], "residu": [], "boolean": 5, "resnet34": [], "34": 8, "resnet50": [], "50": [5, 8], "resnet31": 5, "downsiz": 5, "4": [5, 8], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 7], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6, 7], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 9, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7, 8, 9], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7, 8, 9], "600": [5, 8, 9], "800": [5, 8, 9], "astyp": [5, 7, 8, 9], "crop": [6, 8], "dataset": [5, 8], "linknet_resnet18": [], "1024": [5, 7, 8, 9], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 7, 8], "backbon": [], "db_mobilenet_v3_larg": [5, 8], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 8], "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 8, 9], "itself": [], "fit": [], "crnn_vgg16_bn": [5, 8], "128": [5, 8], "crnn_mobilenet_v3_smal": [5, 8], 
"crnn_mobilenet_v3_larg": [5, 8], "sar_resnet31": [5, 8], "31": [5, 8], "64": [5, 6, 8], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 8], "recognitionpredictor": 5, "ocr_predictor": [5, 8], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 8, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": 8, "final": 7, "potenti": [], "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 7], "configur": [], "my": [], "procedur": 6, "draw": [6, 9], "design": 6, "torchvis": 6, "resiz": 6, "bilinear": 6, "transfo": 6, "minval": 6, "interpol": 6, "zero": 6, "while": [6, 8], "done": 6, "mean": [6, 9], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [6, 8], "per": [6, 8], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": [6, 9], "shift": 6, "randomli": 6, "invert": 6, "6": [3, 6, 8], "rang": [6, 7], "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 9], "pick": 6, "p": [6, 9], "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, 
"correct": 6, "neg": 6, "lower": [6, 9], "param": [6, 8], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 9], "randomrot": 6, "max_angl": 6, "expand": 6, "degre": 6, "uniformli": 6, "randomcrop": 6, "08": [6, 8], "75": [6, 8], "33": 6, "min_area": 6, "max_area": 6, "min_ratio": 6, "max_ratio": 6, "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [6, 8], "sequenti": [6, 7], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [8, 9], "plt": 9, "ocr_db_crnn": 9, "artefact": [8, 9], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [8, 9], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [8, 9], "famili": 9, "synthes": 9, "metric": [8, 9], "assess": 9, "textmatch": 9, "match": [8, 9], "accuraci": 9, "aggreg": [1, 9], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [8, 9], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [7, 9], "updat": 9, "hello": 9, "world": 9, "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [8, 9], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": [], "confus": 9, "iou": 9, "recal": [8, 9], "g_": 9, "precis": [8, 9], "meaniou": 9, "j": 9, "y_j": 9, "being": [8, 9], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [8, 9], "110": 9, "95": 9, "200": 9, "150": [8, 9], "pair": 9, "broadcast": [], 
"consum": [], "memori": [], "either": 8, "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [8, 9], "detectionmetr": [], "c_j": [], "compil": 8, "better": 8, "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 8, "searchabl": [], "don": 8, "meet": [], "detail": 8, "link": [], "section": [7, 8], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 8, "class_nam": [], "total": [], "date": 8, "preprocessor": 8, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": 8, "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 8, "after": 8, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 8], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 7, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], 
"73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": 8, "sensit": 8, "abl": 8, "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 7, "achiev": 7, "might": [7, 8], "tune": 7, "thing": [7, 8], "product": 7, "readi": 7, "help": 7, "support": 8, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 8, "layer": [], "metadata": [], "util": 7, "export_model_to_onnx": [], "input_shap": 7, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 8, "onc": 8, "separ": 8, "compon": 8, "charg": 8, "usabl": 8, "backend": 8, "along": 8, "processor": 8, "reusabl": 8, "consist": 8, "delimit": 8, "2d": 8, "corner": 8, "flag": 8, "belong": 8, "skew": [], "comprehens": 8, "benchmark": 8, "publicli": 8, "sec": [], "25": 8, "84": 8, "39": 8, "85": 8, "86": 8, "93": 8, "83": 8, "24": [], "80": 8, "29": 8, "90": 8, "67": 8, "76": 8, "11": 8, "81": 8, "71": 8, "7": 8, "21": 8, "82": 8, "20": 8, "49": 8, "87": 8, "63": 8, "17": [], "28": [], "51": 8, "46": 8, "db_resnet34": [], "22": [], "89": 8, "74": 8, "56": 8, "68": 8, "92": 8, "61": 8, "41": 8, "00": 8, "79": 8, "38": [], "88": [], "62": 8, "26": [], "06": 8, "78": 8, "47": 8, "54": [], "abov": 8, "cf": 8, "disclaim": 8, "combin": 8, "199": 8, "second": 8, "warmup": 8, "phase": 8, "measur": 8, "1000": 8, "obtain": 8, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 8, "useabl": 8, "favorit": 8, "dummy_img": 8, 
"area": [], "send": [], "snippet": [], "transcrib": 8, "partial": [], "15": 8, "9": [], "73": 8, "44": [], "14": 8, "55": [], "58": [], "57": 8, "66": 8, "01": [], "98": 8, "23": [], "69": 8, "99": 8, "91": 8, "05": [], "09": [], "96": 8, "40": [], "53": 8, "most": 8, "print": 8, "cfg": 8, "30595": 8, "45": [], "72": 8, "43": 8, "65": 8, "77": 8, "30": 8, "07": [], "27": 8, "gvision": 8, "59": 8, "03": 8, "azur": [], "recogn": [], "42": 8, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 8, 9], "nest": 8, "get": [4, 8], "typic": 8, "layout": 8, "340": 8, "json_output": 8, "1357421875": 8, "0361328125": 8, "8564453125": 8, "8603515625": 8, "914085328578949": 8, "5478515625": 8, "06640625": 8, "5810546875": 8, "0966796875": 8, "9949972033500671": 8, "51171875": 8, "1630859375": 8, "9578408598899841": 8, "1396484375": 8, "3232421875": 8, "185546875": 8, "3515625": 8, "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": [], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 8, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 7, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], 
"developp": 3, "fp": 8, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 7, "tflite": 7, "conv_sequ": 7, "relu": 7, "kernel_s": 7, "serialized_model": 7, "convert_to_fp16": [], "half": [], "serial": 7, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 7, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": [], "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 8, "feed": [], "warm": [], "c5": 8, "x12larg": 8, "xeon": 8, "platinum": 8, "8275l": 8, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 7, "input_t": 7, "saved_model": 7, "And": 7, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 8, "02": 8, "5m": 8, "1m": 8, "19": [], "invoic": 8, "flexibl": 9, "rotated_bbox": [1, 9], "beta": 
[], "linknet16": [5, 8], "160": 5, "arg": 1, "bash": [], "tax": 8, "35": 8, "vgg16_bn": 5, "mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 7, "tfliteconvert": 7, "from_keras_model": 7, "target_spec": 7, "supported_typ": 7, "float16": 7, "fallback": 7, "oper": 7, "representative_dataset": 7, "yield": 7, "supported_op": 7, "opsset": 7, "tflite_builtins_int8": 7, "inference_input_typ": 7, "int8": 7, "inference_output_typ": 7, "2m": 8, "7m": 8, "look": 8, "variou": 8, "below": 8, "unfortun": 8, "moment": 8, "04": 8, "36": 8, "97": 8}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.backbones": [[5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", 
"crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomCrop"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomRotate"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 9], "codebas": [], 
"structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 8], "recognit": [2, 5, 8], "main": 2, "model": [2, 5, 7, 8], "zoo": [2, 5, 8], "detect": [2, 5, 8], "support": [1, 2, 6], "dataset": [1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": [], "train": 7, "your": 7, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 8, "readi": [], "us": 7, "avail": [1, 8], "object": [], "data": 1, "prepar": 7, "infer": 7, "optim": [], "half": 7, "precis": 7, "export": [], "onnx": [], "right": 8, "architectur": 8, "predictor": 8, "end": 8, "ocr": 8, "two": 8, "stage": 8, "approach": 8, "what": 8, "should": 8, "i": 8, "do": 8, "output": 8, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 7, "build": [], "implement": [], "content": [], "compress": 7, "savedmodel": 7, "note": [], "refer": [], "backbon": 5, "tensorflow": 7, "lite": 7, "quantiz": 7}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": 
{"Changelog": [[0, "changelog"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.backbones": [[5, "doctr-models-backbones"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "Preparing your model for inference": [[7, "preparing-your-model-for-inference"]], "Model compression": [[7, "model-compression"]], "TensorFlow Lite": [[7, "tensorflow-lite"]], "Half-precision": [[7, "half-precision"]], "Post-training 
quantization": [[7, "post-training-quantization"]], "Using SavedModel": [[7, "using-savedmodel"]], "Choosing the right model": [[8, "choosing-the-right-model"]], "Text Detection": [[8, "text-detection"]], "Available architectures": [[8, "available-architectures"], [8, "id1"], [8, "id3"]], "Detection predictors": [[8, "detection-predictors"]], "Text Recognition": [[8, "text-recognition"]], "Text recognition model zoo": [[8, "id5"]], "Recognition predictors": [[8, "recognition-predictors"]], "End-to-End OCR": [[8, "end-to-end-ocr"]], "Two-stage approaches": [[8, "two-stage-approaches"]], "What should I do with the output?": [[8, "what-should-i-do-with-the-output"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() 
(doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.backbones)": [[5, 
"doctr.models.backbones.mobilenet_v3_small"]], "mobilenet_v3_small_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet31() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.resnet31"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[6, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[6, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion 
(class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "DocTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": 0, "we": [2, 3, 4, 6, 8], "member": [], "leader": [], "make": [7, 8, 9], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": [], "size": [1, 4, 6, 8, 9], "visibl": [], "invis": [], 
"disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 6, "level": [8, 9], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 8], "act": [], "interact": [4, 9], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 7, 9], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 6, 9], "behavior": [], "posit": 9, "environ": [], "includ": 3, "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 8], "affect": [], "mistak": [], "learn": 8, "from": [1, 2, 4, 5, 6, 7, 8, 9], "focus": [], "what": [], "i": [1, 4, 5, 6, 7, 9], "best": [], "just": 7, "u": 8, "individu": [], "overal": [], "unaccept": [], "The": [4, 8, 9], "us": [1, 3, 5, 8, 9], "languag": [2, 4, 8], "imageri": [], "attent": [], "advanc": [], "ani": [1, 4, 5, 6, 7, 8, 9], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 8, "publish": [], "inform": [1, 2, 8], "physic": 4, "email": [], "address": 4, "without": [], "explicit": [], "permiss": [], "which": 8, "could": [], "reason": [], "consid": [1, 4, 8, 9], "inappropri": [], "profession": [], "set": [1, 8, 9], "ar": [1, 3, 4, 6, 8, 9], "clarifi": [], "take": [1, 7, 8], "appropri": 8, "fair": [], "action": [], "thei": 8, "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 7, 8, 9], "right": 9, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [3, 7, 8, 9], "moder": [], "decis": [], "when": [], "appli": [1, 6], "within": [], "all": [1, 4, 6, 8, 9], "space": [], "also": 8, "offici": [], "repres": [4, 8, 9], "e": [3, 4, 5], "mail": [], "post": 8, "via": [], "social": [], 
"media": [], "account": 7, "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 8, "abus": [], "otherwis": 9, "mai": 8, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 6, 7, 8, 9], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 7], "written": 4, "provid": [2, 7, 8], "clariti": [], "around": [], "natur": 2, "explan": 8, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 6], "singl": [], "seri": [], "continu": [], "No": 8, "involv": 8, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 2, 5, 9], "avoid": 3, "well": 7, "extern": [], "channel": [4, 6], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 6, 9], "adapt": [], "version": 7, "0": [1, 6, 8, 9], "avail": [2, 6], "http": [3, 4, 5], "www": 4, "org": 5, "_": [1, 5, 7], "html": [], "were": [4, 8], "inspir": 6, "mozilla": [], "": [4, 9], "ladder": [], "For": [3, 8], "answer": [], "common": [6, 9], "question": [], "about": 8, "see": [], "faq": [], "translat": [], "everyth": 8, "you": [3, 7, 8], "need": [3, 9], "know": [], "effici": [1, 2, 5], "project": [], "packag": [2, 7, 9], "python": [], "doc": [4, 8], "librari": 3, "build": 3, "script": [], "refer": [3, 8], "train": [1, 5, 6, 8], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": 8, "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [6, 9], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 8], "As": [], "contributor": [], "onli": [6, 9], "your": [1, 2, 4, 8, 9], "ad": [5, 6], "whether": [1, 4, 6, 9], "encount": [], "problem": [], "suggest": [], "input": [4, 6, 8], "ha": [1, 
9], "valu": [4, 6, 8], "can": [1, 3, 7, 8], "purpos": [], "advis": [], "first": [], "check": 8, "topic": [], "wasn": [], "t": [1, 8], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 7], "feel": [], "new": 9, "one": [1, 5, 6, 8], "do": [3, 7], "so": [1, 3], "whenev": [], "possibl": 9, "enough": 8, "jump": [], "wonder": [], "how": [], "someth": [], "more": 8, "gener": 1, "should": [1, 4, 6, 9], "out": [5, 6, 8, 9], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 8, 9], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [8, 9], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], "eas": [], "process": [2, 4, 8], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4], "same": [4, 8, 9], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 8, 9], "To": [3, 8], "togeth": 4, "current": 8, "built": [], "sphinx": [], "thank": [], "our": [5, 8], "file": 1, "been": [8, 9], "rebuilt": [], "want": 7, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [7, 8], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 7, "somewher": [], "els": [], "than": [3, 9], "join": [], "slack": [], "where": [4, 6, 8, 9], "find": 3, "requir": [3, 6], "3": [2, 3, 4, 5, 6, 7, 8, 9], "8": [5, 6, 8], "higher": 3, "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 6, 8], "pytorch": [2, 3, 8], "correspond": [3, 8], "page": [3, 8, 9], "2": [2, 3, 6, 7, 8], "macbook": [], "m1": [], "chip": [], "some": 3, "metal": [], "plugin": [], "1": [1, 5, 6, 7, 8, 9], "12": 8, "anoth": [1, 3, 5], "linux": 3, "few": [3, 7], "extra": 3, "maco": 3, "user": [2, 3, 4], "them": [1, 3, 8], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 9], "gtk": 3, "latest": 3, "over": [1, 3, 8, 9], "here": [1, 3, 6, 8], "last": [1, 3], "stabl": 3, "doctr": [3, 7, 8], "strive": 3, 
"reduc": [3, 6], "framework": [1, 3, 8], "minimum": [3, 9], "necessari": 3, "featur": [3, 5, 9], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 6, 7], "torch": 3, "mode": 3, "clone": 3, "state": [2, 9], "art": 2, "optic": [2, 8], "charact": [1, 2, 4, 8, 9], "made": 2, "seamless": 2, "access": [1, 2, 4, 8], "anyon": 2, "power": 2, "easi": [2, 9], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 8, "understand": [1, 2, 8], "task": [1, 2, 8], "ocr": [1, 2, 5, 9], "predictor": [2, 5], "pars": [1, 2], "textual": [1, 2, 4, 8], "identifi": 2, "each": [1, 2, 4, 6, 8, 9], "word": [2, 8, 9], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 5], "speed": 2, "perform": [2, 4, 6, 7, 8, 9], "robust": 2, "stage": 2, "pretrain": [2, 5, 7, 8, 9], "paramet": [1, 2, 4, 5, 6, 9], "friendli": 2, "line": [2, 8, 9], "code": [2, 4], "load": [2, 7], "googlevis": 2, "aw": [2, 8], "textract": [2, 8], "optim": [2, 7], "infer": [2, 6], "both": [2, 6, 8], "cpu": [2, 8], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 6], "scene": [2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 8], "sar": [2, 5], "show": [2, 4, 5, 9], "attend": [2, 5], "read": [2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 9], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 6, 8, 9], "base": [2, 5], "sequenc": [1, 2, 4, 5, 8, 9], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 8], "multi": 2, "aspect": [2, 6], "non": [2, 4, 6, 9], "vitstr": [], "vision": [], "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 8], "form": [1, 2, 8], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 8], "consolid": [1, 2], "receipt": [1, 2, 8], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, 
"2019": 2, "iiit": [], "5k": [], "cvit": [], "street": [], "view": [], "synthtext": [], "visual": [], "geometri": [4, 8], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [4, 6, 9], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 6, 9], "true": [1, 4, 5, 6, 7, 8, 9], "use_polygon": [], "fals": [1, 5, 6, 7, 9], "recognition_task": [], "kwarg": [1, 4, 5, 9], "sourc": [1, 4, 5, 6, 9], "document": [1, 8, 9], "import": [1, 4, 5, 6, 7, 8, 9], "train_set": 1, "download": 1, "img": [1, 6], "target": [1, 4, 6], "subset": [1, 8], "polygon": [1, 8], "rotat": [1, 4, 6], "bound": [1, 4, 6, 8, 9], "box": [1, 4, 6, 8, 9], "instead": [1, 4], "straight": [1, 8], "ones": 1, "recognit": 9, "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": [], "bmvc": [], "2012": [], "text": [4, 5, 9], "prior": [], "svt": [], "ucsd": [], "comput": [8, 9], "hous": [], "number": [1, 6, 8, 9], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 6, 9], "label_fold": [], "label": [1, 9], "part": 6, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 7], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 7], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 8, "pure": [], "mnt": [], "ramdisk": [], "max": 9, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [], "object": [1, 8, 9], "detect": [], "element": [1, 4, 8, 9], "varieti": [], "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, 
"implement": [1, 4, 6, 7, 8, 9], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 6, 7, 8, 9], "vocabulari": 1, "sampl": [1, 8], "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": 9, "font": 9, "img_transform": [], "compos": [1, 8], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 6, 9], "max_char": [], "list": [1, 4, 6, 9], "none": [1, 4, 8, 9], "callabl": [1, 6], "tupl": [4, 6, 9], "32": [1, 5, 6, 7, 8], "maximum": [1, 6], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": 8, "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 6], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 6], "pass": [1, 8], "batch": [1, 5, 6, 8], "drop": 1, "isn": 1, "full": [1, 8, 9], "worker": 1, "function": [6, 9], "merg": [], "sinc": [1, 8], "content": [1, 4, 9], "properli": 1, "model": [1, 9], "interpret": [1, 4], "multipl": [1, 4, 6], "name": [1, 5], "10": [1, 8, 9], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 8], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 6, 9], "ancient_greek": [], "48": 5, "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 8, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 8], 
"0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 8], "126": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 6], "dynamic_seq_length": 1, "ndarrai": [1, 4, 9], "given": [1, 4, 8, 9], "map": 1, "n": [1, 9], "length": 1, "Of": 1, "string": [1, 4, 9], "option": 1, "start": 1, "case": [1, 8, 9], "upper": [1, 6], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 6, 8], "modul": [4, 6, 8, 9], "easili": [4, 7, 8, 9], "export": [4, 7, 8, 9], "analysi": 4, "format": [4, 7, 8], "organ": 4, "uninterrupt": [4, 8], "confid": [4, 8, 9], "float": [4, 6, 7, 9], "associ": 4, "predict": [4, 9], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 8], "rel": [4, 6], "collect": 4, "meant": [4, 7], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 7, 9], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 8], "sever": [4, 6, 8], "its": [1, 4, 6, 8, 9], "titl": 4, "underneath": 4, "page_idx": [4, 8], "dimens": [4, 8, 9], "dict": [4, 8, 9], "numpi": [4, 5, 8, 9], 
"arrai": [4, 9], "uint8": [4, 5, 8, 9], "raw": [4, 9], "pixel": [4, 6, 8], "height": 4, "width": 4, "dictionari": [4, 9], "angl": [4, 6], "degress": 4, "preserve_aspect_ratio": 6, "overlai": 4, "displai": [4, 9], "matplotlib": 9, "pyplot": 9, "method": 6, "high": 4, "convers": 4, "read_pdf": 4, "byte": 4, "scale": [5, 6, 9], "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 6, 7], "render": [], "72dpi": [], "output": [4, 6], "rgb": [4, 6], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 6, 7, 8, 9], "h": [4, 5, 6], "x": [4, 6, 9], "w": [4, 5, 6, 9], "c": [], "read_img_as_numpi": 4, "output_s": [4, 6], "rgb_output": 4, "expect": [4, 6], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 7], "float32": [4, 5, 7], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 6, 8, 9], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 8], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": 5, "deep": [5, 8], "convolut": 5, "larg": 5, "modifi": 5, "normal": [5, 6], "rectangular": 5, "pool": 5, "simpler": [], "head": [], "input_tensor": 5, "random": [5, 6, 7, 8, 9], "uniform": [5, 6, 7], "512": 5, "maxval": [5, 6, 7], "imagenet": 5, "extractor": 5, "resnet18": [], "resnet": 5, "18": [], "residu": [], "boolean": 5, "resnet34": [], "34": 8, "resnet50": [], "50": [5, 8], "resnet31": 5, "downsiz": 5, "4": [5, 8], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 7], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 6, 7], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 9, "textnet_tini": [], "textnet": 
[], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 7, 8, 9], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 7, 8, 9], "600": [5, 8, 9], "800": [5, 8, 9], "astyp": [5, 7, 8, 9], "crop": [6, 8], "dataset": [5, 8], "linknet_resnet18": [], "1024": [5, 7, 8, 9], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 7, 8], "backbon": [], "db_mobilenet_v3_larg": [5, 8], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 8], "assume_straight_pag": [], "detectionpredictor": 5, "input_pag": [5, 8, 9], "itself": [], "fit": [], "crnn_vgg16_bn": [5, 8], "128": [5, 8], "crnn_mobilenet_v3_smal": [5, 8], "crnn_mobilenet_v3_larg": [5, 8], "sar_resnet31": [5, 8], "31": [5, 8], "64": [5, 6, 8], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 8], "recognitionpredictor": 5, "ocr_predictor": [5, 8], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 6, "export_as_straight_box": [], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": 8, "assum": [], "preserv": 6, "ratio": 6, "symmetr": 6, "bottom": 8, "final": 7, "potenti": [], "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 7], "configur": [], "my": [], "procedur": 6, "draw": [6, 9], "design": 6, "torchvis": 6, "resiz": 6, "bilinear": 6, "transfo": 6, "minval": 6, "interpol": 6, "zero": 6, 
"while": [6, 8], "done": 6, "mean": [6, 9], "std": 6, "gaussian": 6, "distribut": 6, "485": 6, "456": 6, "406": 6, "229": 6, "225": 6, "averag": [6, 8], "per": [6, 8], "standard": 6, "deviat": 6, "lambdatransform": 6, "fn": 6, "lambda": 6, "tograi": 6, "num_output_channel": [], "grayscal": 6, "colorinvers": 6, "min_val": 6, "tranform": 6, "color": [6, 9], "shift": 6, "randomli": 6, "invert": 6, "6": [3, 6, 8], "rang": [6, 7], "randombright": 6, "max_delta": 6, "adjust": 6, "bright": 6, "delta": 6, "offset": 6, "add": [6, 9], "pick": 6, "p": [6, 9], "probabl": 6, "randomcontrast": 6, "contrast": 6, "contrast_factor": 6, "factor": 6, "randomsatur": 6, "satur": 6, "hsv": 6, "increas": 6, "randomhu": 6, "hue": 6, "randomgamma": 6, "min_gamma": 6, "max_gamma": 6, "min_gain": 6, "max_gain": 6, "gamma": 6, "correct": 6, "neg": 6, "lower": [6, 9], "param": [6, 8], "constant": 6, "multipli": 6, "randomjpegqu": 6, "min_qual": 6, "60": 6, "max_qual": 6, "jpeg": 6, "qualiti": 6, "dimension": 6, "between": [6, 9], "randomrot": 6, "max_angl": 6, "expand": 6, "degre": 6, "uniformli": 6, "randomcrop": 6, "08": [6, 8], "75": [6, 8], "33": 6, "min_area": 6, "max_area": 6, "min_ratio": 6, "max_ratio": 6, "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": [], "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 6, "consecut": [6, 8], "sequenti": [6, 7], "oneof": 6, "jpegqual": 6, "randomappli": 6, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [8, 9], "plt": 9, "ocr_db_crnn": 9, "artefact": [8, 9], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [8, 9], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [8, 9], "famili": 9, "synthes": 9, "metric": [8, 9], "assess": 9, 
"textmatch": 9, "match": [8, 9], "accuraci": 9, "aggreg": [1, 9], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [8, 9], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [7, 9], "updat": 9, "hello": 9, "world": 9, "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [8, 9], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": [], "confus": 9, "iou": 9, "recal": [8, 9], "g_": 9, "precis": [8, 9], "meaniou": 9, "j": 9, "y_j": 9, "being": [8, 9], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [8, 9], "110": 9, "95": 9, "200": 9, "150": [8, 9], "pair": 9, "broadcast": [], "consum": [], "memori": [], "either": 8, "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [8, 9], "detectionmetr": [], "c_j": [], "compil": 8, "better": 8, "leverag": [], "descript": [], "colab": [], "quicktour": [], "present": [], "main": [], "produc": 8, "searchabl": [], "don": 8, "meet": [], "detail": 8, "link": [], "section": [7, 8], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 8, "class_nam": [], "total": [], "date": 8, "preprocessor": 8, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": 8, "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], 
"custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 8, "after": 8, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 8], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 7, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": 8, "sensit": 8, "abl": 8, "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 7, "achiev": 7, "might": [7, 8], "tune": 7, "thing": [7, 8], "product": 7, "readi": 7, "help": 7, "support": 8, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 8, "layer": [], "metadata": [], "util": 7, "export_model_to_onnx": [], "input_shap": 7, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 8, "onc": 8, "separ": 8, "compon": 8, "charg": 8, "usabl": 8, "backend": 8, "along": 8, "processor": 8, "reusabl": 8, "consist": 8, "delimit": 8, "2d": 
8, "corner": 8, "flag": 8, "belong": 8, "skew": [], "comprehens": 8, "benchmark": 8, "publicli": 8, "sec": [], "25": 8, "84": 8, "39": 8, "85": 8, "86": 8, "93": 8, "83": 8, "24": [], "80": 8, "29": 8, "90": 8, "67": 8, "76": 8, "11": 8, "81": 8, "71": 8, "7": 8, "21": 8, "82": 8, "20": 8, "49": 8, "87": 8, "63": 8, "17": [], "28": [], "51": 8, "46": 8, "db_resnet34": [], "22": [], "89": 8, "74": 8, "56": 8, "68": 8, "92": 8, "61": 8, "41": 8, "00": 8, "79": 8, "38": [], "88": [], "62": 8, "26": [], "06": 8, "78": 8, "47": 8, "54": [], "abov": 8, "cf": 8, "disclaim": 8, "combin": 8, "199": 8, "second": 8, "warmup": 8, "phase": 8, "measur": 8, "1000": 8, "obtain": 8, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 8, "useabl": 8, "favorit": 8, "dummy_img": 8, "area": [], "send": [], "snippet": [], "transcrib": 8, "partial": [], "15": 8, "9": [], "73": 8, "44": [], "14": 8, "55": [], "58": [], "57": 8, "66": 8, "01": [], "98": 8, "23": [], "69": 8, "99": 8, "91": 8, "05": [], "09": [], "96": 8, "40": [], "53": 8, "most": 8, "print": 8, "cfg": 8, "30595": 8, "45": [], "72": 8, "43": 8, "65": 8, "77": 8, "30": 8, "07": [], "27": 8, "gvision": 8, "59": 8, "03": 8, "azur": [], "recogn": [], "42": 8, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 8, 9], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 8, "get": [4, 8], "typic": 8, "layout": 8, "340": 8, "text_output": [], "json_output": 8, "1357421875": 8, "0361328125": 8, "8564453125": 8, "8603515625": 8, "914085328578949": 8, "5478515625": 8, "06640625": 8, "5810546875": 8, "0966796875": 8, "9949972033500671": 8, "51171875": 8, "1630859375": 8, "9578408598899841": 8, "1396484375": 8, "3232421875": 8, "185546875": 8, "3515625": 8, "outpout": [], "xml": [], "hocr": [], "export_as_xml": [], "xml_output": [], "xml_bytes_str": [], "xml_element": [], "utf": [], "xmln": [], "w3": 
[], "1999": [], "xhtml": [], "lang": [], "en": [], "meta": [], "equiv": [], "charset": [], "system": [], "ocr_pag": [], "ocr_carea": [], "ocr_par": [], "ocr_lin": [], "ocrx_word": [], "div": [], "id": 8, "page_1": [], "bbox": [], "3456": [], "ppageno": [], "block_1_1": [], "857": [], "529": [], "2504": [], "2710": [], "par_1_1": [], "span": [], "line_1_1": [], "x_size": [], "x_descend": [], "x_ascend": [], "word_1_1": [], "1552": [], "540": [], "1778": [], "580": [], "x_wconf": [], "word_1_2": [], "1782": [], "1900": [], "583": [], "word_1_3": [], "1420": [], "597": [], "1684": [], "641": [], "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 7, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 8, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 7, "tflite": 7, "conv_sequ": 7, "relu": 7, "kernel_s": 7, "serialized_model": 7, "convert_to_fp16": [], "half": [], "serial": 7, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 7, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], 
"distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": [], "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 8, "feed": [], "warm": [], "c5": 8, "x12larg": 8, "xeon": 8, "platinum": 8, "8275l": 8, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": 5, "db_sar_resnet": 5, "db_crnn_vgg": 5, "db_crnn_resnet": 5, "properti": 7, "input_t": 7, "saved_model": 7, "And": 7, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 8, "02": 8, "5m": 8, "1m": 8, "19": [], "invoic": 8, "flexibl": 9, "rotated_bbox": [1, 9], "beta": [], "linknet16": [5, 8], "160": 5, "arg": 1, "bash": [], "tax": 8, "35": 8, "vgg16_bn": 5, "mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 7, "tfliteconvert": 7, "from_keras_model": 7, "target_spec": 7, "supported_typ": 7, "float16": 7, "fallback": 7, "oper": 7, "representative_dataset": 7, "yield": 7, "supported_op": 7, "opsset": 7, "tflite_builtins_int8": 7, "inference_input_typ": 7, "int8": 7, "inference_output_typ": 7, "2m": 8, "7m": 8, "look": 8, "variou": 8, "below": 8, "unfortun": 8, "moment": 8, "04": 8, "36": 8, "97": 8}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, 
"", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.backbones": [[5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[6, 0, 1, "", "ColorInversion"], [6, 0, 1, "", "Compose"], [6, 0, 1, "", "LambdaTransformation"], [6, 0, 1, "", "Normalize"], [6, 0, 1, "", "OneOf"], [6, 0, 1, "", "RandomApply"], [6, 0, 1, "", "RandomBrightness"], [6, 0, 1, "", "RandomContrast"], [6, 0, 1, "", "RandomCrop"], [6, 0, 1, "", "RandomGamma"], [6, 0, 1, "", "RandomHue"], [6, 0, 1, "", "RandomJpegQuality"], [6, 0, 1, "", "RandomRotate"], [6, 0, 1, "", "RandomSaturation"], [6, 0, 1, "", "Resize"], [6, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": 
[[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": [], "11": 0, "10": [], "01": [], "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 9], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 8], "recognit": [2, 5, 8], "main": 2, "model": [2, 5, 7, 8], "zoo": [2, 5, 8], "detect": [2, 5, 8], "support": [1, 2, 6], "dataset": [1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": [], "factori": [], "transform": 6, "compos": 6, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": [], "train": 7, "your": 7, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": 
[], "choos": 8, "readi": [], "us": 7, "avail": [1, 8], "object": [], "data": 1, "prepar": 7, "infer": 7, "optim": [], "half": 7, "precis": 7, "export": [], "onnx": [], "right": 8, "architectur": 8, "predictor": 8, "end": 8, "ocr": 8, "two": 8, "stage": 8, "approach": 8, "what": 8, "should": 8, "i": 8, "do": 8, "output": 8, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 7, "build": [], "implement": [], "content": [], "compress": 7, "savedmodel": 7, "note": [], "refer": [], "backbon": 5, "tensorflow": 7, "lite": 7, "quantiz": 7}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "DocTR Vocabs": [[1, "id1"]], "DocTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": 
[[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.backbones": [[5, "doctr-models-backbones"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "doctr.transforms": [[6, "doctr-transforms"]], "Supported transformations": [[6, "supported-transformations"]], "Composing transformations": [[6, "composing-transformations"]], "Preparing your model for inference": [[7, "preparing-your-model-for-inference"]], "Model compression": [[7, "model-compression"]], "TensorFlow Lite": [[7, "tensorflow-lite"]], "Half-precision": [[7, "half-precision"]], "Post-training quantization": [[7, "post-training-quantization"]], "Using SavedModel": [[7, "using-savedmodel"]], "Choosing the right model": [[8, "choosing-the-right-model"]], "Text Detection": [[8, "text-detection"]], "Available architectures": [[8, "available-architectures"], [8, "id1"], [8, "id3"]], "Detection predictors": [[8, "detection-predictors"]], "Text Recognition": [[8, "text-recognition"]], "Text recognition model zoo": [[8, "id5"]], "Recognition predictors": [[8, "recognition-predictors"]], "End-to-End OCR": [[8, "end-to-end-ocr"]], "Two-stage approaches": [[8, "two-stage-approaches"]], "What should I do with the output?": [[8, "what-should-i-do-with-the-output"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, 
"doctr.datasets.loader.DataLoader"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, 
"doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small"]], "mobilenet_v3_small_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet31() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.resnet31"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[6, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[6, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[6, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[6, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[6, 
"doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[6, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[6, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[6, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[6, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[6, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[6, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[6, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[6, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[6, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[6, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[6, "doctr.transforms.ToGray"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in 
module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.4.0/using_doctr/using_models.html b/v0.4.0/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.4.0/using_doctr/using_models.html +++ b/v0.4.0/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.4.1/_sources/using_doctr/using_models.rst.txt b/v0.4.1/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.4.1/_sources/using_doctr/using_models.rst.txt +++ b/v0.4.1/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.4.1/searchindex.js b/v0.4.1/searchindex.js index 71ae6798dd..c76d8f63bf 100644 --- a/v0.4.1/searchindex.js +++ b/v0.4.1/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "notebooks", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "notebooks.md", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "docTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "docTR Notebooks", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": [0, 1], "we": [2, 3, 4, 7, 9], "member": [], "leader": [], "make": [8, 9, 10], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": 9, "size": [1, 4, 7, 9, 10], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 7, "level": [1, 9, 10], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 9], "act": [], "interact": [4, 10], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 8, 10], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 7, 10], "behavior": [], "posit": 10, "environ": [], "includ": [1, 3], "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 9], "affect": [], 
"mistak": [], "learn": 9, "from": [1, 2, 4, 5, 7, 8, 9, 10], "focus": [], "what": [], "i": [1, 4, 5, 7, 8, 10], "best": [], "just": 8, "u": 9, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 9, 10], "us": [1, 3, 5, 9, 10], "languag": [1, 2, 4, 9], "imageri": [], "attent": [], "advanc": [], "ani": [1, 4, 5, 7, 8, 9, 10], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 9, "publish": [], "inform": [1, 2, 9], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 9, "could": [], "reason": [], "consid": [1, 4, 9, 10], "inappropri": [], "profession": [], "set": [1, 5, 9, 10], "ar": [1, 3, 4, 6, 7, 9, 10], "clarifi": [], "take": [1, 8, 9], "appropri": 9, "fair": [], "action": [], "thei": 9, "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 8, 9, 10], "right": 10, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [1, 3, 8, 9, 10], "moder": [], "decis": [], "when": 5, "appli": [1, 7], "within": [], "all": [1, 4, 7, 9, 10], "space": [], "also": 9, "offici": [], "repres": [4, 9, 10], "e": [3, 4, 5], "mail": [], "post": 9, "via": [], "social": [], "media": [], "account": 8, "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 9, "abus": [], "otherwis": 10, "mai": 9, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 7, 8, 9, 10], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 6, 8], "written": 4, "provid": [2, 8, 9], "clariti": [], "around": [], "natur": 2, "explan": 9, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 7], "singl": [], "seri": [], "continu": [], "No": 9, "involv": 9, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 2, 5, 10], 
"avoid": 3, "well": 8, "extern": [], "channel": [4, 7], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 7, 9, 10], "adapt": [], "version": [8, 9], "0": [1, 7, 9, 10], "avail": [2, 7], "http": [3, 4, 5, 9], "www": [4, 9], "org": [5, 9], "_": [1, 5, 8], "html": 9, "were": [4, 9], "inspir": 7, "mozilla": [], "": [4, 10], "ladder": [], "For": [3, 9], "answer": [], "common": [7, 10], "question": [], "about": 9, "see": [], "faq": [], "translat": [], "everyth": 9, "you": [3, 5, 8, 9], "need": [3, 10], "know": [], "effici": [1, 2, 5], "project": [], "packag": [2, 8, 10], "python": [], "doc": [4, 9], "librari": [3, 6], "build": 3, "script": [], "refer": [3, 9], "train": [1, 5, 7, 9], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": [6, 9], "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [7, 10], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 9], "As": [], "contributor": [], "onli": [5, 7, 10], "your": [1, 2, 4, 9, 10], "ad": [5, 7], "whether": [1, 4, 7, 10], "encount": [], "problem": [], "suggest": [], "input": [4, 7, 9], "ha": [1, 10], "valu": [4, 7, 9], "can": [1, 3, 8, 9], "purpos": [], "advis": [], "first": [], "check": 9, "topic": [], "wasn": [], "t": [1, 9], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 8], "feel": [], "new": 10, "one": [1, 5, 7, 9], "do": [3, 8], "so": [1, 3], "whenev": [], "possibl": 10, "enough": 9, "jump": [], "wonder": [], "how": [], "someth": [], "more": 9, "gener": 1, "should": [1, 4, 7, 10], "out": [5, 7, 9, 10], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 9, 10], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [9, 10], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], "eas": [], "process": [2, 4, 9], "later": [], 
"messag": [], "udac": [], "guid": [], "order": [1, 4], "same": [4, 9, 10], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 9, 10], "To": [3, 9], "togeth": 4, "current": 9, "built": [], "sphinx": [], "thank": [], "our": [5, 9], "file": 1, "been": [9, 10], "rebuilt": [], "want": 8, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [8, 9], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 8, "somewher": [], "els": [], "than": [3, 10], "join": [], "slack": [], "where": [4, 7, 9, 10], "find": 3, "requir": [3, 7], "3": [2, 3, 4, 5, 7, 8, 9, 10], "8": [5, 7, 9], "higher": [1, 3], "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 7, 9], "pytorch": [2, 3, 9], "correspond": [3, 9], "page": [1, 3, 5, 9, 10], "2": [2, 3, 7, 8, 9], "macbook": [], "m1": [], "chip": [], "some": [3, 6], "metal": [], "plugin": [], "1": [1, 5, 7, 8, 9, 10], "12": 9, "anoth": [1, 3, 5], "linux": 3, "few": [3, 8], "extra": 3, "maco": 3, "user": [2, 3, 4, 6], "them": [1, 3, 9], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 10], "gtk": 3, "latest": 3, "over": [1, 3, 9, 10], "here": [1, 3, 6, 7, 9], "last": [1, 3], "stabl": 3, "doctr": [3, 8, 9], "strive": 3, "reduc": [3, 7], "framework": [1, 3, 9], "minimum": [3, 10], "necessari": 3, "featur": [3, 5, 6, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 7, 8], "torch": 3, "mode": 3, "clone": 3, "state": [2, 10], "art": 2, "optic": [2, 9], "charact": [1, 2, 4, 9, 10], "made": 2, "seamless": 2, "access": [1, 2, 4, 9], "anyon": 2, "power": 2, "easi": [2, 10], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 9, "understand": [1, 2, 9], "task": [1, 2, 9], "ocr": [1, 2, 5, 10], "predictor": [2, 5], "pars": [1, 2], "textual": [1, 2, 4, 5, 9], "identifi": 2, "each": [1, 2, 4, 7, 9, 10], "word": [2, 9, 10], "research": 2, "quickli": 2, "compar": 2, 
"own": 2, "architectur": [2, 5], "speed": [2, 5], "perform": [2, 4, 7, 8, 9, 10], "robust": 2, "stage": 2, "pretrain": [2, 5, 8, 9, 10], "paramet": [1, 2, 4, 5, 7, 10], "friendli": 2, "line": [2, 9, 10], "code": [2, 4], "load": [2, 8], "googlevis": 2, "aw": [2, 9], "textract": [2, 9], "optim": [2, 8], "infer": [2, 5, 7], "both": [2, 7, 9], "cpu": [2, 9], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 7], "scene": [1, 2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5, 9], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 9], "sar": [2, 5], "show": [2, 4, 5, 10], "attend": [2, 5], "read": [2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5, 9], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 10], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 7, 9, 10], "base": [2, 5], "sequenc": [1, 2, 4, 5, 9, 10], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 9], "multi": 2, "aspect": [2, 7], "non": [1, 2, 4, 7, 10], "vitstr": [], "vision": 1, "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 9], "form": [1, 2, 9], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 9], "consolid": [1, 2], "receipt": [1, 2, 9], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, "2019": 2, "iiit": 1, "5k": 1, "cvit": [], "street": 1, "view": 1, "synthtext": [], "visual": [], "geometri": [4, 9], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": 1, "data": [4, 7, 10], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 7, 10], "true": [1, 4, 5, 7, 8, 9, 10], "use_polygon": [], "fals": [1, 5, 7, 8, 10], "recognition_task": [], "kwarg": [1, 4, 5, 10], "sourc": [1, 4, 5, 7, 10], 
"document": [1, 9, 10], "import": [1, 4, 5, 7, 8, 9, 10], "train_set": 1, "download": 1, "img": [1, 7], "target": [1, 4, 7], "subset": [1, 9], "polygon": [1, 9], "rotat": [1, 4, 5, 7], "bound": [1, 4, 5, 7, 9, 10], "box": [1, 4, 5, 7, 9, 10], "instead": [1, 4], "straight": [1, 5, 9], "ones": 1, "recognit": [1, 10], "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": 1, "bmvc": 1, "2012": 1, "text": [1, 4, 5, 10], "prior": 1, "svt": 1, "ucsd": 1, "comput": [1, 9, 10], "hous": [], "number": [1, 7, 9, 10], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 7, 10], "label_fold": [], "label": [1, 10], "part": 7, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 8], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 8], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 9, "pure": [], "mnt": [], "ramdisk": [], "max": 10, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 1, "object": [1, 9, 10], "detect": [1, 10], "element": [1, 4, 5, 9, 10], "varieti": 1, "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 7, 8, 9, 10], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 7, 8, 9, 10], "vocabulari": 1, "sampl": [1, 9], "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": 10, "font": 10, "img_transform": [], "compos": [1, 9], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 7, 10], "max_char": [], "list": [1, 4, 7, 10], "none": [1, 4, 9, 10], "callabl": [1, 7], "tupl": [4, 7, 10], "32": [1, 5, 7, 8, 9], "maximum": [1, 7], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": 9, "ocrdataset": 1, "label_fil": 1, "jpg": 
[1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 7], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 7], "pass": [1, 5, 9], "batch": [1, 5, 7, 9], "drop": 1, "isn": 1, "full": [1, 9, 10], "worker": 1, "function": [7, 10], "merg": [], "sinc": [1, 9], "content": [1, 4, 9, 10], "properli": 1, "model": [1, 10], "interpret": [1, 4], "multipl": [1, 4, 7], "name": [1, 5, 9], "10": [1, 9, 10], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 9], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 7, 9, 10], "ancient_greek": [], "48": [5, 9], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 9, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 9], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 9], "126": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, 
"\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 7], "dynamic_seq_length": 1, "ndarrai": [1, 4, 10], "given": [1, 4, 9, 10], "map": 1, "n": [1, 10], "length": 1, "Of": 1, "string": [1, 4, 9, 10], "option": 1, "start": 1, "case": [1, 9, 10], "upper": [1, 7], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 7, 9], "modul": [4, 7, 9, 10], "easili": [4, 8, 9, 10], "export": [4, 5, 8, 9, 10], "analysi": 4, "format": [4, 8, 9], "organ": 4, "uninterrupt": [4, 9], "confid": [4, 9, 10], "float": [4, 7, 8, 10], "associ": 4, "predict": [4, 5, 10], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 9], "rel": [4, 7], "collect": 4, "meant": [4, 8], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 8, 10], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 9], "sever": [4, 7, 9], "its": [1, 4, 7, 9, 10], "titl": [4, 9], "underneath": 4, "page_idx": [4, 9], "dimens": [4, 9, 10], "dict": [4, 9, 10], 
"numpi": [4, 5, 9, 10], "arrai": [4, 10], "uint8": [4, 5, 9, 10], "raw": [4, 10], "pixel": [4, 7, 9], "height": 4, "width": 4, "dictionari": [4, 10], "angl": [4, 7], "degress": 4, "preserve_aspect_ratio": 7, "overlai": 4, "displai": [4, 10], "matplotlib": 10, "pyplot": 10, "method": [7, 9], "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 9], "scale": [5, 7, 10], "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 7, 8], "render": [], "72dpi": [], "output": [4, 7], "rgb": [4, 7], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 7, 8, 9, 10], "h": [4, 5, 7], "x": [4, 7, 10], "w": [4, 5, 7, 10], "c": 10, "read_img_as_numpi": 4, "output_s": [4, 7], "rgb_output": 4, "expect": [4, 7], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 8, 10], "float32": [4, 5, 8], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 7, 9, 10], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 9], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": 5, "deep": [5, 9], "convolut": 5, "larg": 5, "modifi": 5, "normal": [5, 7], "rectangular": 5, "pool": 5, "simpler": [], "head": 9, "input_tensor": 5, "random": [5, 7, 8, 9, 10], "uniform": [5, 7, 8], "512": 5, "maxval": [5, 7, 8], "imagenet": 5, "extractor": 5, "resnet18": [], "resnet": 5, "18": [], "residu": [], "boolean": 5, "resnet34": [], "34": 9, "resnet50": [], "50": [5, 9], "resnet31": 5, "downsiz": 5, "4": [5, 9], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 8], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], "224": [5, 7, 8], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], 
"vit_b": [], "b": 10, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 8, 9, 10], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 8, 9, 10], "600": [5, 9, 10], "800": [5, 9, 10], "astyp": [5, 8, 9, 10], "crop": [7, 9], "dataset": [5, 9], "linknet_resnet18": [], "1024": [5, 8, 9, 10], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 8, 9], "backbon": [], "db_mobilenet_v3_larg": [5, 9], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 9], "assume_straight_pag": 5, "detectionpredictor": 5, "input_pag": [5, 9, 10], "itself": [], "fit": [], "crnn_vgg16_bn": [5, 9], "128": [5, 9], "crnn_mobilenet_v3_smal": [5, 9], "crnn_mobilenet_v3_larg": [5, 9], "sar_resnet31": [5, 9], "31": [5, 9], "64": [5, 7, 9], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 9], "recognitionpredictor": 5, "ocr_predictor": [5, 9], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 7, "export_as_straight_box": 5, "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": [5, 9], "assum": 5, "preserv": 7, "ratio": 7, "symmetr": 7, "bottom": 9, "final": [5, 8], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 8], "configur": [], "my": [], "procedur": 7, "draw": [7, 10], "design": 7, "torchvis": 7, "resiz": 7, 
"bilinear": 7, "transfo": 7, "minval": 7, "interpol": 7, "zero": [7, 10], "while": [7, 9], "done": 7, "mean": [7, 10], "std": 7, "gaussian": 7, "distribut": 7, "485": 7, "456": 7, "406": 7, "229": 7, "225": 7, "averag": [7, 9], "per": [7, 9], "standard": 7, "deviat": 7, "lambdatransform": 7, "fn": 7, "lambda": 7, "tograi": 7, "num_output_channel": [], "grayscal": 7, "colorinvers": 7, "min_val": 7, "tranform": 7, "color": [7, 10], "shift": 7, "randomli": 7, "invert": 7, "6": [3, 7, 9], "rang": [7, 8], "randombright": 7, "max_delta": 7, "adjust": 7, "bright": 7, "delta": 7, "offset": 7, "add": [7, 10], "pick": 7, "p": [7, 9, 10], "probabl": 7, "randomcontrast": 7, "contrast": 7, "contrast_factor": 7, "factor": 7, "randomsatur": 7, "satur": 7, "hsv": 7, "increas": 7, "randomhu": 7, "hue": 7, "randomgamma": 7, "min_gamma": 7, "max_gamma": 7, "min_gain": 7, "max_gain": 7, "gamma": 7, "correct": 7, "neg": 7, "lower": [7, 10], "param": [7, 9], "constant": 7, "multipli": 7, "randomjpegqu": 7, "min_qual": 7, "60": 7, "max_qual": 7, "jpeg": 7, "qualiti": 7, "dimension": 7, "between": [7, 10], "randomrot": 7, "max_angl": 7, "expand": 7, "degre": 7, "uniformli": 7, "randomcrop": 7, "08": [7, 9], "75": [7, 9], "33": 7, "min_area": 7, "max_area": 7, "min_ratio": 7, "max_ratio": 7, "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": 10, "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 7, "consecut": [7, 9], "sequenti": [7, 8], "oneof": 7, "jpegqual": 7, "randomappli": 7, "regroup": 10, "core": 10, "complementari": 10, "sens": 10, "visualize_pag": 10, "words_onli": 10, "display_artefact": 10, "add_label": 10, "figur": 10, "block": [9, 10], "plt": 10, "ocr_db_crnn": 10, "artefact": [9, 10], "figsiz": 10, "largest": 10, "side": 10, "plot": 10, "static": 10, "top": [9, 10], "synthesize_pag": 10, "draw_proba": 10, "respons": 10, "blank": 10, "blue": 10, 
"red": 10, "font_siz": 10, "13": [9, 10], "famili": 10, "synthes": 10, "metric": [9, 10], "assess": 10, "textmatch": 10, "match": [9, 10], "accuraci": 10, "aggreg": [1, 10], "foral": 10, "y": 10, "mathcal": 10, "frac": 10, "sum": 10, "limits_": 10, "f_": 10, "y_i": 10, "x_i": 10, "indic": 10, "defin": 10, "f_a": 10, "left": [9, 10], "begin": 10, "ll": 10, "mbox": 10, "strictli": 10, "integ": [8, 10], "updat": 10, "hello": [9, 10], "world": [9, 10], "summari": 10, "gt": 10, "pred": 10, "groung": 10, "truth": 10, "exact": [9, 10], "score": 10, "counterpart": 10, "unidecod": 10, "localizationconfus": 10, "iou_thresh": 10, "mask_shap": 10, "use_broadcast": [], "confus": 10, "iou": 10, "recal": [9, 10], "g_": 10, "precis": [9, 10], "meaniou": 10, "j": 10, "y_j": 10, "being": [9, 10], "intersect": 10, "union": 10, "g_x": 10, "assign": 10, "_i": 10, "geq": 10, "ground": 10, "asarrai": 10, "70": [9, 10], "110": 10, "95": [9, 10], "200": 10, "150": [9, 10], "pair": 10, "broadcast": [], "consum": [], "memori": [], "either": 9, "ocrmetr": 10, "l": 10, "hat": 10, "h_": 10, "b_j": 10, "l_j": 10, "gt_box": 10, "pred_box": 10, "gt_label": 10, "pred_label": 10, "comparison": [9, 10], "detectionmetr": 10, "c_j": 10, "compil": [6, 9], "better": [6, 9], "leverag": 6, "descript": 6, "colab": [], "quicktour": 6, "present": 6, "main": 6, "produc": 9, "searchabl": [], "don": 9, "meet": [], "detail": 9, "link": [], "section": [8, 9], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 9, "class_nam": [], "total": [], "date": 9, "preprocessor": 9, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": [], "multiprocess": [], 
"doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": 9, "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 9, "after": 9, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 9], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 8, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": 9, "sensit": 9, "abl": 9, "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 8, "achiev": 8, "might": [8, 9], "tune": 8, "thing": [8, 9], "product": 8, "readi": 8, "help": 8, "support": 9, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 9, "layer": [], "metadata": [], "util": 8, "export_model_to_onnx": [], "input_shap": 8, "dummy_input": [], "tensorspec": [], 
"model_path": [], "come": [], "soon": [], "seen": 9, "onc": 9, "separ": 9, "compon": 9, "charg": 9, "usabl": 9, "backend": 9, "along": 9, "processor": 9, "reusabl": 9, "consist": 9, "delimit": 9, "2d": 9, "corner": 9, "flag": 9, "belong": 9, "skew": [], "comprehens": 9, "benchmark": 9, "publicli": 9, "sec": [], "25": 9, "84": 9, "39": 9, "85": 9, "86": 9, "93": 9, "83": 9, "24": [], "80": 9, "29": 9, "90": 9, "67": 9, "76": 9, "11": 9, "81": 9, "71": 9, "7": 9, "21": 9, "82": 9, "20": 9, "49": 9, "87": 9, "63": 9, "17": [], "28": [], "51": 9, "46": 9, "db_resnet34": [], "22": [], "89": 9, "74": 9, "56": 9, "68": 9, "92": 9, "61": 9, "41": 9, "00": 9, "79": 9, "38": 9, "88": [], "62": 9, "26": [], "06": 9, "78": 9, "47": 9, "54": [], "abov": 9, "cf": 9, "disclaim": 9, "combin": 9, "199": 9, "second": 9, "warmup": 9, "phase": 9, "measur": 9, "1000": 9, "obtain": 9, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 9, "useabl": 9, "favorit": 9, "dummy_img": 9, "area": [], "send": [], "snippet": [], "transcrib": 9, "partial": [], "15": 9, "9": [], "73": 9, "44": [], "14": 9, "55": [], "58": [], "57": 9, "66": 9, "01": 9, "98": 9, "23": [], "69": 9, "99": 9, "91": 9, "05": [], "09": [], "96": 9, "40": [], "53": 9, "most": 9, "print": 9, "cfg": 9, "30595": 9, "45": 9, "72": 9, "43": 9, "65": 9, "77": 9, "30": 9, "07": [], "27": 9, "gvision": 9, "59": 9, "03": 9, "azur": [], "recogn": [], "42": 9, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 9, 10], "nest": 9, "get": [4, 9], "typic": 9, "layout": 9, "340": 9, "json_output": 9, "1357421875": 9, "0361328125": 9, "8564453125": 9, "8603515625": 9, "914085328578949": 9, "5478515625": 9, "06640625": 9, "5810546875": 9, "0966796875": 9, "9949972033500671": 9, "51171875": 9, "1630859375": 9, "9578408598899841": 9, "1396484375": 9, "3232421875": 9, "185546875": 9, "3515625": 9, "outpout": 9, "xml": 9, "hocr": 9, "export_as_xml": 9, "xml_output": 9, "xml_bytes_str": 9, 
"xml_element": 9, "utf": 9, "xmln": 9, "w3": 9, "1999": 9, "xhtml": 9, "lang": 9, "en": 9, "meta": 9, "equiv": 9, "charset": 9, "system": 9, "ocr_pag": 9, "ocr_carea": 9, "ocr_par": 9, "ocr_lin": 9, "ocrx_word": 9, "div": 9, "id": 9, "page_1": 9, "bbox": 9, "3456": 9, "ppageno": 9, "block_1_1": 9, "857": 9, "529": 9, "2504": 9, "2710": 9, "par_1_1": 9, "span": 9, "line_1_1": 9, "x_size": 9, "x_descend": 9, "x_ascend": 9, "word_1_1": 9, "1552": 9, "540": 9, "1778": 9, "580": 9, "x_wconf": 9, "word_1_2": 9, "1782": 9, "1900": 9, "583": 9, "word_1_3": 9, "1420": 9, "597": 9, "1684": 9, "641": 9, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 8, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 9, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 8, "tflite": 8, "conv_sequ": 8, "relu": 8, "kernel_s": 8, "serialized_model": 8, "convert_to_fp16": [], "half": [], "serial": 8, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 8, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], 
"distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": 9, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 9, "feed": [], "warm": [], "c5": 9, "x12larg": 9, "xeon": 9, "platinum": 9, "8275l": 9, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 8, "input_t": 8, "saved_model": 8, "And": 8, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 9, "02": 9, "5m": 9, "1m": 9, "19": [], "invoic": 9, "flexibl": [], "rotated_bbox": [1, 10], "beta": [], "linknet16": [5, 9], "160": 5, "arg": 1, "bash": [], "tax": 9, "35": 9, "vgg16_bn": 5, "mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 8, "tfliteconvert": 8, "from_keras_model": 8, "target_spec": 8, "supported_typ": 8, "float16": 8, "fallback": 8, "oper": 8, "representative_dataset": 8, "yield": 8, "supported_op": 8, "opsset": 8, "tflite_builtins_int8": 8, "inference_input_typ": 8, "int8": 8, "inference_output_typ": 8, "2m": 9, "7m": 9, "look": 9, "variou": 9, "below": 9, "unfortun": 9, "moment": 9, "04": 9, "36": 9, "97": 9, "resum": 9, "road": 9}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "DocArtefacts"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "IIIT5K"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 0, 1, "", "SVT"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", 
"DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.backbones": [[5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[7, 0, 1, "", "ColorInversion"], [7, 0, 1, "", "Compose"], [7, 0, 1, "", "LambdaTransformation"], [7, 0, 1, "", "Normalize"], [7, 0, 1, "", "OneOf"], [7, 0, 1, "", "RandomApply"], [7, 0, 1, "", "RandomBrightness"], [7, 0, 1, "", "RandomContrast"], [7, 0, 1, "", "RandomCrop"], [7, 0, 1, "", "RandomGamma"], [7, 0, 1, "", "RandomHue"], [7, 0, 1, "", "RandomJpegQuality"], [7, 0, 1, "", "RandomRotate"], [7, 0, 1, "", "RandomSaturation"], [7, 0, 1, "", "Resize"], [7, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[10, 0, 1, "", "DetectionMetric"], [10, 0, 1, "", "LocalizationConfusion"], [10, 0, 1, "", "OCRMetric"], [10, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[10, 2, 1, "", 
"summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.visualization": [[10, 1, 1, "", "synthesize_page"], [10, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": 0, "11": 0, "10": 0, "01": 0, "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7, 10], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 9], "recognit": [2, 5, 9], "main": 2, "model": [2, 5, 8, 9], "zoo": [2, 5, 9], "detect": [2, 5, 9], "support": [1, 2, 7], "dataset": [1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": [], "factori": [], "transform": 7, 
"compos": 7, "util": 10, "visual": 10, "task": 10, "evalu": 10, "notebook": 6, "train": 8, "your": 8, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 9, "readi": [], "us": 8, "avail": [1, 9], "object": [], "data": 1, "prepar": 8, "infer": 8, "optim": [], "half": 8, "precis": 8, "export": [], "onnx": [], "right": 9, "architectur": 9, "predictor": 9, "end": 9, "ocr": 9, "two": 9, "stage": 9, "approach": 9, "what": 9, "should": 9, "i": 9, "do": 9, "output": 9, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 8, "build": [], "implement": [], "content": [], "compress": 8, "savedmodel": 8, "note": [], "refer": [], "backbon": 5, "tensorflow": 8, "lite": 8, "quantiz": 8}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "docTR Vocabs": [[1, "id1"]], "docTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection 
models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.backbones": [[5, "doctr-models-backbones"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "docTR Notebooks": [[6, "doctr-notebooks"]], "doctr.transforms": [[7, "doctr-transforms"]], "Supported transformations": [[7, "supported-transformations"]], "Composing transformations": [[7, "composing-transformations"]], "Preparing your model for inference": [[8, "preparing-your-model-for-inference"]], "Model compression": [[8, "model-compression"]], "TensorFlow Lite": [[8, "tensorflow-lite"]], "Half-precision": [[8, "half-precision"]], "Post-training quantization": [[8, "post-training-quantization"]], "Using SavedModel": [[8, "using-savedmodel"]], "Choosing the right model": [[9, "choosing-the-right-model"]], "Text Detection": [[9, "text-detection"]], "Available architectures": [[9, "available-architectures"], [9, "id1"], [9, "id3"]], "Detection predictors": [[9, "detection-predictors"]], "Text Recognition": [[9, "text-recognition"]], "Text recognition model zoo": [[9, "id5"]], "Recognition predictors": [[9, "recognition-predictors"]], "End-to-End OCR": [[9, "end-to-end-ocr"]], "Two-stage approaches": [[9, "two-stage-approaches"]], "What should I do with the output?": [[9, "what-should-i-do-with-the-output"]], 
"doctr.utils": [[10, "doctr-utils"]], "Visualization": [[10, "visualization"]], "Task evaluation": [[10, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "docartefacts (class in doctr.datasets)": [[1, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "iiit5k (class in doctr.datasets)": [[1, "doctr.datasets.IIIT5K"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "svt (class in doctr.datasets)": [[1, "doctr.datasets.SVT"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], 
"read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small"]], "mobilenet_v3_small_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet31() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.resnet31"]], "sar_resnet31() (in module 
doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[7, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[7, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[7, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[7, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[7, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[7, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[7, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[7, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[7, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[7, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[7, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[7, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[7, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[7, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[7, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[7, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() 
(doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "notebooks", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "notebooks.md", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "docTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "docTR Notebooks", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": [0, 1], "we": [2, 3, 4, 7, 9], "member": [], "leader": [], "make": [8, 9, 10], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": 9, "size": [1, 4, 7, 9, 10], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 
7, "level": [1, 9, 10], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": [], "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 9], "act": [], "interact": [4, 10], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 8, 10], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 7, 10], "behavior": [], "posit": 10, "environ": [], "includ": [1, 3], "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 9], "affect": [], "mistak": [], "learn": 9, "from": [1, 2, 4, 5, 7, 8, 9, 10], "focus": [], "what": [], "i": [1, 4, 5, 7, 8, 10], "best": [], "just": 8, "u": 9, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 9, 10], "us": [1, 3, 5, 9, 10], "languag": [1, 2, 4, 9], "imageri": [], "attent": [], "advanc": [], "ani": [1, 4, 5, 7, 8, 9, 10], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 9, "publish": [], "inform": [1, 2, 9], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 9, "could": [], "reason": [], "consid": [1, 4, 9, 10], "inappropri": [], "profession": [], "set": [1, 5, 9, 10], "ar": [1, 3, 4, 6, 7, 9, 10], "clarifi": [], "take": [1, 8, 9], "appropri": 9, "fair": [], "action": [], "thei": 9, "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 8, 9, 10], "right": 10, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [1, 3, 8, 9, 10], "moder": [], "decis": [], "when": 5, "appli": [1, 7], "within": [], "all": [1, 4, 7, 9, 10], "space": [], "also": 9, "offici": [], "repres": [4, 9, 10], "e": [3, 4, 5], "mail": [], "post": 9, "via": [], "social": [], "media": [], "account": 8, "appoint": [], "onlin": [], 
"offlin": [], "event": [], "instanc": 9, "abus": [], "otherwis": 10, "mai": 9, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 7, 8, 9, 10], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 6, 8], "written": 4, "provid": [2, 8, 9], "clariti": [], "around": [], "natur": 2, "explan": 9, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 7], "singl": [], "seri": [], "continu": [], "No": 9, "involv": 9, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 2, 5, 10], "avoid": 3, "well": 8, "extern": [], "channel": [4, 7], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 7, 9, 10], "adapt": [], "version": [8, 9], "0": [1, 7, 9, 10], "avail": [2, 7], "http": [3, 4, 5, 9], "www": [4, 9], "org": [5, 9], "_": [1, 5, 8], "html": 9, "were": [4, 9], "inspir": 7, "mozilla": [], "": [4, 10], "ladder": [], "For": [3, 9], "answer": [], "common": [7, 10], "question": [], "about": 9, "see": [], "faq": [], "translat": [], "everyth": 9, "you": [3, 5, 8, 9], "need": [3, 10], "know": [], "effici": [1, 2, 5], "project": [], "packag": [2, 8, 10], "python": [], "doc": [4, 9], "librari": [3, 6], "build": 3, "script": [], "refer": [3, 9], "train": [1, 5, 7, 9], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": [6, 9], "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [7, 10], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": 3, "job": [], "coverag": [], "codecov": [], "back": [], "result": [4, 9], "As": [], "contributor": [], "onli": [5, 7, 10], "your": [1, 2, 4, 9, 10], "ad": [5, 7], "whether": [1, 4, 7, 10], "encount": [], "problem": [], "suggest": [], "input": [4, 7, 9], "ha": [1, 
10], "valu": [4, 7, 9], "can": [1, 3, 8, 9], "purpos": [], "advis": [], "first": [], "check": 9, "topic": [], "wasn": [], "t": [1, 9], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 8], "feel": [], "new": 10, "one": [1, 5, 7, 9], "do": [3, 8], "so": [1, 3], "whenev": [], "possibl": 10, "enough": 9, "jump": [], "wonder": [], "how": [], "someth": [], "more": 9, "gener": 1, "should": [1, 4, 7, 10], "out": [5, 7, 9, 10], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 9, 10], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [9, 10], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], "eas": [], "process": [2, 4, 9], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4], "same": [4, 9, 10], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 9, 10], "To": [3, 9], "togeth": 4, "current": 9, "built": [], "sphinx": [], "thank": [], "our": [5, 9], "file": 1, "been": [9, 10], "rebuilt": [], "want": 8, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [8, 9], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 8, "somewher": [], "els": [], "than": [3, 10], "join": [], "slack": [], "where": [4, 7, 9, 10], "find": 3, "requir": [3, 7], "3": [2, 3, 4, 5, 7, 8, 9, 10], "8": [5, 7, 9], "higher": [1, 3], "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 7, 9], "pytorch": [2, 3, 9], "correspond": [3, 9], "page": [1, 3, 5, 9, 10], "2": [2, 3, 7, 8, 9], "macbook": [], "m1": [], "chip": [], "some": [3, 6], "metal": [], "plugin": [], "1": [1, 5, 7, 8, 9, 10], "12": 9, "anoth": [1, 3, 5], "linux": 3, "few": [3, 8], "extra": 3, "maco": 3, "user": [2, 3, 4, 6], "them": [1, 3, 9], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 10], "gtk": 3, "latest": 3, "over": [1, 3, 9, 10], "here": [1, 3, 6, 7, 9], "last": [1, 3], "stabl": 
3, "doctr": [3, 8, 9], "strive": 3, "reduc": [3, 7], "framework": [1, 3, 9], "minimum": [3, 10], "necessari": 3, "featur": [3, 5, 6, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 7, 8], "torch": 3, "mode": 3, "clone": 3, "state": [2, 10], "art": 2, "optic": [2, 9], "charact": [1, 2, 4, 9, 10], "made": 2, "seamless": 2, "access": [1, 2, 4, 9], "anyon": 2, "power": 2, "easi": [2, 10], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 9, "understand": [1, 2, 9], "task": [1, 2, 9], "ocr": [1, 2, 5, 10], "predictor": [2, 5], "pars": [1, 2], "textual": [1, 2, 4, 5, 9], "identifi": 2, "each": [1, 2, 4, 7, 9, 10], "word": [2, 9, 10], "research": 2, "quickli": 2, "compar": 2, "own": 2, "architectur": [2, 5], "speed": [2, 5], "perform": [2, 4, 7, 8, 9, 10], "robust": 2, "stage": 2, "pretrain": [2, 5, 8, 9, 10], "paramet": [1, 2, 4, 5, 7, 10], "friendli": 2, "line": [2, 9, 10], "code": [2, 4], "load": [2, 8], "googlevis": 2, "aw": [2, 9], "textract": [2, 9], "optim": [2, 8], "infer": [2, 5, 7], "both": [2, 7, 9], "cpu": [2, 9], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 7], "scene": [1, 2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5, 9], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 9], "sar": [2, 5], "show": [2, 4, 5, 10], "attend": [2, 5], "read": [2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5, 9], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 10], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 7, 9, 10], "base": [2, 5], "sequenc": [1, 2, 4, 5, 9, 10], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 9], "multi": 2, "aspect": [2, 7], "non": [1, 2, 4, 7, 10], "vitstr": [], "vision": 1, "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 9], "form": [1, 2, 9], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 9], "consolid": 
[1, 2], "receipt": [1, 2, 9], "forpost": [1, 2], "sroie": [1, 2], "icdar": 2, "2019": 2, "iiit": 1, "5k": 1, "cvit": [], "street": 1, "view": 1, "synthtext": [], "visual": [], "geometri": [4, 9], "group": [], "svhn": [], "digit": 1, "unsupervis": [], "ic03": [], "2003": [], "ic13": [], "2013": [], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": 1, "data": [4, 7, 10], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": 4, "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 7, 10], "true": [1, 4, 5, 7, 8, 9, 10], "use_polygon": [], "fals": [1, 5, 7, 8, 10], "recognition_task": [], "kwarg": [1, 4, 5, 10], "sourc": [1, 4, 5, 7, 10], "document": [1, 9, 10], "import": [1, 4, 5, 7, 8, 9, 10], "train_set": 1, "download": 1, "img": [1, 7], "target": [1, 4, 7], "subset": [1, 9], "polygon": [1, 9], "rotat": [1, 4, 5, 7], "bound": [1, 4, 5, 7, 9, 10], "box": [1, 4, 5, 7, 9, 10], "instead": [1, 4], "straight": [1, 5, 9], "ones": 1, "recognit": [1, 10], "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": 1, "bmvc": 1, "2012": 1, "text": [1, 4, 5, 10], "prior": 1, "svt": 1, "ucsd": 1, "comput": [1, 9, 10], "hous": [], "number": [1, 7, 9, 10], "localis": [], "repositori": [], "websit": [], "entri": [], "futur": [], "direct": [], "img_fold": 1, "str": [1, 4, 5, 7, 10], "label_fold": [], "label": [1, 10], "part": 7, "challeng": [], "task2": [], "2015": [], "path": [1, 4, 8], "challenge2_training_task12_imag": [], "challenge2_training_task1_gt": [], "test_set": [], "challenge2_test_task12_imag": [], "challenge2_test_task1_gt": [], "folder": [1, 8], "annot": 4, "abstractdataset": [], "label_path": [], "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": 9, "pure": [], "mnt": [], "ramdisk": [], "max": 10, "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 1, "object": [1, 9, 10], "detect": [1, 10], "element": 
[1, 4, 5, 9, 10], "varieti": 1, "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 7, 8, 9, 10], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 7, 8, 9, 10], "vocabulari": 1, "sampl": [1, 9], "iter": 1, "cache_sampl": 1, "firsthand": 1, "font_famili": 10, "font": 10, "img_transform": [], "compos": [1, 9], "sample_transform": 1, "wordgener": [], "min_char": [], "int": [1, 4, 7, 10], "max_char": [], "list": [1, 4, 7, 10], "none": [1, 4, 9, 10], "callabl": [1, 7], "tupl": [4, 7, 10], "32": [1, 5, 7, 8, 9], "maximum": [1, 7], "detectiondataset": [], "recognitiondataset": [], "labels_path": [], "contain": 9, "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": 1, "batch_siz": 1, "drop_last": 1, "num_work": [], "collate_fn": 1, "wrapper": [1, 7], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 7], "pass": [1, 5, 9], "batch": [1, 5, 7, 9], "drop": 1, "isn": 1, "full": [1, 9, 10], "worker": 1, "function": [7, 10], "merg": [], "sinc": [1, 9], "content": [1, 4, 9, 10], "properli": 1, "model": [1, 10], "interpret": [1, 4], "multipl": [1, 4, 7], "name": [1, 5, 9], "10": [1, 9, 10], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 9], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 7, 9, 10], "ancient_greek": [], "48": [5, 9], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 9, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], 
"persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 9], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 9], "126": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], 
"\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 7], "dynamic_seq_length": 1, "ndarrai": [1, 4, 10], "given": [1, 4, 9, 10], "map": 1, "n": [1, 10], "length": 1, "Of": 1, "string": [1, 4, 9, 10], "option": 1, "start": 1, "case": [1, 9, 10], "upper": [1, 7], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 7, 9], "modul": [4, 7, 9, 10], "easili": [4, 8, 9, 10], "export": [4, 5, 8, 9, 10], "analysi": 4, "format": [4, 8, 9], "organ": 4, "uninterrupt": [4, 9], "confid": [4, 9, 10], "float": [4, 7, 8, 10], "associ": 4, "predict": [4, 5, 10], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 9], "rel": [4, 7], "collect": 4, "meant": [4, 8], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 8, 10], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 9], "sever": [4, 7, 9], 
"its": [1, 4, 7, 9, 10], "titl": [4, 9], "underneath": 4, "page_idx": [4, 9], "dimens": [4, 9, 10], "dict": [4, 9, 10], "numpi": [4, 5, 9, 10], "arrai": [4, 10], "uint8": [4, 5, 9, 10], "raw": [4, 10], "pixel": [4, 7, 9], "height": 4, "width": 4, "dictionari": [4, 10], "angl": [4, 7], "degress": 4, "preserve_aspect_ratio": 7, "overlai": 4, "displai": [4, 10], "matplotlib": 10, "pyplot": 10, "method": [7, 9], "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 9], "scale": [5, 7, 10], "rgb_mode": [], "password": [], "pdf": [4, 5], "convert": [4, 7, 8], "render": [], "72dpi": [], "output": [4, 7], "rgb": [4, 7], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 7, 8, 9, 10], "h": [4, 5, 7], "x": [4, 7, 10], "w": [4, 5, 7, 10], "c": 10, "read_img_as_numpi": 4, "output_s": [4, 7], "rgb_output": 4, "expect": [4, 7], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 8, 10], "float32": [4, 5, 8], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 7, 9, 10], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 9], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": [], "vgg": 5, "16": 5, "describ": 5, "veri": 5, "deep": [5, 9], "convolut": 5, "larg": 5, "modifi": 5, "normal": [5, 7], "rectangular": 5, "pool": 5, "simpler": [], "head": 9, "input_tensor": 5, "random": [5, 7, 8, 9, 10], "uniform": [5, 7, 8], "512": 5, "maxval": [5, 7, 8], "imagenet": 5, "extractor": 5, "resnet18": [], "resnet": 5, "18": [], "residu": [], "boolean": 5, "resnet34": [], "34": 9, "resnet50": [], "50": [5, 9], "resnet31": 5, "downsiz": 5, "4": [5, 9], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 8], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": [], "magc_resnet31": [], "global": [], "context": [], 
"224": [5, 7, 8], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 10, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": [], "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": [], "arch": 5, "croporientationpredictor": [], "np": [5, 8, 9, 10], "classif_mobilenet_v3_smal": [], "input_crop": [], "rand": [5, 8, 9, 10], "600": [5, 9, 10], "800": [5, 9, 10], "astyp": [5, 8, 9, 10], "crop": [7, 9], "dataset": [5, 9], "linknet_resnet18": [], "1024": [5, 8, 9, 10], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 8, 9], "backbon": [], "db_mobilenet_v3_larg": [5, 9], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 9], "assume_straight_pag": 5, "detectionpredictor": 5, "input_pag": [5, 9, 10], "itself": [], "fit": [], "crnn_vgg16_bn": [5, 9], "128": [5, 9], "crnn_mobilenet_v3_smal": [5, 9], "crnn_mobilenet_v3_larg": [5, 9], "sar_resnet31": [5, 9], "31": [5, 9], "64": [5, 7, 9], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 9], "recognitionpredictor": 5, "ocr_predictor": [5, 9], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 7, "export_as_straight_box": 5, "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": [5, 9], "assum": 5, "preserv": 7, "ratio": 7, "symmetr": 7, "bottom": 9, "final": [5, 8], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], 
"model_nam": [], "save": [1, 8], "configur": [], "my": [], "procedur": 7, "draw": [7, 10], "design": 7, "torchvis": 7, "resiz": 7, "bilinear": 7, "transfo": 7, "minval": 7, "interpol": 7, "zero": [7, 10], "while": [7, 9], "done": 7, "mean": [7, 10], "std": 7, "gaussian": 7, "distribut": 7, "485": 7, "456": 7, "406": 7, "229": 7, "225": 7, "averag": [7, 9], "per": [7, 9], "standard": 7, "deviat": 7, "lambdatransform": 7, "fn": 7, "lambda": 7, "tograi": 7, "num_output_channel": [], "grayscal": 7, "colorinvers": 7, "min_val": 7, "tranform": 7, "color": [7, 10], "shift": 7, "randomli": 7, "invert": 7, "6": [3, 7, 9], "rang": [7, 8], "randombright": 7, "max_delta": 7, "adjust": 7, "bright": 7, "delta": 7, "offset": 7, "add": [7, 10], "pick": 7, "p": [7, 9, 10], "probabl": 7, "randomcontrast": 7, "contrast": 7, "contrast_factor": 7, "factor": 7, "randomsatur": 7, "satur": 7, "hsv": 7, "increas": 7, "randomhu": 7, "hue": 7, "randomgamma": 7, "min_gamma": 7, "max_gamma": 7, "min_gain": 7, "max_gain": 7, "gamma": 7, "correct": 7, "neg": 7, "lower": [7, 10], "param": [7, 9], "constant": 7, "multipli": 7, "randomjpegqu": 7, "min_qual": 7, "60": 7, "max_qual": 7, "jpeg": 7, "qualiti": 7, "dimension": 7, "between": [7, 10], "randomrot": 7, "max_angl": 7, "expand": 7, "degre": 7, "uniformli": 7, "randomcrop": 7, "08": [7, 9], "75": [7, 9], "33": 7, "min_area": 7, "max_area": 7, "min_ratio": 7, "max_ratio": 7, "gaussianblur": [], "kernel_shap": [], "blur": [], "min": [], "channelshuffl": [], "gaussiannois": [], "nois": [], "randomhorizontalflip": [], "flip": [], "int64": 10, "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 7, "consecut": [7, 9], "sequenti": [7, 8], "oneof": 7, "jpegqual": 7, "randomappli": 7, "regroup": 10, "core": 10, "complementari": 10, "sens": 10, "visualize_pag": 10, "words_onli": 10, "display_artefact": 10, "add_label": 10, "figur": 10, "block": [9, 10], "plt": 10, "ocr_db_crnn": 10, "artefact": [9, 10], "figsiz": 10, "largest": 10, 
"side": 10, "plot": 10, "static": 10, "top": [9, 10], "synthesize_pag": 10, "draw_proba": 10, "respons": 10, "blank": 10, "blue": 10, "red": 10, "font_siz": 10, "13": [9, 10], "famili": 10, "synthes": 10, "metric": [9, 10], "assess": 10, "textmatch": 10, "match": [9, 10], "accuraci": 10, "aggreg": [1, 10], "foral": 10, "y": 10, "mathcal": 10, "frac": 10, "sum": 10, "limits_": 10, "f_": 10, "y_i": 10, "x_i": 10, "indic": 10, "defin": 10, "f_a": 10, "left": [9, 10], "begin": 10, "ll": 10, "mbox": 10, "strictli": 10, "integ": [8, 10], "updat": 10, "hello": [9, 10], "world": [9, 10], "summari": 10, "gt": 10, "pred": 10, "groung": 10, "truth": 10, "exact": [9, 10], "score": 10, "counterpart": 10, "unidecod": 10, "localizationconfus": 10, "iou_thresh": 10, "mask_shap": 10, "use_broadcast": [], "confus": 10, "iou": 10, "recal": [9, 10], "g_": 10, "precis": [9, 10], "meaniou": 10, "j": 10, "y_j": 10, "being": [9, 10], "intersect": 10, "union": 10, "g_x": 10, "assign": 10, "_i": 10, "geq": 10, "ground": 10, "asarrai": 10, "70": [9, 10], "110": 10, "95": [9, 10], "200": 10, "150": [9, 10], "pair": 10, "broadcast": [], "consum": [], "memori": [], "either": 9, "ocrmetr": 10, "l": 10, "hat": 10, "h_": 10, "b_j": 10, "l_j": 10, "gt_box": 10, "pred_box": 10, "gt_label": 10, "pred_label": 10, "comparison": [9, 10], "detectionmetr": 10, "c_j": 10, "compil": [6, 9], "better": [6, 9], "leverag": 6, "descript": 6, "colab": [], "quicktour": 6, "present": 6, "main": 6, "produc": 9, "searchabl": [], "don": 9, "meet": [], "detail": 9, "link": [], "section": [8, 9], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 9, "class_nam": [], "total": [], "date": 9, "preprocessor": 9, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": 
[], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": 9, "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 9, "after": 9, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 9], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 8, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": 9, "sensit": 9, "abl": 9, "howev": [], "guidanc": [], "tool": [], "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 8, "achiev": 8, "might": [8, 9], "tune": 8, "thing": [8, 9], "product": 8, "readi": 8, "help": 8, "support": 9, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 9, 
"layer": [], "metadata": [], "util": 8, "export_model_to_onnx": [], "input_shap": 8, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 9, "onc": 9, "separ": 9, "compon": 9, "charg": 9, "usabl": 9, "backend": 9, "along": 9, "processor": 9, "reusabl": 9, "consist": 9, "delimit": 9, "2d": 9, "corner": 9, "flag": 9, "belong": 9, "skew": [], "comprehens": 9, "benchmark": 9, "publicli": 9, "sec": [], "25": 9, "84": 9, "39": 9, "85": 9, "86": 9, "93": 9, "83": 9, "24": [], "80": 9, "29": 9, "90": 9, "67": 9, "76": 9, "11": 9, "81": 9, "71": 9, "7": 9, "21": 9, "82": 9, "20": 9, "49": 9, "87": 9, "63": 9, "17": [], "28": [], "51": 9, "46": 9, "db_resnet34": [], "22": [], "89": 9, "74": 9, "56": 9, "68": 9, "92": 9, "61": 9, "41": 9, "00": 9, "79": 9, "38": 9, "88": [], "62": 9, "26": [], "06": 9, "78": 9, "47": 9, "54": [], "abov": 9, "cf": 9, "disclaim": 9, "combin": 9, "199": 9, "second": 9, "warmup": 9, "phase": 9, "measur": 9, "1000": 9, "obtain": 9, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 9, "useabl": 9, "favorit": 9, "dummy_img": 9, "area": [], "send": [], "snippet": [], "transcrib": 9, "partial": [], "15": 9, "9": [], "73": 9, "44": [], "14": 9, "55": [], "58": [], "57": 9, "66": 9, "01": 9, "98": 9, "23": [], "69": 9, "99": 9, "91": 9, "05": [], "09": [], "96": 9, "40": [], "53": 9, "most": 9, "print": 9, "cfg": 9, "30595": 9, "45": 9, "72": 9, "43": 9, "65": 9, "77": 9, "30": 9, "07": [], "27": 9, "gvision": 9, "59": 9, "03": 9, "azur": [], "recogn": [], "42": 9, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 9, 10], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 9, "get": [4, 9], "typic": 9, "layout": 9, "340": 9, "text_output": [], "json_output": 9, "1357421875": 9, "0361328125": 9, "8564453125": 9, "8603515625": 9, "914085328578949": 9, "5478515625": 9, "06640625": 9, 
"5810546875": 9, "0966796875": 9, "9949972033500671": 9, "51171875": 9, "1630859375": 9, "9578408598899841": 9, "1396484375": 9, "3232421875": 9, "185546875": 9, "3515625": 9, "outpout": 9, "xml": 9, "hocr": 9, "export_as_xml": 9, "xml_output": 9, "xml_bytes_str": 9, "xml_element": 9, "utf": 9, "xmln": 9, "w3": 9, "1999": 9, "xhtml": 9, "lang": 9, "en": 9, "meta": 9, "equiv": 9, "charset": 9, "system": 9, "ocr_pag": 9, "ocr_carea": 9, "ocr_par": 9, "ocr_lin": 9, "ocrx_word": 9, "div": 9, "id": 9, "page_1": 9, "bbox": 9, "3456": 9, "ppageno": 9, "block_1_1": 9, "857": 9, "529": 9, "2504": 9, "2710": 9, "par_1_1": 9, "span": 9, "line_1_1": 9, "x_size": 9, "x_descend": 9, "x_ascend": 9, "word_1_1": 9, "1552": 9, "540": 9, "1778": 9, "580": 9, "x_wconf": 9, "word_1_2": 9, "1782": 9, "1900": 9, "583": 9, "word_1_3": 9, "1420": 9, "597": 9, "1684": 9, "641": 9, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 8, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 9, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 8, "tflite": 8, "conv_sequ": 8, "relu": 8, "kernel_s": 8, "serialized_model": 8, "convert_to_fp16": [], "half": [], "serial": 8, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 8, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], 
"ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": 4, "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": 9, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 9, "feed": [], "warm": [], "c5": 9, "x12larg": 9, "xeon": 9, "platinum": 9, "8275l": 9, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 8, "input_t": 8, "saved_model": 8, "And": 8, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 9, "02": 9, "5m": 9, "1m": 9, "19": [], "invoic": 9, "flexibl": [], "rotated_bbox": [1, 10], "beta": [], "linknet16": [5, 9], "160": 5, "arg": 1, "bash": [], "tax": 9, "35": 9, "vgg16_bn": 5, "mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 8, "tfliteconvert": 8, "from_keras_model": 8, "target_spec": 8, "supported_typ": 8, "float16": 8, "fallback": 8, "oper": 8, "representative_dataset": 8, "yield": 8, "supported_op": 8, "opsset": 8, "tflite_builtins_int8": 8, "inference_input_typ": 8, "int8": 8, "inference_output_typ": 8, "2m": 9, "7m": 9, "look": 9, "variou": 9, "below": 9, "unfortun": 9, "moment": 9, "04": 9, "36": 9, "97": 9, "resum": 9, "road": 9}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "DocArtefacts"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "IIIT5K"], [1, 0, 1, "", 
"OCRDataset"], [1, 0, 1, "", "SROIE"], [1, 0, 1, "", "SVT"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.backbones": [[5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet16"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[7, 0, 1, "", "ColorInversion"], [7, 0, 1, "", "Compose"], [7, 0, 1, "", "LambdaTransformation"], [7, 0, 1, "", "Normalize"], [7, 0, 1, "", "OneOf"], [7, 0, 1, "", "RandomApply"], [7, 0, 1, "", "RandomBrightness"], [7, 0, 1, "", "RandomContrast"], [7, 0, 1, "", "RandomCrop"], [7, 0, 1, "", "RandomGamma"], [7, 0, 1, "", "RandomHue"], [7, 0, 1, "", "RandomJpegQuality"], [7, 0, 1, "", "RandomRotate"], [7, 0, 1, "", "RandomSaturation"], [7, 0, 1, 
"", "Resize"], [7, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[10, 0, 1, "", "DetectionMetric"], [10, 0, 1, "", "LocalizationConfusion"], [10, 0, 1, "", "OCRMetric"], [10, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.visualization": [[10, 1, 1, "", "synthesize_page"], [10, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": [], "2021": 0, "12": [], "31": [], "4": 0, "11": 0, "10": 0, "01": 0, "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7, 10], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 9], "recognit": [2, 5, 9], "main": 2, "model": [2, 5, 8, 9], "zoo": [2, 5, 9], "detect": [2, 5, 9], "support": [1, 2, 7], "dataset": 
[1, 2], "arg": [], "synthet": [], "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": [], "factori": [], "transform": 7, "compos": 7, "util": 10, "visual": 10, "task": 10, "evalu": 10, "notebook": 6, "train": 8, "your": 8, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 9, "readi": [], "us": 8, "avail": [1, 9], "object": [], "data": 1, "prepar": 8, "infer": 8, "optim": [], "half": 8, "precis": 8, "export": [], "onnx": [], "right": 9, "architectur": 9, "predictor": 9, "end": 9, "ocr": 9, "two": 9, "stage": 9, "approach": 9, "what": 9, "should": 9, "i": 9, "do": 9, "output": 9, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 8, "build": [], "implement": [], "content": [], "compress": 8, "savedmodel": 8, "note": [], "refer": [], "backbon": 5, "tensorflow": 8, "lite": 8, "quantiz": 8}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Data Loading": [[1, 
"data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "docTR Vocabs": [[1, "id1"]], "docTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.backbones": [[5, "doctr-models-backbones"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "docTR Notebooks": [[6, "doctr-notebooks"]], "doctr.transforms": [[7, "doctr-transforms"]], "Supported transformations": [[7, "supported-transformations"]], "Composing transformations": [[7, "composing-transformations"]], "Preparing your model for inference": [[8, "preparing-your-model-for-inference"]], "Model compression": [[8, "model-compression"]], "TensorFlow Lite": [[8, "tensorflow-lite"]], "Half-precision": [[8, "half-precision"]], "Post-training quantization": [[8, "post-training-quantization"]], "Using SavedModel": [[8, "using-savedmodel"]], "Choosing the right model": [[9, "choosing-the-right-model"]], "Text Detection": [[9, "text-detection"]], "Available architectures": [[9, "available-architectures"], [9, "id1"], [9, "id3"]], "Detection predictors": [[9, "detection-predictors"]], "Text Recognition": [[9, "text-recognition"]], "Text recognition model zoo": 
[[9, "id5"]], "Recognition predictors": [[9, "recognition-predictors"]], "End-to-End OCR": [[9, "end-to-end-ocr"]], "Two-stage approaches": [[9, "two-stage-approaches"]], "What should I do with the output?": [[9, "what-should-i-do-with-the-output"]], "doctr.utils": [[10, "doctr-utils"]], "Visualization": [[10, "visualization"]], "Task evaluation": [[10, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "docartefacts (class in doctr.datasets)": [[1, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "iiit5k (class in doctr.datasets)": [[1, "doctr.datasets.IIIT5K"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "svt (class in doctr.datasets)": [[1, "doctr.datasets.SVT"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": 
[[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet16() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet16"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small"]], "mobilenet_v3_small_r() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, 
"doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet31() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.resnet31"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn() (in module doctr.models.backbones)": [[5, "doctr.models.backbones.vgg16_bn"]], "colorinversion (class in doctr.transforms)": [[7, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[7, "doctr.transforms.Compose"]], "lambdatransformation (class in doctr.transforms)": [[7, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[7, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[7, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[7, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[7, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[7, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[7, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[7, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[7, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[7, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[7, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[7, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[7, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[7, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in 
doctr.utils.metrics)": [[10, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.4.1/using_doctr/using_models.html b/v0.4.1/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.4.1/using_doctr/using_models.html +++ b/v0.4.1/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.5.0/_sources/using_doctr/using_models.rst.txt b/v0.5.0/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/v0.5.0/_sources/using_doctr/using_models.rst.txt +++ b/v0.5.0/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/v0.5.0/searchindex.js b/v0.5.0/searchindex.js index fd54c0f0be..3e9571a836 100644 --- a/v0.5.0/searchindex.js +++ b/v0.5.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "notebooks", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "notebooks.md", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "docTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "docTR Notebooks", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": [0, 1], "we": [1, 2, 3, 4, 7, 9], "member": [], "leader": [], "make": [8, 9, 10], "particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": 9, "size": [1, 4, 7, 9, 10], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 7, "level": [1, 9, 10], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": 1, "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 5, 9], "act": [], "interact": [4, 10], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 8, 10], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 7, 10], "behavior": [], "posit": 10, "environ": [], "includ": [1, 3], "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 9], "affect": [], 
"mistak": [], "learn": [2, 5, 9], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10], "focus": 1, "what": [], "i": [1, 4, 5, 7, 8, 10], "best": [], "just": 8, "u": 9, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 9, 10], "us": [1, 3, 5, 9, 10], "languag": [1, 2, 4, 9], "imageri": [], "attent": 5, "advanc": [], "ani": [1, 4, 5, 7, 8, 9, 10], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 9, "publish": [], "inform": [1, 2, 9], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 9, "could": [], "reason": [], "consid": [1, 4, 9, 10], "inappropri": [], "profession": [], "set": [1, 5, 9, 10], "ar": [1, 3, 4, 6, 7, 9, 10], "clarifi": [], "take": [1, 8, 9], "appropri": 9, "fair": [], "action": [], "thei": [9, 10], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 8, 9, 10], "right": 10, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [1, 3, 8, 9, 10], "moder": [], "decis": [], "when": 5, "appli": [1, 7], "within": [], "all": [1, 4, 7, 9, 10], "space": [], "also": 9, "offici": [], "repres": [4, 9, 10], "e": [3, 4, 5], "mail": [], "post": 9, "via": [], "social": [], "media": [], "account": 8, "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 9, "abus": [], "otherwis": 10, "mai": 9, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 7, 8, 9, 10], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 6, 8], "written": 4, "provid": [1, 2, 8, 9], "clariti": [], "around": [], "natur": [1, 2], "explan": 9, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 7], "singl": [], "seri": [], "continu": [], "No": 9, "involv": 9, "unsolicit": [], "specifi": [1, 4], "period": [], 
"time": [1, 2, 5, 10], "avoid": 3, "well": 8, "extern": [], "channel": [4, 7], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 7, 9, 10], "adapt": [], "version": [8, 9], "0": [1, 7, 9, 10], "avail": [2, 7], "http": [3, 4, 5, 9], "www": [4, 9], "org": [5, 9], "_": [1, 5, 8], "html": 9, "were": [4, 9], "inspir": 7, "mozilla": [], "": [4, 10], "ladder": [], "For": [3, 9], "answer": [], "common": [7, 10], "question": [], "about": 9, "see": [], "faq": [], "translat": [], "everyth": 9, "you": [1, 3, 4, 5, 8, 9], "need": [1, 3, 10], "know": [], "effici": [1, 2, 5], "project": 1, "packag": [2, 8, 10], "python": [], "doc": [4, 9], "librari": [3, 6], "build": 3, "script": [], "refer": [3, 9], "train": [1, 5, 7, 9], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": [6, 9], "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [7, 10], "ensur": [], "proper": [], "mainten": [], "github": 3, "worklow": [], "run": [3, 5], "job": [], "coverag": [], "codecov": [], "back": [], "result": [1, 4, 6, 9], "As": [], "contributor": [], "onli": [5, 7, 10], "your": [1, 2, 4, 9, 10], "ad": [5, 7], "whether": [1, 4, 7, 10], "encount": [], "problem": [], "suggest": [], "input": [4, 5, 7, 9], "ha": [1, 10], "valu": [4, 7, 9], "can": [1, 3, 8, 9], "purpos": [], "advis": [], "first": [], "check": 9, "topic": [], "wasn": [], "t": [1, 9], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 8], "feel": [], "new": 10, "one": [1, 5, 7, 9], "do": [3, 8], "so": [1, 3], "whenev": [], "possibl": 10, "enough": 9, "jump": [], "wonder": [], "how": 1, "someth": [], "more": [9, 10], "gener": 1, "should": [1, 4, 7, 10], "out": [5, 7, 9, 10], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 9, 10], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [9, 10], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], 
"eas": [], "process": [2, 4, 9], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4, 7], "same": [1, 4, 9, 10], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 9, 10], "To": [3, 9], "togeth": 4, "current": 9, "built": [], "sphinx": [], "thank": [], "our": [5, 9], "file": 1, "been": [1, 9, 10], "rebuilt": [], "want": 8, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [8, 9], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 8, "somewher": [], "els": [], "than": [3, 10], "join": [], "slack": [], "where": [4, 7, 9, 10], "find": 3, "requir": [3, 7], "3": [2, 3, 4, 5, 7, 8, 9, 10], "8": [5, 7, 9], "higher": [1, 3], "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 7, 9], "pytorch": [2, 3, 9], "correspond": [3, 9], "page": [1, 3, 5, 9, 10], "2": [2, 3, 7, 8, 9], "macbook": [], "m1": [], "chip": [], "some": [1, 3, 6], "metal": [], "plugin": [], "1": [1, 5, 7, 8, 9, 10], "12": 9, "anoth": [1, 3, 5], "linux": 3, "few": [3, 8], "extra": 3, "maco": 3, "user": [2, 3, 4, 6], "them": [1, 3, 9], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 10], "gtk": 3, "latest": 3, "over": [1, 3, 9, 10], "here": [1, 3, 6, 7, 9], "last": [1, 3], "stabl": 3, "doctr": [3, 8, 9], "strive": 3, "reduc": [3, 7], "framework": [1, 3, 9], "minimum": [1, 3, 10], "necessari": 3, "featur": [3, 5, 6, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 7, 8], "torch": [3, 5], "mode": 3, "clone": 3, "state": [2, 10], "art": 2, "optic": [2, 9], "charact": [1, 2, 4, 9, 10], "made": 2, "seamless": 2, "access": [1, 2, 4, 9], "anyon": 2, "power": 2, "easi": [2, 10], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 9, "understand": [1, 2, 9], "task": [1, 2, 9], "ocr": [1, 2, 5, 10], "predictor": [2, 4, 5], "pars": [1, 2], "textual": [1, 2, 4, 5, 9], "identifi": 2, "each": [1, 2, 4, 7, 9, 
10], "word": [1, 2, 9, 10], "research": 2, "quickli": 2, "compar": 2, "own": [1, 2], "architectur": [2, 5], "speed": [2, 5], "perform": [2, 4, 7, 8, 9, 10], "robust": [1, 2], "stage": 2, "pretrain": [2, 5, 8, 9, 10], "paramet": [1, 2, 4, 5, 7, 10], "friendli": 2, "line": [2, 9, 10], "code": [2, 4], "load": [2, 8], "googlevis": 2, "aw": [2, 9], "textract": [2, 9], "optim": [2, 8], "infer": [2, 5, 7], "both": [1, 2, 7, 9], "cpu": [2, 9], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 7], "scene": [1, 2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5, 9], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 9], "sar": [2, 5], "show": [2, 4, 5, 10], "attend": [2, 5], "read": [1, 2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5, 9], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 10], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 7, 9, 10], "base": [2, 5], "sequenc": [1, 2, 4, 5, 9, 10], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 9], "multi": [2, 5], "aspect": [2, 5, 7], "non": [1, 2, 4, 5, 7, 10], "vitstr": [], "vision": 1, "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 9], "form": [1, 2, 9], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 9], "consolid": [1, 2], "receipt": [1, 2, 9], "forpost": [1, 2], "sroie": [1, 2], "icdar": [1, 2], "2019": 2, "iiit": [1, 2], "5k": [1, 2], "cvit": 2, "street": [1, 2], "view": [1, 2], "synthtext": [1, 2], "visual": 2, "geometri": [2, 4, 9], "group": 2, "svhn": [1, 2], "digit": [1, 2], "unsupervis": 2, "ic03": [1, 2], "2003": [1, 2], "ic13": [1, 2], "2013": [1, 2], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [4, 7, 10], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [4, 10], "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 7, 10], 
"true": [1, 4, 5, 7, 8, 9, 10], "use_polygon": [1, 10], "fals": [1, 4, 5, 7, 8, 10], "recognition_task": [], "kwarg": [1, 4, 5, 10], "sourc": [1, 4, 5, 7, 10], "document": [1, 5, 6, 9, 10], "import": [1, 4, 5, 7, 8, 9, 10], "train_set": 1, "download": 1, "img": [1, 7], "target": [1, 4, 7, 10], "subset": [1, 9], "polygon": [1, 9], "rotat": [1, 4, 5, 7, 10], "bound": [1, 4, 5, 7, 9, 10], "box": [1, 4, 5, 7, 9, 10], "instead": [1, 4], "straight": [1, 5, 9], "ones": [1, 10], "recognit": [1, 10], "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": 1, "bmvc": 1, "2012": 1, "text": [1, 4, 5, 10], "prior": 1, "svt": 1, "ucsd": 1, "comput": [1, 9, 10], "hous": 1, "number": [1, 7, 9, 10], "localis": 1, "repositori": 1, "websit": 1, "entri": 1, "futur": 1, "direct": 1, "img_fold": 1, "str": [1, 4, 5, 7, 10], "label_fold": 1, "label": [1, 10], "part": [1, 7], "challeng": 1, "task2": 1, "2015": 1, "path": [1, 4, 8], "challenge2_training_task12_imag": 1, "challenge2_training_task1_gt": 1, "test_set": 1, "challenge2_test_task12_imag": 1, "challenge2_test_task1_gt": 1, "folder": [1, 8], "annot": [1, 4], "abstractdataset": [], "label_path": 1, "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [1, 9], "pure": [], "mnt": [], "ramdisk": [], "max": [7, 10], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 1, "object": [1, 6, 9, 10], "detect": [1, 6, 10], "element": [1, 4, 5, 9, 10], "varieti": 1, "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 7, 8, 9, 10], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 7, 8, 9, 10], "vocabulari": 1, "sampl": [1, 9], "iter": [1, 7], "cache_sampl": 1, "firsthand": 1, "font_famili": [1, 10], "font": [1, 10], "img_transform": 1, "compos": [1, 9], "sample_transform": 1, "wordgener": 1, "min_char": 1, "int": [1, 4, 7, 10], "max_char": 1, "list": [1, 4, 7, 10], "none": [1, 4, 9, 10], 
"callabl": [1, 7], "tupl": [1, 4, 7, 10], "32": [1, 5, 7, 8, 9], "maximum": [1, 7], "detectiondataset": 1, "recognitiondataset": 1, "labels_path": 1, "contain": [1, 9], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": [1, 7], "batch_siz": 1, "drop_last": 1, "num_work": 1, "collate_fn": 1, "wrapper": [1, 7], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 5, 7], "pass": [1, 4, 5, 9], "batch": [1, 5, 7, 9], "drop": 1, "isn": 1, "full": [1, 9, 10], "worker": 1, "function": [7, 10], "merg": [], "sinc": [1, 9], "content": [1, 4, 9, 10], "properli": 1, "model": [1, 10], "interpret": [1, 4], "multipl": [1, 4, 7], "name": [1, 5, 9], "10": [1, 9, 10], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 9], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 7, 9, 10], "ancient_greek": [], "48": [5, 9], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 9, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 9], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 9], "126": 1, 
"\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 7], "dynamic_seq_length": 1, "ndarrai": [1, 4, 10], "given": [1, 4, 7, 9, 10], "map": 1, "n": [1, 10], "length": 1, "Of": 1, "string": [1, 4, 9, 10], "option": 1, "start": 1, "case": [1, 9, 10], "upper": [1, 7], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 7, 9], "modul": [4, 7, 9, 10], "easili": [4, 8, 9, 10], "export": [4, 5, 6, 8, 9, 10], "analysi": 4, "format": [1, 4, 8, 9, 10], "organ": 4, "uninterrupt": [4, 9], "confid": [4, 9, 10], "float": [4, 7, 8, 10], "associ": 4, "predict": [4, 5, 10], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 9], "rel": [4, 7, 10], "collect": 4, "meant": [4, 8], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 8, 10], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 9], "sever": [4, 7, 9], "its": [1, 4, 7, 9, 10], "titl": [4, 9], "underneath": 4, "page_idx": [4, 9], "dimens": [4, 9, 10], 
"dict": [4, 9, 10], "numpi": [4, 5, 9, 10], "arrai": [4, 10], "uint8": [4, 5, 9, 10], "raw": [4, 10], "pixel": [4, 7, 9], "height": 4, "width": 4, "dictionari": [4, 10], "angl": [4, 7], "degress": 4, "preserve_aspect_ratio": [4, 5, 7], "overlai": 4, "displai": [4, 10], "matplotlib": 10, "pyplot": 10, "method": [7, 9], "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 9], "scale": [5, 7, 10], "rgb_mode": [], "password": [], "pdf": [4, 5, 6], "convert": [4, 7, 8], "render": [], "72dpi": [], "output": [4, 7], "rgb": [4, 7], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 7, 8, 9, 10], "h": [4, 5, 7], "x": [4, 7, 10], "w": [4, 5, 7, 10], "c": 10, "read_img_as_numpi": 4, "output_s": [4, 7], "rgb_output": 4, "expect": [4, 7, 10], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 8, 10], "float32": [4, 5, 8], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 7, 9, 10], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 9], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": 5, "vgg": 5, "16": 5, "describ": [5, 10], "veri": 5, "deep": [5, 9], "convolut": 5, "larg": 5, "modifi": 5, "normal": [5, 7], "rectangular": 5, "pool": 5, "simpler": 5, "head": [5, 9], "input_tensor": 5, "random": [5, 7, 8, 9, 10], "uniform": [5, 7, 8], "512": 5, "maxval": [5, 7, 8], "imagenet": 5, "extractor": 5, "resnet18": 5, "resnet": 5, "18": [5, 9], "residu": 5, "boolean": 5, "resnet34": [], "34": 9, "resnet50": [], "50": [5, 9], "resnet31": 5, "downsiz": 5, "4": [5, 9, 10], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 8], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": 5, "magc_resnet31": 5, "global": 5, "context": 5, "224": [5, 7, 8], "vit_": [], "visiontransform": [], "worth": [], "16x16": 
[], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 10, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 7, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 5, "arch": 5, "croporientationpredictor": 5, "np": [5, 8, 9, 10], "classif_mobilenet_v3_smal": 5, "input_crop": 5, "rand": [5, 8, 9, 10], "600": [5, 9, 10], "800": [5, 9, 10], "astyp": [5, 8, 9, 10], "crop": [5, 7, 9], "dataset": [5, 9], "linknet_resnet18": [5, 9], "1024": [5, 8, 9, 10], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 8, 9], "backbon": 5, "db_mobilenet_v3_larg": [5, 9], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 9], "assume_straight_pag": 5, "detectionpredictor": 5, "input_pag": [5, 9, 10], "itself": [], "fit": 5, "crnn_vgg16_bn": [5, 9], "128": [5, 9], "crnn_mobilenet_v3_smal": [5, 9], "crnn_mobilenet_v3_larg": [5, 9], "sar_resnet31": [5, 9], "31": [5, 9], "64": [5, 7, 9], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 9], "recognitionpredictor": 5, "ocr_predictor": [5, 9], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 7, "export_as_straight_box": 5, "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": [5, 9], "assum": 5, "preserv": [5, 7], "ratio": [5, 7], "symmetr": 7, "bottom": 9, "final": [5, 8], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 8], "configur": [], "my": [], "procedur": 7, 
"draw": [7, 10], "design": 7, "torchvis": 7, "resiz": 7, "bilinear": 7, "transfo": 7, "minval": 7, "interpol": 7, "zero": [7, 10], "while": [7, 9], "done": 7, "mean": [7, 10], "std": 7, "gaussian": 7, "distribut": 7, "485": 7, "456": 7, "406": 7, "229": 7, "225": 7, "averag": [7, 9], "per": [7, 9], "standard": 7, "deviat": 7, "lambdatransform": 7, "fn": 7, "lambda": 7, "tograi": 7, "num_output_channel": 7, "grayscal": 7, "colorinvers": 7, "min_val": 7, "tranform": 7, "color": [7, 10], "shift": 7, "randomli": 7, "invert": 7, "6": [3, 7, 9], "rang": [7, 8], "randombright": 7, "max_delta": 7, "adjust": 7, "bright": 7, "delta": 7, "offset": 7, "add": [7, 10], "pick": 7, "p": [7, 9, 10], "probabl": 7, "randomcontrast": 7, "contrast": 7, "contrast_factor": 7, "factor": 7, "randomsatur": 7, "satur": 7, "hsv": 7, "increas": 7, "randomhu": 7, "hue": 7, "randomgamma": 7, "min_gamma": 7, "max_gamma": 7, "min_gain": 7, "max_gain": 7, "gamma": 7, "correct": 7, "neg": 7, "lower": [7, 10], "param": [7, 9], "constant": 7, "multipli": 7, "randomjpegqu": 7, "min_qual": 7, "60": 7, "max_qual": 7, "jpeg": 7, "qualiti": 7, "dimension": 7, "between": [7, 10], "randomrot": 7, "max_angl": 7, "expand": 7, "degre": 7, "uniformli": 7, "randomcrop": 7, "08": [7, 9], "75": [7, 9], "33": 7, "min_area": 7, "max_area": 7, "min_ratio": 7, "max_ratio": 7, "gaussianblur": 7, "kernel_shap": 7, "blur": 7, "min": 7, "channelshuffl": 7, "gaussiannois": 7, "nois": 7, "randomhorizontalflip": [], "flip": [], "int64": 10, "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 7, "consecut": [7, 9], "sequenti": [7, 8], "oneof": 7, "jpegqual": 7, "randomappli": 7, "regroup": 10, "core": 10, "complementari": 10, "sens": 10, "visualize_pag": 10, "words_onli": 10, "display_artefact": 10, "add_label": 10, "figur": 10, "block": [9, 10], "plt": 10, "ocr_db_crnn": 10, "artefact": [6, 9, 10], "figsiz": 10, "largest": 10, "side": 10, "plot": 10, "static": 10, "top": [9, 10], "synthesize_pag": 10, 
"draw_proba": 10, "respons": 10, "blank": 10, "blue": 10, "red": 10, "font_siz": 10, "13": [9, 10], "famili": 10, "synthes": 10, "metric": [9, 10], "assess": 10, "textmatch": 10, "match": [9, 10], "accuraci": 10, "aggreg": [1, 10], "foral": 10, "y": 10, "mathcal": 10, "frac": 10, "sum": 10, "limits_": 10, "f_": 10, "y_i": 10, "x_i": 10, "indic": 10, "defin": 10, "f_a": 10, "left": [9, 10], "begin": 10, "ll": 10, "mbox": 10, "strictli": 10, "integ": [8, 10], "updat": 10, "hello": [9, 10], "world": [9, 10], "summari": 10, "gt": 10, "pred": 10, "groung": 10, "truth": 10, "exact": [9, 10], "score": 10, "counterpart": 10, "unidecod": 10, "localizationconfus": 10, "iou_thresh": 10, "mask_shap": 10, "use_broadcast": 10, "confus": 10, "iou": 10, "recal": [9, 10], "g_": 10, "precis": [9, 10], "meaniou": 10, "j": 10, "y_j": 10, "being": [9, 10], "intersect": 10, "union": 10, "g_x": 10, "assign": 10, "_i": 10, "geq": 10, "ground": 10, "asarrai": 10, "70": [9, 10], "110": 10, "95": [9, 10], "200": 10, "150": [9, 10], "pair": 10, "broadcast": 10, "consum": 10, "memori": 10, "either": [9, 10], "ocrmetr": 10, "l": 10, "hat": 10, "h_": 10, "b_j": 10, "l_j": 10, "gt_box": 10, "pred_box": 10, "gt_label": 10, "pred_label": 10, "comparison": [9, 10], "detectionmetr": 10, "c_j": 10, "compil": [6, 9], "better": [6, 9], "leverag": 6, "descript": 6, "colab": [], "quicktour": 6, "present": 6, "main": 6, "produc": [6, 9], "searchabl": 6, "don": 9, "meet": [], "detail": 9, "link": [], "section": [8, 9], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 9, "class_nam": [], "total": [], "date": 9, "preprocessor": 9, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": 
[], "work": [], "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [1, 9], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 9, "after": 9, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 9], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 8, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [1, 9], "sensit": [1, 9], "abl": [1, 9], "howev": 1, "guidanc": 1, "tool": 1, "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 8, "achiev": 8, "might": [8, 9], "tune": 8, "thing": [8, 9], "product": 8, "readi": 8, "help": 8, "support": 9, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 9, "layer": [], "metadata": [], "util": 8, 
"export_model_to_onnx": [], "input_shap": 8, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 9, "onc": 9, "separ": 9, "compon": 9, "charg": 9, "usabl": 9, "backend": 9, "along": 9, "processor": 9, "reusabl": 9, "consist": 9, "delimit": 9, "2d": 9, "corner": 9, "flag": 9, "belong": 9, "skew": [], "comprehens": 9, "benchmark": 9, "publicli": 9, "sec": [], "25": 9, "84": 9, "39": 9, "85": 9, "86": 9, "93": 9, "83": 9, "24": [], "80": 9, "29": 9, "90": 9, "67": 9, "76": 9, "11": 9, "81": 9, "71": 9, "7": 9, "21": 9, "82": 9, "20": 9, "49": 9, "87": 9, "63": 9, "17": [], "28": [], "51": 9, "46": 9, "db_resnet34": [], "22": [], "89": 9, "74": 9, "56": 9, "68": 9, "92": 9, "61": 9, "41": 9, "00": 9, "79": 9, "38": 9, "88": [], "62": 9, "26": [], "06": 9, "78": 9, "47": 9, "54": [], "abov": 9, "cf": 9, "disclaim": 9, "combin": 9, "199": 9, "second": 9, "warmup": 9, "phase": 9, "measur": 9, "1000": 9, "obtain": 9, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 9, "useabl": 9, "favorit": 9, "dummy_img": 9, "area": [], "send": [], "snippet": [], "transcrib": 9, "partial": [], "15": 9, "9": [], "73": 9, "44": [], "14": 9, "55": [], "58": [], "57": 9, "66": 9, "01": 9, "98": 9, "23": [], "69": 9, "99": 9, "91": 9, "05": [], "09": [], "96": 9, "40": [], "53": 9, "most": 9, "print": 9, "cfg": 9, "30595": 9, "45": 9, "72": 9, "43": 9, "65": 9, "77": 9, "30": 9, "07": [], "27": 9, "gvision": 9, "59": 9, "03": 9, "azur": [], "recogn": [], "42": 9, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 9, 10], "nest": 9, "get": [4, 9], "typic": 9, "layout": 9, "340": 9, "json_output": 9, "1357421875": 9, "0361328125": 9, "8564453125": 9, "8603515625": 9, "914085328578949": 9, "5478515625": 9, "06640625": 9, "5810546875": 9, "0966796875": 9, "9949972033500671": 9, "51171875": 9, "1630859375": 9, "9578408598899841": 9, "1396484375": 9, "3232421875": 9, "185546875": 9, "3515625": 9, "outpout": 
9, "xml": 9, "hocr": 9, "export_as_xml": 9, "xml_output": 9, "xml_bytes_str": 9, "xml_element": 9, "utf": 9, "xmln": 9, "w3": 9, "1999": 9, "xhtml": 9, "lang": 9, "en": 9, "meta": 9, "equiv": 9, "charset": 9, "system": 9, "ocr_pag": 9, "ocr_carea": 9, "ocr_par": 9, "ocr_lin": 9, "ocrx_word": 9, "div": 9, "id": 9, "page_1": 9, "bbox": 9, "3456": 9, "ppageno": 9, "block_1_1": 9, "857": 9, "529": 9, "2504": 9, "2710": 9, "par_1_1": 9, "span": 9, "line_1_1": 9, "x_size": 9, "x_descend": 9, "x_ascend": 9, "word_1_1": 9, "1552": 9, "540": 9, "1778": 9, "580": 9, "x_wconf": 9, "word_1_2": 9, "1782": 9, "1900": 9, "583": 9, "word_1_3": 9, "1420": 9, "597": 9, "1684": 9, "641": 9, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 8, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 9, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 8, "tflite": 8, "conv_sequ": 8, "relu": 8, "kernel_s": 8, "serialized_model": 8, "convert_to_fp16": [], "half": [], "serial": 8, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 8, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": 
[], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": [], "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": 9, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 9, "feed": [], "warm": [], "c5": 9, "x12larg": 9, "xeon": 9, "platinum": 9, "8275l": 9, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 8, "input_t": 8, "saved_model": 8, "And": 8, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 9, "02": 9, "5m": 9, "1m": 9, "19": [], "invoic": 9, "flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": 5, "arg": 1, "bash": [], "tax": 9, "35": 9, "vgg16_bn": [], "mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 8, "tfliteconvert": 8, "from_keras_model": 8, "target_spec": 8, "supported_typ": 8, "float16": 8, "fallback": 8, "oper": 8, "representative_dataset": 8, "yield": 8, "supported_op": 8, "opsset": 8, "tflite_builtins_int8": 8, "inference_input_typ": 8, "int8": 8, "inference_output_typ": 8, "2m": 9, "7m": 9, "look": 9, "variou": 9, "below": 9, "unfortun": 9, "moment": 9, "04": 9, "36": 9, "97": 9, "resum": 9, "road": 9, "get_text_word": 4, "get_lin": 4}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "DetectionDataset"], [1, 0, 1, "", "DocArtefacts"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "IC03"], [1, 0, 1, "", "IC13"], [1, 0, 1, "", "IIIT5K"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "RecognitionDataset"], [1, 0, 1, "", "SROIE"], [1, 0, 
1, "", "SVHN"], [1, 0, 1, "", "SVT"], [1, 0, 1, "", "SynthText"], [1, 0, 1, "", "WordGenerator"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_lines"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.classification": [[5, 1, 1, "", "crop_orientation_predictor"], [5, 1, 1, "", "magc_resnet31"], [5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, "", "mobilenet_v3_small_orientation"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet18"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn_r"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet_resnet18"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[7, 0, 1, "", "ChannelShuffle"], [7, 0, 1, "", "ColorInversion"], [7, 0, 1, "", "Compose"], [7, 0, 1, "", "GaussianBlur"], [7, 0, 1, "", "GaussianNoise"], [7, 0, 1, "", "LambdaTransformation"], [7, 0, 1, "", 
"Normalize"], [7, 0, 1, "", "OneOf"], [7, 0, 1, "", "RandomApply"], [7, 0, 1, "", "RandomBrightness"], [7, 0, 1, "", "RandomContrast"], [7, 0, 1, "", "RandomCrop"], [7, 0, 1, "", "RandomGamma"], [7, 0, 1, "", "RandomHue"], [7, 0, 1, "", "RandomJpegQuality"], [7, 0, 1, "", "RandomRotate"], [7, 0, 1, "", "RandomSaturation"], [7, 0, 1, "", "Resize"], [7, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[10, 0, 1, "", "DetectionMetric"], [10, 0, 1, "", "LocalizationConfusion"], [10, 0, 1, "", "OCRMetric"], [10, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.visualization": [[10, 1, 1, "", "synthesize_page"], [10, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": 0, "2021": 0, "12": [], "31": [], "4": 0, "11": 0, "10": 0, "01": 0, "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7, 10], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": 
[], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 9], "recognit": [2, 5, 9], "main": 2, "model": [2, 5, 8, 9], "zoo": [2, 5, 9], "detect": [2, 5, 9], "support": [1, 2, 7], "dataset": [1, 2], "arg": [], "synthet": 1, "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": 5, "factori": [], "transform": 7, "compos": 7, "util": 10, "visual": 10, "task": 10, "evalu": 10, "notebook": 6, "train": 8, "your": 8, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 9, "readi": [], "us": 8, "avail": [1, 9], "object": [], "data": 1, "prepar": 8, "infer": 8, "optim": [], "half": 8, "precis": 8, "export": [], "onnx": [], "right": 9, "architectur": 9, "predictor": 9, "end": 9, "ocr": 9, "two": 9, "stage": 9, "approach": 9, "what": 9, "should": 9, "i": 9, "do": 9, "output": 9, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 8, "build": [], "implement": [], "content": [], "compress": 8, "savedmodel": 8, "note": [], "refer": [], "backbon": [], "tensorflow": 8, "lite": 8, "quantiz": 8, "public": 1, "privat": 1}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, 
"v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Public datasets": [[1, "public-datasets"]], "docTR synthetic datasets": [[1, "doctr-synthetic-datasets"]], "docTR private datasets": [[1, "doctr-private-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "docTR Vocabs": [[1, "id1"]], "docTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.classification": [[5, "doctr-models-classification"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "docTR Notebooks": [[6, "doctr-notebooks"]], "doctr.transforms": [[7, "doctr-transforms"]], "Supported transformations": [[7, "supported-transformations"]], "Composing transformations": [[7, "composing-transformations"]], "Preparing your model for inference": [[8, "preparing-your-model-for-inference"]], 
"Model compression": [[8, "model-compression"]], "TensorFlow Lite": [[8, "tensorflow-lite"]], "Half-precision": [[8, "half-precision"]], "Post-training quantization": [[8, "post-training-quantization"]], "Using SavedModel": [[8, "using-savedmodel"]], "Choosing the right model": [[9, "choosing-the-right-model"]], "Text Detection": [[9, "text-detection"]], "Available architectures": [[9, "available-architectures"], [9, "id1"], [9, "id3"]], "Detection predictors": [[9, "detection-predictors"]], "Text Recognition": [[9, "text-recognition"]], "Text recognition model zoo": [[9, "id5"]], "Recognition predictors": [[9, "recognition-predictors"]], "End-to-End OCR": [[9, "end-to-end-ocr"]], "Two-stage approaches": [[9, "two-stage-approaches"]], "What should I do with the output?": [[9, "what-should-i-do-with-the-output"]], "doctr.utils": [[10, "doctr-utils"]], "Visualization": [[10, "visualization"]], "Task evaluation": [[10, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[1, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[1, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[1, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[1, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[1, "doctr.datasets.IIIT5K"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[1, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[1, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[1, 
"doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[1, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[1, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_lines() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_lines"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, 
"doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[5, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet_resnet18"]], "magc_resnet31() (in module doctr.models.classification)": [[5, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[5, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[5, "doctr.models.classification.resnet31"]], "sar_resnet31() (in module 
doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[5, "doctr.models.classification.vgg16_bn_r"]], "channelshuffle (class in doctr.transforms)": [[7, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[7, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[7, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[7, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[7, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[7, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[7, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[7, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[7, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[7, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[7, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[7, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[7, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[7, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[7, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[7, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[7, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[7, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[7, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in 
doctr.utils.metrics)": [[10, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "datasets", "index", "installing", "io", "models", "notebooks", "transforms", "using_model_export", "using_models", "utils"], "filenames": ["changelog.rst", "datasets.rst", "index.rst", "installing.rst", "io.rst", "models.rst", "notebooks.md", "transforms.rst", "using_model_export.rst", "using_models.rst", "utils.rst"], "titles": ["Changelog", "doctr.datasets", "docTR: Document Text Recognition", "Installation", "doctr.io", "doctr.models", "docTR Notebooks", "doctr.transforms", "Preparing your model for inference", "Choosing the right model", "doctr.utils"], "terms": {"releas": [0, 3], "note": [0, 1], "we": [1, 2, 3, 4, 7, 9], "member": [], "leader": [], "make": [8, 9, 10], 
"particip": [], "commun": [], "harass": [], "free": [], "experi": [], "everyon": [], "regardless": [], "ag": [], "bodi": 9, "size": [1, 4, 7, 9, 10], "visibl": [], "invis": [], "disabl": [], "ethnic": [], "sex": [], "characterist": [], "gender": [], "ident": [], "express": 7, "level": [1, 9, 10], "educ": [], "socio": [], "econom": [], "statu": [], "nation": [], "person": 1, "appear": [], "race": [], "religion": [], "sexual": [], "orient": [4, 5, 9], "act": [], "interact": [4, 10], "wai": [1, 2], "contribut": [], "an": [1, 2, 4, 5, 8, 10], "open": [], "welcom": [], "divers": [], "inclus": [], "healthi": [], "exampl": [1, 4, 5, 7, 10], "behavior": [], "posit": 10, "environ": [], "includ": [1, 3], "demonstr": [], "empathi": [], "kind": [], "toward": 3, "other": [], "peopl": [], "Being": [], "respect": [], "differ": [], "opinion": [], "viewpoint": [], "give": [], "gracefulli": [], "accept": [], "construct": [], "feedback": [], "apolog": [], "those": [3, 4, 9], "affect": [], "mistak": [], "learn": [2, 5, 9], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10], "focus": 1, "what": [], "i": [1, 4, 5, 7, 8, 10], "best": [], "just": 8, "u": 9, "individu": [], "overal": [], "unaccept": [], "The": [1, 4, 9, 10], "us": [1, 3, 5, 9, 10], "languag": [1, 2, 4, 9], "imageri": [], "attent": 5, "advanc": [], "ani": [1, 4, 5, 7, 8, 9, 10], "troll": [], "insult": [], "derogatori": [], "comment": [], "polit": [], "attack": [], "public": 2, "privat": 9, "publish": [], "inform": [1, 2, 9], "physic": 4, "email": [], "address": 4, "without": 5, "explicit": [], "permiss": [], "which": 9, "could": [], "reason": [], "consid": [1, 4, 9, 10], "inappropri": [], "profession": [], "set": [1, 5, 9, 10], "ar": [1, 3, 4, 6, 7, 9, 10], "clarifi": [], "take": [1, 8, 9], "appropri": 9, "fair": [], "action": [], "thei": [9, 10], "deem": [], "threaten": [], "offens": [], "harm": [], "have": [1, 8, 9, 10], "right": 10, "remov": [], "edit": [], "reject": [], "commit": [], "wiki": [], "issu": [], "align": 4, "thi": [1, 3, 
8, 9, 10], "moder": [], "decis": [], "when": 5, "appli": [1, 7], "within": [], "all": [1, 4, 7, 9, 10], "space": [], "also": 9, "offici": [], "repres": [4, 9, 10], "e": [3, 4, 5], "mail": [], "post": 9, "via": [], "social": [], "media": [], "account": 8, "appoint": [], "onlin": [], "offlin": [], "event": [], "instanc": 9, "abus": [], "otherwis": 10, "mai": 9, "report": [], "contact": [], "minde": [2, 3], "com": [3, 4], "complaint": [], "review": [], "investig": [], "promptli": [], "fairli": [], "oblig": [], "privaci": [], "secur": [], "incid": [], "follow": [1, 3, 7, 8, 9, 10], "impact": [], "determin": [], "consequ": [], "violat": [], "unprofession": [], "unwelcom": [], "A": [1, 2, 4, 5, 6, 8], "written": 4, "provid": [1, 2, 8, 9], "clariti": [], "around": [], "natur": [1, 2], "explan": 9, "why": [], "wa": [], "apologi": [], "request": [], "through": [1, 7], "singl": [], "seri": [], "continu": [], "No": 9, "involv": 9, "unsolicit": [], "specifi": [1, 4], "period": [], "time": [1, 2, 5, 10], "avoid": 3, "well": 8, "extern": [], "channel": [4, 7], "like": [], "term": [], "lead": [], "seriou": [], "sustain": [], "sort": [], "allow": [], "dure": [], "pattern": [], "aggress": [], "disparag": [], "class": [1, 4, 7, 9, 10], "adapt": [], "version": [8, 9], "0": [1, 7, 9, 10], "avail": [2, 7], "http": [3, 4, 5, 9], "www": [4, 9], "org": [5, 9], "_": [1, 5, 8], "html": 9, "were": [4, 9], "inspir": 7, "mozilla": [], "": [4, 10], "ladder": [], "For": [3, 9], "answer": [], "common": [7, 10], "question": [], "about": 9, "see": [], "faq": [], "translat": [], "everyth": 9, "you": [1, 3, 4, 5, 8, 9], "need": [1, 3, 10], "know": [], "effici": [1, 2, 5], "project": 1, "packag": [2, 8, 10], "python": [], "doc": [4, 9], "librari": [3, 6], "build": 3, "script": [], "refer": [3, 9], "train": [1, 5, 7, 9], "demo": 2, "small": 5, "app": [], "showcas": [], "capabl": [6, 9], "api": 2, "minim": 2, "templat": [2, 4], "deploi": [], "rest": [7, 10], "ensur": [], "proper": [], "mainten": [], 
"github": 3, "worklow": [], "run": [3, 5], "job": [], "coverag": [], "codecov": [], "back": [], "result": [1, 4, 6, 9], "As": [], "contributor": [], "onli": [5, 7, 10], "your": [1, 2, 4, 9, 10], "ad": [5, 7], "whether": [1, 4, 7, 10], "encount": [], "problem": [], "suggest": [], "input": [4, 5, 7, 9], "ha": [1, 10], "valu": [4, 7, 9], "can": [1, 3, 8, 9], "purpos": [], "advis": [], "first": [], "check": 9, "topic": [], "wasn": [], "t": [1, 9], "alreadi": [], "cover": [], "close": [], "If": [3, 4, 5, 8], "feel": [], "new": 10, "one": [1, 5, 7, 9], "do": [3, 8], "so": [1, 3], "whenev": [], "possibl": 10, "enough": 9, "jump": [], "wonder": [], "how": 1, "someth": [], "more": [9, 10], "gener": 1, "should": [1, 4, 7, 10], "out": [5, 7, 9, 10], "discuss": [], "q": [], "forum": [], "specif": [1, 3, 9, 10], "stackoverflow": [], "addit": [], "depend": [2, 3], "command": [], "m": [9, 10], "pip": 3, "upgrad": [], "dev": [], "pre": 5, "docstr": [], "In": 1, "pleas": [], "googl": [], "eas": [], "process": [2, 4, 9], "later": [], "messag": [], "udac": [], "guid": [], "order": [1, 4, 7], "same": [1, 4, 9, 10], "ci": [], "workflow": [], "unittest": [], "local": [1, 2, 5, 9, 10], "To": [3, 9], "togeth": 4, "current": 9, "built": [], "sphinx": [], "thank": [], "our": [5, 9], "file": 1, "been": [1, 9, 10], "rebuilt": [], "want": 8, "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": [], "directori": [], "addition": [8, 9], "clear": [], "web": 4, "browser": 2, "cach": 1, "modif": [], "now": [], "locat": 4, "index": 4, "wish": 8, "somewher": [], "els": [], "than": [3, 10], "join": [], "slack": [], "where": [4, 7, 9, 10], "find": 3, "requir": [3, 7], "3": [2, 3, 4, 5, 7, 8, 9, 10], "8": [5, 7, 9], "higher": [1, 3], "whichev": 3, "o": 3, "least": 3, "tensorflow": [2, 3, 4, 5, 7, 9], "pytorch": [2, 3, 9], "correspond": [3, 9], "page": [1, 3, 5, 9, 10], "2": [2, 3, 7, 8, 9], "macbook": [], "m1": [], "chip": [], "some": [1, 3, 6], "metal": [], "plugin": [], "1": [1, 5, 7, 8, 9, 
10], "12": 9, "anoth": [1, 3, 5], "linux": 3, "few": [3, 8], "extra": 3, "maco": 3, "user": [2, 3, 4, 6], "them": [1, 3, 9], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 5, 10], "gtk": 3, "latest": 3, "over": [1, 3, 9, 10], "here": [1, 3, 6, 7, 9], "last": [1, 3], "stabl": 3, "doctr": [3, 8, 9], "strive": 3, "reduc": [3, 7], "framework": [1, 3, 9], "minimum": [1, 3, 10], "necessari": 3, "featur": [3, 5, 6, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 4, 5, 7, 8], "torch": [3, 5], "mode": 3, "clone": 3, "state": [2, 10], "art": 2, "optic": [2, 9], "charact": [1, 2, 4, 9, 10], "made": 2, "seamless": 2, "access": [1, 2, 4, 9], "anyon": 2, "power": 2, "easi": [2, 10], "extract": [1, 2], "valuabl": 2, "autom": 2, "seamlessli": 9, "understand": [1, 2, 9], "task": [1, 2, 9], "ocr": [1, 2, 5, 10], "predictor": [2, 4, 5], "pars": [1, 2], "textual": [1, 2, 4, 5, 9], "identifi": 2, "each": [1, 2, 4, 7, 9, 10], "word": [1, 2, 9, 10], "research": 2, "quickli": 2, "compar": 2, "own": [1, 2], "architectur": [2, 5], "speed": [2, 5], "perform": [2, 4, 7, 8, 9, 10], "robust": [1, 2], "stage": 2, "pretrain": [2, 5, 8, 9, 10], "paramet": [1, 2, 4, 5, 7, 10], "friendli": 2, "line": [2, 9, 10], "code": [2, 4], "load": [2, 8], "googlevis": 2, "aw": [2, 9], "textract": [2, 9], "optim": [2, 8], "infer": [2, 5, 7], "both": [1, 2, 7, 9], "cpu": [2, 9], "gpu": 2, "light": 2, "activ": 2, "maintain": 2, "integr": 2, "deploy": 2, "dbnet": [2, 5], "real": [2, 5, 7], "scene": [1, 2, 5], "differenti": [2, 5], "binar": [2, 5], "linknet": [2, 5], "exploit": [2, 5], "encod": [1, 2, 4, 5, 9], "represent": [2, 5], "semant": [2, 5], "segment": [2, 5, 9], "sar": [2, 5], "show": [2, 4, 5, 10], "attend": [2, 5], "read": [1, 2, 5], "simpl": [2, 5], "strong": [2, 5], "baselin": [2, 5, 9], "irregular": [2, 5], "crnn": [2, 5], "end": [1, 2, 5, 10], "trainabl": [2, 5], "neural": [2, 5], "network": [2, 5], "imag": [1, 2, 4, 5, 7, 9, 10], 
"base": [2, 5], "sequenc": [1, 2, 4, 5, 9, 10], "Its": [2, 5], "applic": [2, 5], "master": [2, 5, 9], "multi": [2, 5], "aspect": [2, 5, 7], "non": [1, 2, 4, 5, 7, 10], "vitstr": [], "vision": 1, "transform": 1, "fast": 1, "parseq": [], "permut": [], "autoregress": [], "funsd": [1, 2, 9], "form": [1, 2, 9], "noisi": [1, 2], "scan": [1, 2], "cord": [1, 2, 9], "consolid": [1, 2], "receipt": [1, 2, 9], "forpost": [1, 2], "sroie": [1, 2], "icdar": [1, 2], "2019": 2, "iiit": [1, 2], "5k": [1, 2], "cvit": 2, "street": [1, 2], "view": [1, 2], "synthtext": [1, 2], "visual": 2, "geometri": [2, 4, 9], "group": 2, "svhn": [1, 2], "digit": [1, 2], "unsupervis": 2, "ic03": [1, 2], "2003": [1, 2], "ic13": [1, 2], "2013": [1, 2], "imgur5k": [], "textstylebrush": [], "transfer": [], "aesthet": [], "mjsynth": [], "synthet": [], "data": [4, 7, 10], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [4, 10], "dual": [], "modal": [], "graph": 4, "kei": [], "bool": [1, 4, 5, 7, 10], "true": [1, 4, 5, 7, 8, 9, 10], "use_polygon": [1, 10], "fals": [1, 4, 5, 7, 8, 10], "recognition_task": [], "kwarg": [1, 4, 5, 10], "sourc": [1, 4, 5, 7, 10], "document": [1, 5, 6, 9, 10], "import": [1, 4, 5, 7, 8, 9, 10], "train_set": 1, "download": 1, "img": [1, 7], "target": [1, 4, 7, 10], "subset": [1, 9], "polygon": [1, 9], "rotat": [1, 4, 5, 7, 10], "bound": [1, 4, 5, 7, 9, 10], "box": [1, 4, 5, 7, 9, 10], "instead": [1, 4], "straight": [1, 5, 9], "ones": [1, 10], "recognit": [1, 10], "keyword": [1, 4], "argument": [1, 4], "visiondataset": 1, "icdar2019": 1, "competit": 1, "iiit5k": 1, "bmvc": 1, "2012": 1, "text": [1, 4, 5, 10], "prior": 1, "svt": 1, "ucsd": 1, "comput": [1, 9, 10], "hous": 1, "number": [1, 7, 9, 10], "localis": 1, "repositori": 1, "websit": 1, "entri": 1, "futur": 1, "direct": 1, "img_fold": 1, "str": [1, 4, 5, 7, 10], "label_fold": 1, "label": [1, 10], "part": [1, 7], "challeng": 1, "task2": 1, "2015": 1, "path": [1, 4, 8], "challenge2_training_task12_imag": 1, 
"challenge2_training_task1_gt": 1, "test_set": 1, "challenge2_test_task12_imag": 1, "challenge2_test_task1_gt": 1, "folder": [1, 8], "annot": [1, 4], "abstractdataset": [], "label_path": 1, "handwrit": [], "dataset_info": [], "imgur5k_annot": [], "json": [1, 9], "pure": [], "mnt": [], "ramdisk": [], "max": [7, 10], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 1, "object": [1, 6, 9, 10], "detect": [1, 6, 10], "element": [1, 4, 5, 9, 10], "varieti": 1, "arxiv": 5, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 1, "implement": [1, 4, 7, 8, 9, 10], "d": 1, "abdef": 1, "num_sampl": 1, "100": [1, 7, 8, 9, 10], "vocabulari": 1, "sampl": [1, 9], "iter": [1, 7], "cache_sampl": 1, "firsthand": 1, "font_famili": [1, 10], "font": [1, 10], "img_transform": 1, "compos": [1, 9], "sample_transform": 1, "wordgener": 1, "min_char": 1, "int": [1, 4, 7, 10], "max_char": 1, "list": [1, 4, 7, 10], "none": [1, 4, 9, 10], "callabl": [1, 7], "tupl": [1, 4, 7, 10], "32": [1, 5, 7, 8, 9], "maximum": [1, 7], "detectiondataset": 1, "recognitiondataset": 1, "labels_path": 1, "contain": [1, 9], "ocrdataset": 1, "label_fil": 1, "jpg": [1, 4], "root": 1, "shuffl": [1, 7], "batch_siz": 1, "drop_last": 1, "num_work": 1, "collate_fn": 1, "wrapper": [1, 7], "train_load": 1, "train_it": 1, "next": 1, "befor": [1, 5, 7], "pass": [1, 4, 5, 9], "batch": [1, 5, 7, 9], "drop": 1, "isn": 1, "full": [1, 9, 10], "worker": 1, "function": [7, 10], "merg": [], "sinc": [1, 9], "content": [1, 4, 9, 10], "properli": 1, "model": [1, 10], "interpret": [1, 4], "multipl": [1, 4, 7], "name": [1, 5, 9], "10": [1, 9, 10], "0123456789": 1, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 1, "52": [1, 9], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "punctuat": 1, "currenc": 1, "5": [1, 7, 9, 10], "ancient_greek": [], "48": [5, 9], 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 9, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 1, "94": [1, 9], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 1, "english": 1, "legacy_french": 1, "123": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 1, "french": [1, 9], "126": 1, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 1, "portugues": 1, "131": 1, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 1, "spanish": 1, "116": 1, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 1, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 1, "108": 1, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 1, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 1, "target_s": 1, "eo": 1, "pad": [1, 5, 7], "dynamic_seq_length": 1, "ndarrai": [1, 4, 10], "given": [1, 4, 7, 9, 10], "map": 1, "n": [1, 10], "length": 1, "Of": 1, "string": [1, 4, 9, 10], "option": 1, "start": 1, "case": [1, 9, 10], "upper": [1, 7], "enabl": [1, 4], "dynam": 1, "tensor": [1, 4, 7, 9], 
"modul": [4, 7, 9, 10], "easili": [4, 8, 9, 10], "export": [4, 5, 6, 8, 9, 10], "analysi": 4, "format": [1, 4, 8, 9, 10], "organ": 4, "uninterrupt": [4, 9], "confid": [4, 9, 10], "float": [4, 7, 8, 10], "associ": 4, "predict": [4, 5, 10], "xmin": 4, "ymin": 4, "xmax": 4, "ymax": 4, "coordin": [4, 9], "rel": [4, 7, 10], "collect": 4, "meant": [4, 8], "two": 4, "column": 4, "horizont": 4, "resolv": 4, "default": [4, 8, 10], "smallest": 4, "enclos": 4, "g": [4, 5], "qr": 4, "pictur": 4, "chart": 4, "signatur": 4, "logo": 4, "etc": 4, "artefact_typ": 4, "type": [4, 9], "sever": [4, 7, 9], "its": [1, 4, 7, 9, 10], "titl": [4, 9], "underneath": 4, "page_idx": [4, 9], "dimens": [4, 9, 10], "dict": [4, 9, 10], "numpi": [4, 5, 9, 10], "arrai": [4, 10], "uint8": [4, 5, 9, 10], "raw": [4, 10], "pixel": [4, 7, 9], "height": 4, "width": 4, "dictionari": [4, 10], "angl": [4, 7], "degress": 4, "preserve_aspect_ratio": [4, 5, 7], "overlai": 4, "displai": [4, 10], "matplotlib": 10, "pyplot": 10, "method": [7, 9], "high": 4, "convers": 4, "read_pdf": 4, "byte": [4, 9], "scale": [5, 7, 10], "rgb_mode": [], "password": [], "pdf": [4, 5, 6], "convert": [4, 7, 8], "render": [], "72dpi": [], "output": [4, 7], "rgb": [4, 7], "bgr": 4, "unlock": [], "encrypt": [], "pypdfium2": [], "pdfpage": [], "decod": 4, "shape": [4, 5, 7, 8, 9, 10], "h": [4, 5, 7], "x": [4, 7, 10], "w": [4, 5, 7, 10], "c": 10, "read_img_as_numpi": 4, "output_s": [4, 7], "rgb_output": 4, "expect": [4, 7, 10], "read_img_as_tensor": 4, "img_path": 4, "dtype": [4, 5, 8, 10], "float32": [4, 5, 8], "desir": 4, "relat": 4, "divid": 4, "255": [4, 5, 7, 9, 10], "decode_img_as_tensor": 4, "img_cont": 4, "stream": 4, "read_html": 4, "url": 4, "yoursit": 4, "weasyprint": [], "documentfil": 4, "extens": 4, "classmethod": 4, "from_pdf": 4, "binari": [4, 9], "from_url": 4, "from_imag": 4, "page1": 4, "png": 4, "page2": 4, "vgg16_bn_r": 5, "vgg": 5, "16": 5, "describ": [5, 10], "veri": 5, "deep": [5, 9], "convolut": 5, "larg": 5, 
"modifi": 5, "normal": [5, 7], "rectangular": 5, "pool": 5, "simpler": 5, "head": [5, 9], "input_tensor": 5, "random": [5, 7, 8, 9, 10], "uniform": [5, 7, 8], "512": 5, "maxval": [5, 7, 8], "imagenet": 5, "extractor": 5, "resnet18": 5, "resnet": 5, "18": [5, 9], "residu": 5, "boolean": 5, "resnet34": [], "34": 9, "resnet50": [], "50": [5, 9], "resnet31": 5, "downsiz": 5, "4": [5, 9, 10], "mobilenet_v3_smal": 5, "mobilenetv3": 5, "search": 5, "kera": [5, 8], "mobilenet_v3_larg": 5, "mobilenet_v3_small_r": 5, "mobilenet_v3_large_r": 5, "mobilenet_v3_small_orient": 5, "magc_resnet31": 5, "global": 5, "context": 5, "224": [5, 7, 8], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": [], "vit_b": [], "b": 10, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 7, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 5, "arch": 5, "croporientationpredictor": 5, "np": [5, 8, 9, 10], "classif_mobilenet_v3_smal": 5, "input_crop": 5, "rand": [5, 8, 9, 10], "600": [5, 9, 10], "800": [5, 9, 10], "astyp": [5, 8, 9, 10], "crop": [5, 7, 9], "dataset": [5, 9], "linknet_resnet18": [5, 9], "1024": [5, 8, 9, 10], "linknet_resnet34": [], "linknet_resnet50": [], "db_resnet50": [5, 8, 9], "backbon": 5, "db_mobilenet_v3_larg": [5, 9], "mobilenet": 5, "v3": 5, "detection_predictor": [5, 9], "assume_straight_pag": 5, "detectionpredictor": 5, "input_pag": [5, 9, 10], "itself": [], "fit": 5, "crnn_vgg16_bn": [5, 9], "128": [5, 9], "crnn_mobilenet_v3_smal": [5, 9], "crnn_mobilenet_v3_larg": [5, 9], "sar_resnet31": [5, 9], "31": [5, 9], "64": [5, 7, 9], "256": 5, "paper": 5, "1910": 5, "02562": 5, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [5, 9], "recognitionpredictor": 5, "ocr_predictor": [5, 9], "det_arch": 5, "reco_arch": 5, "pretrained_backbon": [], "symmetric_pad": 7, "export_as_straight_box": 5, 
"detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 5, "up": [5, 9], "assum": 5, "preserv": [5, 7], "ratio": [5, 7], "symmetr": 7, "bottom": 9, "final": [5, 8], "potenti": 5, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": 3, "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": [], "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [1, 8], "configur": [], "my": [], "procedur": 7, "draw": [7, 10], "design": 7, "torchvis": 7, "resiz": 7, "bilinear": 7, "transfo": 7, "minval": 7, "interpol": 7, "zero": [7, 10], "while": [7, 9], "done": 7, "mean": [7, 10], "std": 7, "gaussian": 7, "distribut": 7, "485": 7, "456": 7, "406": 7, "229": 7, "225": 7, "averag": [7, 9], "per": [7, 9], "standard": 7, "deviat": 7, "lambdatransform": 7, "fn": 7, "lambda": 7, "tograi": 7, "num_output_channel": 7, "grayscal": 7, "colorinvers": 7, "min_val": 7, "tranform": 7, "color": [7, 10], "shift": 7, "randomli": 7, "invert": 7, "6": [3, 7, 9], "rang": [7, 8], "randombright": 7, "max_delta": 7, "adjust": 7, "bright": 7, "delta": 7, "offset": 7, "add": [7, 10], "pick": 7, "p": [7, 9, 10], "probabl": 7, "randomcontrast": 7, "contrast": 7, "contrast_factor": 7, "factor": 7, "randomsatur": 7, "satur": 7, "hsv": 7, "increas": 7, "randomhu": 7, "hue": 7, "randomgamma": 7, "min_gamma": 7, "max_gamma": 7, "min_gain": 7, "max_gain": 7, "gamma": 7, "correct": 7, "neg": 7, "lower": [7, 10], "param": [7, 9], "constant": 7, "multipli": 7, "randomjpegqu": 7, "min_qual": 7, "60": 7, "max_qual": 7, "jpeg": 7, "qualiti": 7, "dimension": 7, "between": [7, 10], "randomrot": 7, "max_angl": 7, "expand": 7, "degre": 7, "uniformli": 7, "randomcrop": 7, "08": [7, 9], "75": [7, 9], "33": 7, "min_area": 7, 
"max_area": 7, "min_ratio": 7, "max_ratio": 7, "gaussianblur": 7, "kernel_shap": 7, "blur": 7, "min": 7, "channelshuffl": 7, "gaussiannois": 7, "nois": 7, "randomhorizontalflip": [], "flip": [], "int64": 10, "randomshadow": [], "opacity_rang": [], "shade": [], "opac": [], "It": 7, "consecut": [7, 9], "sequenti": [7, 8], "oneof": 7, "jpegqual": 7, "randomappli": 7, "regroup": 10, "core": 10, "complementari": 10, "sens": 10, "visualize_pag": 10, "words_onli": 10, "display_artefact": 10, "add_label": 10, "figur": 10, "block": [9, 10], "plt": 10, "ocr_db_crnn": 10, "artefact": [6, 9, 10], "figsiz": 10, "largest": 10, "side": 10, "plot": 10, "static": 10, "top": [9, 10], "synthesize_pag": 10, "draw_proba": 10, "respons": 10, "blank": 10, "blue": 10, "red": 10, "font_siz": 10, "13": [9, 10], "famili": 10, "synthes": 10, "metric": [9, 10], "assess": 10, "textmatch": 10, "match": [9, 10], "accuraci": 10, "aggreg": [1, 10], "foral": 10, "y": 10, "mathcal": 10, "frac": 10, "sum": 10, "limits_": 10, "f_": 10, "y_i": 10, "x_i": 10, "indic": 10, "defin": 10, "f_a": 10, "left": [9, 10], "begin": 10, "ll": 10, "mbox": 10, "strictli": 10, "integ": [8, 10], "updat": 10, "hello": [9, 10], "world": [9, 10], "summari": 10, "gt": 10, "pred": 10, "groung": 10, "truth": 10, "exact": [9, 10], "score": 10, "counterpart": 10, "unidecod": 10, "localizationconfus": 10, "iou_thresh": 10, "mask_shap": 10, "use_broadcast": 10, "confus": 10, "iou": 10, "recal": [9, 10], "g_": 10, "precis": [9, 10], "meaniou": 10, "j": 10, "y_j": 10, "being": [9, 10], "intersect": 10, "union": 10, "g_x": 10, "assign": 10, "_i": 10, "geq": 10, "ground": 10, "asarrai": 10, "70": [9, 10], "110": 10, "95": [9, 10], "200": 10, "150": [9, 10], "pair": 10, "broadcast": 10, "consum": 10, "memori": 10, "either": [9, 10], "ocrmetr": 10, "l": 10, "hat": 10, "h_": 10, "b_j": 10, "l_j": 10, "gt_box": 10, "pred_box": 10, "gt_label": 10, "pred_label": 10, "comparison": [9, 10], "detectionmetr": 10, "c_j": 10, "compil": [6, 9], 
"better": [6, 9], "leverag": 6, "descript": 6, "colab": [], "quicktour": 6, "present": 6, "main": 6, "produc": [6, 9], "searchabl": 6, "don": 9, "meet": [], "detail": 9, "link": [], "section": [8, 9], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 9, "class_nam": [], "total": [], "date": 9, "preprocessor": 9, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": [], "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [1, 9], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 9, "after": 9, "python3": [], "train_tensorflow": [], "py": [], "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [1, 9], "predefin": 1, "prefer": 1, "signific": 1, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 8, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], 
"alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [1, 9], "sensit": [1, 9], "abl": [1, 9], "howev": 1, "guidanc": 1, "tool": 1, "further": [], "anot": [], "handl": 1, "underli": 1, "defer": 1, "dataload": 1, "good": 8, "achiev": 8, "might": [8, 9], "tune": 8, "thing": [8, 9], "product": 8, "readi": 8, "help": 8, "support": 9, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 9, "layer": [], "metadata": [], "util": 8, "export_model_to_onnx": [], "input_shap": 8, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 9, "onc": 9, "separ": 9, "compon": 9, "charg": 9, "usabl": 9, "backend": 9, "along": 9, "processor": 9, "reusabl": 9, "consist": 9, "delimit": 9, "2d": 9, "corner": 9, "flag": 9, "belong": 9, "skew": [], "comprehens": 9, "benchmark": 9, "publicli": 9, "sec": [], "25": 9, "84": 9, "39": 9, "85": 9, "86": 9, "93": 9, "83": 9, "24": [], "80": 9, "29": 9, "90": 9, "67": 9, "76": 9, "11": 9, "81": 9, "71": 9, "7": 9, "21": 9, "82": 9, "20": 9, "49": 9, "87": 9, "63": 9, "17": [], "28": [], "51": 9, "46": 9, "db_resnet34": [], "22": [], "89": 9, "74": 9, "56": 9, "68": 9, "92": 9, "61": 9, "41": 9, "00": 9, "79": 9, "38": 9, "88": [], "62": 9, "26": [], "06": 9, "78": 9, "47": 9, "54": [], "abov": 9, "cf": 9, "disclaim": 9, "combin": 9, "199": 9, "second": 9, "warmup": 9, "phase": 9, "measur": 9, "1000": 9, "obtain": 9, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 9, "useabl": 9, "favorit": 9, "dummy_img": 9, "area": [], "send": [], "snippet": [], "transcrib": 9, "partial": [], "15": 9, "9": [], "73": 9, "44": [], "14": 9, "55": [], "58": [], "57": 9, "66": 9, "01": 9, "98": 9, "23": [], "69": 9, "99": 9, 
"91": 9, "05": [], "09": [], "96": 9, "40": [], "53": 9, "most": 9, "print": 9, "cfg": 9, "30595": 9, "45": 9, "72": 9, "43": 9, "65": 9, "77": 9, "30": 9, "07": [], "27": 9, "gvision": 9, "59": 9, "03": 9, "azur": [], "recogn": [], "42": 9, "go": [], "mention": [], "still": [], "return": [1, 4, 5, 9, 10], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 9, "get": [4, 9], "typic": 9, "layout": 9, "340": 9, "text_output": [], "json_output": 9, "1357421875": 9, "0361328125": 9, "8564453125": 9, "8603515625": 9, "914085328578949": 9, "5478515625": 9, "06640625": 9, "5810546875": 9, "0966796875": 9, "9949972033500671": 9, "51171875": 9, "1630859375": 9, "9578408598899841": 9, "1396484375": 9, "3232421875": 9, "185546875": 9, "3515625": 9, "outpout": 9, "xml": 9, "hocr": 9, "export_as_xml": 9, "xml_output": 9, "xml_bytes_str": 9, "xml_element": 9, "utf": 9, "xmln": 9, "w3": 9, "1999": 9, "xhtml": 9, "lang": 9, "en": 9, "meta": 9, "equiv": 9, "charset": 9, "system": 9, "ocr_pag": 9, "ocr_carea": 9, "ocr_par": 9, "ocr_lin": 9, "ocrx_word": 9, "div": 9, "id": 9, "page_1": 9, "bbox": 9, "3456": 9, "ppageno": 9, "block_1_1": 9, "857": 9, "529": 9, "2504": 9, "2710": 9, "par_1_1": 9, "span": 9, "line_1_1": 9, "x_size": 9, "x_descend": 9, "x_ascend": 9, "word_1_1": 9, "1552": 9, "540": 9, "1778": 9, "580": 9, "x_wconf": 9, "word_1_2": 9, "1782": 9, "1900": 9, "583": 9, "word_1_3": 9, "1420": 9, "597": 9, "1684": 9, "641": 9, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 8, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 4, "seemlessli": 2, "conda": [], "newer": [], "developp": 3, "fp": 9, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], 
"db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 8, "tflite": 8, "conv_sequ": 8, "relu": 8, "kernel_s": 8, "serialized_model": 8, "convert_to_fp16": [], "half": [], "serial": 8, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 8, "abstract": [], "verifi": [], "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 1, "154": [], "as_imag": 4, "convert_page_to_numpi": 4, "get_word": 4, "fitz": 4, "gettextword": [], "get_artefact": 4, "entir": 4, "fulli": [], "daili": [], "mix": [], "fine": 9, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 9, "feed": [], "warm": [], "c5": 9, "x12larg": 9, "xeon": 9, "platinum": 9, "8275l": 9, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 8, "input_t": 8, "saved_model": 8, "And": 8, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 9, "02": 9, "5m": 9, "1m": 9, "19": [], "invoic": 9, "flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": 5, "arg": 1, "bash": [], "tax": 9, "35": 9, "vgg16_bn": [], 
"mobilenetv3_larg": 5, "mobilenetv3_smal": 5, "constraint": 8, "tfliteconvert": 8, "from_keras_model": 8, "target_spec": 8, "supported_typ": 8, "float16": 8, "fallback": 8, "oper": 8, "representative_dataset": 8, "yield": 8, "supported_op": 8, "opsset": 8, "tflite_builtins_int8": 8, "inference_input_typ": 8, "int8": 8, "inference_output_typ": 8, "2m": 9, "7m": 9, "look": 9, "variou": 9, "below": 9, "unfortun": 9, "moment": 9, "04": 9, "36": 9, "97": 9, "resum": 9, "road": 9, "get_text_word": 4, "get_lin": 4}, "objects": {"doctr.datasets": [[1, 0, 1, "", "CORD"], [1, 0, 1, "", "CharacterGenerator"], [1, 0, 1, "", "DetectionDataset"], [1, 0, 1, "", "DocArtefacts"], [1, 0, 1, "", "FUNSD"], [1, 0, 1, "", "IC03"], [1, 0, 1, "", "IC13"], [1, 0, 1, "", "IIIT5K"], [1, 0, 1, "", "OCRDataset"], [1, 0, 1, "", "RecognitionDataset"], [1, 0, 1, "", "SROIE"], [1, 0, 1, "", "SVHN"], [1, 0, 1, "", "SVT"], [1, 0, 1, "", "SynthText"], [1, 0, 1, "", "WordGenerator"], [1, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[1, 0, 1, "", "DataLoader"]], "doctr.io": [[4, 0, 1, "", "Artefact"], [4, 0, 1, "", "Block"], [4, 0, 1, "", "Document"], [4, 0, 1, "", "DocumentFile"], [4, 0, 1, "", "Line"], [4, 0, 1, "", "PDF"], [4, 0, 1, "", "Page"], [4, 0, 1, "", "Word"], [4, 1, 1, "", "decode_img_as_tensor"], [4, 1, 1, "", "read_html"], [4, 1, 1, "", "read_img_as_numpy"], [4, 1, 1, "", "read_img_as_tensor"], [4, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[4, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[4, 2, 1, "", "from_images"], [4, 2, 1, "", "from_pdf"], [4, 2, 1, "", "from_url"]], "doctr.io.PDF": [[4, 2, 1, "", "as_images"], [4, 2, 1, "", "get_artefacts"], [4, 2, 1, "", "get_lines"], [4, 2, 1, "", "get_words"]], "doctr.io.Page": [[4, 2, 1, "", "show"]], "doctr.models.classification": [[5, 1, 1, "", "crop_orientation_predictor"], [5, 1, 1, "", "magc_resnet31"], [5, 1, 1, "", "mobilenet_v3_large"], [5, 1, 1, "", "mobilenet_v3_large_r"], [5, 1, 1, "", "mobilenet_v3_small"], [5, 1, 1, 
"", "mobilenet_v3_small_orientation"], [5, 1, 1, "", "mobilenet_v3_small_r"], [5, 1, 1, "", "resnet18"], [5, 1, 1, "", "resnet31"], [5, 1, 1, "", "vgg16_bn_r"]], "doctr.models.detection": [[5, 1, 1, "", "db_mobilenet_v3_large"], [5, 1, 1, "", "db_resnet50"], [5, 1, 1, "", "detection_predictor"], [5, 1, 1, "", "linknet_resnet18"]], "doctr.models": [[5, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[5, 1, 1, "", "crnn_mobilenet_v3_large"], [5, 1, 1, "", "crnn_mobilenet_v3_small"], [5, 1, 1, "", "crnn_vgg16_bn"], [5, 1, 1, "", "master"], [5, 1, 1, "", "recognition_predictor"], [5, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[7, 0, 1, "", "ChannelShuffle"], [7, 0, 1, "", "ColorInversion"], [7, 0, 1, "", "Compose"], [7, 0, 1, "", "GaussianBlur"], [7, 0, 1, "", "GaussianNoise"], [7, 0, 1, "", "LambdaTransformation"], [7, 0, 1, "", "Normalize"], [7, 0, 1, "", "OneOf"], [7, 0, 1, "", "RandomApply"], [7, 0, 1, "", "RandomBrightness"], [7, 0, 1, "", "RandomContrast"], [7, 0, 1, "", "RandomCrop"], [7, 0, 1, "", "RandomGamma"], [7, 0, 1, "", "RandomHue"], [7, 0, 1, "", "RandomJpegQuality"], [7, 0, 1, "", "RandomRotate"], [7, 0, 1, "", "RandomSaturation"], [7, 0, 1, "", "Resize"], [7, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[10, 0, 1, "", "DetectionMetric"], [10, 0, 1, "", "LocalizationConfusion"], [10, 0, 1, "", "OCRMetric"], [10, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[10, 2, 1, "", "summary"], [10, 2, 1, "", "update"]], "doctr.utils.visualization": [[10, 1, 1, "", "synthesize_page"], [10, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", 
"function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": [], "1": 0, "03": 0, "22": 0, "2021": 0, "12": [], "31": [], "4": 0, "11": 0, "10": 0, "01": 0, "3": 0, "08": 0, "27": 0, "07": 0, "02": 0, "2": 0, "05": 0, "28": 0, "18": 0, "contributor": [], "coven": [], "code": [], "conduct": [], "our": [], "pledg": [], "standard": [], "enforc": [], "respons": [], "scope": [], "guidelin": [], "correct": [], "warn": [], "temporari": [], "ban": [], "perman": [], "attribut": [], "contribut": [], "doctr": [1, 2, 4, 5, 6, 7, 10], "codebas": [], "structur": 4, "continu": [], "integr": [], "feedback": [], "featur": 2, "request": [], "bug": [], "report": [], "question": [], "develop": [], "mode": [], "instal": 3, "commit": [], "unit": [], "test": [], "qualiti": [], "style": [], "verif": [], "modifi": [], "document": [2, 4], "let": [], "": [], "connect": [], "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [2, 9], "recognit": [2, 5, 9], "main": 2, "model": [2, 5, 8, 9], "zoo": [2, 5, 9], "detect": [2, 5, 9], "support": [1, 2, 7], "dataset": [1, 2], "arg": [], "synthet": 1, "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 1, "return": [], "io": 4, "word": 4, "line": 4, "artefact": 4, "block": 4, "page": 4, "file": 4, "read": 4, "classif": 5, "factori": [], "transform": 7, "compos": 7, "util": 10, "visual": 10, "task": 10, "evalu": 10, "notebook": 6, "train": 8, "your": 8, "own": [], "load": 1, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 9, "readi": [], "us": 8, "avail": [1, 9], "object": [], "data": 1, "prepar": 8, "infer": 8, "optim": [], "half": 8, "precis": 8, "export": [], "onnx": [], "right": 9, "architectur": 9, "predictor": 9, "end": 9, "ocr": 9, "two": 9, "stage": 9, "approach": 9, "what": 9, 
"should": 9, "i": 9, "do": 9, "output": 9, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 8, "build": [], "implement": [], "content": [], "compress": 8, "savedmodel": 8, "note": [], "refer": [], "backbon": [], "tensorflow": 8, "lite": 8, "quantiz": 8, "public": 1, "privat": 1}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "doctr.datasets": [[1, "doctr-datasets"]], "Available Datasets": [[1, "available-datasets"]], "Public datasets": [[1, "public-datasets"]], "docTR synthetic datasets": [[1, "doctr-synthetic-datasets"]], "docTR private datasets": [[1, "doctr-private-datasets"]], "Data Loading": [[1, "data-loading"]], "Supported Vocabs": [[1, "supported-vocabs"]], "docTR Vocabs": [[1, "id1"]], "docTR: Document Text Recognition": [[2, "doctr-document-text-recognition"]], "Main Features": [[2, "main-features"]], "Model zoo": [[2, "model-zoo"]], "Text detection models": [[2, "text-detection-models"]], "Text recognition models": [[2, "text-recognition-models"]], "Supported datasets": [[2, "supported-datasets"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, 
"via-git"]], "doctr.io": [[4, "doctr-io"]], "Document structure": [[4, "document-structure"]], "Word": [[4, "word"]], "Line": [[4, "line"]], "Artefact": [[4, "artefact"]], "Block": [[4, "block"]], "Page": [[4, "page"]], "Document": [[4, "document"]], "File reading": [[4, "file-reading"]], "doctr.models": [[5, "doctr-models"]], "doctr.models.classification": [[5, "doctr-models-classification"]], "doctr.models.detection": [[5, "doctr-models-detection"]], "doctr.models.recognition": [[5, "doctr-models-recognition"]], "doctr.models.zoo": [[5, "doctr-models-zoo"]], "docTR Notebooks": [[6, "doctr-notebooks"]], "doctr.transforms": [[7, "doctr-transforms"]], "Supported transformations": [[7, "supported-transformations"]], "Composing transformations": [[7, "composing-transformations"]], "Preparing your model for inference": [[8, "preparing-your-model-for-inference"]], "Model compression": [[8, "model-compression"]], "TensorFlow Lite": [[8, "tensorflow-lite"]], "Half-precision": [[8, "half-precision"]], "Post-training quantization": [[8, "post-training-quantization"]], "Using SavedModel": [[8, "using-savedmodel"]], "Choosing the right model": [[9, "choosing-the-right-model"]], "Text Detection": [[9, "text-detection"]], "Available architectures": [[9, "available-architectures"], [9, "id1"], [9, "id3"]], "Detection predictors": [[9, "detection-predictors"]], "Text Recognition": [[9, "text-recognition"]], "Text recognition model zoo": [[9, "id5"]], "Recognition predictors": [[9, "recognition-predictors"]], "End-to-End OCR": [[9, "end-to-end-ocr"]], "Two-stage approaches": [[9, "two-stage-approaches"]], "What should I do with the output?": [[9, "what-should-i-do-with-the-output"]], "doctr.utils": [[10, "doctr-utils"]], "Visualization": [[10, "visualization"]], "Task evaluation": [[10, "task-evaluation"]]}, "indexentries": {"cord (class in doctr.datasets)": [[1, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[1, "doctr.datasets.CharacterGenerator"]], 
"dataloader (class in doctr.datasets.loader)": [[1, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[1, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[1, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[1, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[1, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[1, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[1, "doctr.datasets.IIIT5K"]], "ocrdataset (class in doctr.datasets)": [[1, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[1, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[1, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[1, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[1, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[1, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[1, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[1, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[4, "doctr.io.Artefact"]], "block (class in doctr.io)": [[4, "doctr.io.Block"]], "document (class in doctr.io)": [[4, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[4, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[4, "doctr.io.Line"]], "pdf (class in doctr.io)": [[4, "doctr.io.PDF"]], "page (class in doctr.io)": [[4, "doctr.io.Page"]], "word (class in doctr.io)": [[4, "doctr.io.Word"]], "as_images() (doctr.io.pdf method)": [[4, "doctr.io.PDF.as_images"]], "decode_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[4, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[4, 
"doctr.io.DocumentFile.from_url"]], "get_artefacts() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_artefacts"]], "get_lines() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_lines"]], "get_words() (doctr.io.pdf method)": [[4, "doctr.io.PDF.get_words"]], "read_html() (in module doctr.io)": [[4, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[4, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[4, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[4, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[4, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[4, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[5, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[5, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[5, "doctr.models.detection.detection_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[5, "doctr.models.detection.linknet_resnet18"]], "magc_resnet31() (in module doctr.models.classification)": [[5, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[5, 
"doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[5, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[5, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[5, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[5, "doctr.models.classification.resnet31"]], "sar_resnet31() (in module doctr.models.recognition)": [[5, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[5, "doctr.models.classification.vgg16_bn_r"]], "channelshuffle (class in doctr.transforms)": [[7, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[7, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[7, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[7, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[7, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[7, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[7, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[7, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[7, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[7, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[7, "doctr.transforms.RandomContrast"]], "randomcrop 
(class in doctr.transforms)": [[7, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[7, "doctr.transforms.RandomGamma"]], "randomhue (class in doctr.transforms)": [[7, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[7, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[7, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[7, "doctr.transforms.RandomSaturation"]], "resize (class in doctr.transforms)": [[7, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[7, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[10, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[10, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[10, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[10, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[10, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[10, 
"doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[10, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.5.0/using_doctr/using_models.html b/v0.5.0/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/v0.5.0/using_doctr/using_models.html +++ b/v0.5.0/using_doctr/using_models.html @@ -836,6 +836,17 @@

Two-stage approachesmodel = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)

+

To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:

+
    +
  • resolve_lines: whether words should be automatically grouped into lines (default: True)

  • +
  • resolve_blocks: whether lines should be automatically grouped into blocks (default: True)

  • +
  • paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)

  • +
+

For example to disable the automatic grouping of lines into blocks:

+
from doctr.model import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
+

What should I do with the output?#

@@ -859,6 +870,14 @@

What should I do with the output?) +

To get only the text content of the Document, you can use the render method:

+
text_output = result.render()
+
+
+

For reference, here is the output for the Document above:

+
No. RECEIPT DATE
+
+

You can also export them as a nested dict, more appropriate for JSON format:

json_output = result.export()
 
diff --git a/v0.5.1/searchindex.js b/v0.5.1/searchindex.js index 91458a4784..219d5edfc9 100644 --- a/v0.5.1/searchindex.js +++ b/v0.5.1/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 5], "we": [1, 2, 3, 4, 5, 6, 8, 12], "member": 1, "leader": 1, "make": [1, 2, 9, 11, 12], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 12], "size": [1, 5, 6, 8, 9, 12], "visibl": 1, "invis": 1, "disabl": 1, "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 12], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 5], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 12], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 5], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 11, 12], "open": [1, 2], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7], "behavior": 1, "posit": [1, 9], "environ": 1, "includ": [1, 3, 5], "demonstr": 1, 
"empathi": 1, "kind": [1, 12], "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 12], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 12], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 11], "best": 1, "just": [1, 11], "u": [1, 2, 12], "individu": 1, "overal": 1, "unaccept": 1, "The": [1, 2, 5, 6, 9, 12], "us": [1, 2, 3, 5, 7, 9, 12], "languag": [1, 4, 5, 6, 12], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 11, 12], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": [1, 12], "publish": 1, "inform": [1, 2, 4, 5, 12], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 7], "explicit": 1, "permiss": 1, "which": [1, 12], "could": 1, "reason": 1, "consid": [1, 2, 5, 6, 9, 12], "inappropri": 1, "profession": 1, "set": [1, 2, 5, 7, 9, 12], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 12], "clarifi": 1, "take": [1, 5, 11, 12], "appropri": [1, 2, 12], "fair": 1, "action": 1, "thei": [1, 9, 12], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 5, 9, 11, 12], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 12], "space": 1, "also": [1, 12], "offici": 1, "repres": [1, 9, 12], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 12], "via": 1, "social": 1, "media": 1, "account": [1, 11], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 12], "abus": 1, "otherwis": [1, 9], "mai": [1, 12], "report": 1, "contact": 1, "minde": [1, 3, 4], "com": [1, 3, 6], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": 1, 
"incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 11], "written": [1, 6], "provid": [1, 2, 4, 5, 11, 12], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 12], "why": 1, "wa": 1, "apologi": 1, "request": 1, "through": [1, 5, 8], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 12], "involv": [1, 12], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 5, 7, 9], "avoid": [1, 3], "well": [1, 11], "extern": 1, "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 12], "adapt": 1, "version": [1, 2, 11, 12], "0": [1, 5, 8, 9, 12], "avail": [1, 4, 8], "http": [1, 3, 6, 7, 12], "www": [1, 6, 12], "org": [1, 7, 12], "_": [1, 5, 7, 11], "html": [1, 2, 12], "were": [1, 6, 12], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 9], "ladder": 1, "For": [1, 2, 3, 12], "answer": 1, "common": [1, 2, 8, 9], "question": 1, "about": [1, 12], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 12], "you": [2, 3, 5, 6, 7, 11, 12], "need": [2, 3, 5, 9], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 5], "packag": [2, 4, 9, 11], "python": 2, "doc": [2, 6, 12], "librari": [2, 3, 10], "build": [2, 3], "script": 2, "refer": [2, 3, 12], "train": [2, 5, 7, 8, 12], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 12], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 12], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 12], "your": [2, 4, 5, 6, 9, 12], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9], "encount": 2, "problem": 2, "suggest": 2, "input": [2, 6, 7, 8, 12], "ha": [2, 5, 9], "valu": [2, 6, 8, 
12], "can": [2, 3, 5, 11, 12], "purpos": 2, "advis": 2, "first": 2, "check": [2, 12], "topic": 2, "wasn": 2, "t": [2, 5, 12], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 12], "feel": 2, "new": [2, 9], "one": [2, 5, 7, 8, 12], "do": [2, 3, 11], "so": [2, 3, 5], "whenev": 2, "possibl": [2, 9], "enough": [2, 12], "jump": 2, "wonder": 2, "how": [2, 5], "someth": 2, "more": [2, 9, 12], "gener": [2, 5], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 12], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 5, 9, 12], "stackoverflow": 2, "addit": [2, 6], "depend": [2, 3, 4], "command": 2, "m": [9, 12], "pip": [2, 3], "upgrad": [], "dev": 2, "pre": 7, "docstr": [], "In": [2, 5], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 12], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [5, 6, 8], "same": [2, 5, 6, 9, 12], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 12], "To": [2, 3, 12], "togeth": [2, 6], "current": 12, "built": [], "sphinx": [], "thank": [], "our": [7, 12], "file": [2, 5], "been": [5, 9, 12], "rebuilt": [], "want": [11, 12], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": 2, "directori": [], "addition": [2, 11, 12], "clear": [], "web": 6, "browser": [2, 4], "cach": 5, "modif": 2, "now": 2, "locat": [2, 6], "index": [2, 6], "wish": [2, 11], "somewher": 2, "els": 2, "than": [2, 3, 9], "join": 2, "slack": 2, "where": [2, 6, 8, 9, 12], "find": [2, 3], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 11, 12], "8": [7, 8, 12], "higher": [3, 5], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 12], "pytorch": [3, 4, 8, 12], "correspond": [3, 6, 12], "page": [3, 5, 7, 9, 12], "2": [3, 4, 6, 8, 11, 12], "macbook": [], "m1": [], "chip": [], "some": [2, 3, 5, 10], "metal": [], "plugin": [], "1": [5, 6, 7, 8, 9, 11, 12], "12": 12, "anoth": [3, 5, 7], "linux": 3, "few": [3, 11], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 12], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, 
"pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 12], "over": [3, 5, 9, 12], "here": [3, 5, 8, 10, 12], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12], "strive": 3, "reduc": [3, 8], "framework": [3, 5, 12], "minimum": [3, 5, 8, 9], "necessari": 3, "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 11], "torch": [3, 8], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 12], "charact": [4, 5, 6, 9, 12], "made": 4, "seamless": 4, "access": [4, 5, 6, 12], "anyon": 4, "power": 4, "easi": [4, 9], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": 12, "understand": [4, 5, 12], "task": [4, 5, 12], "ocr": [4, 5, 7, 9], "predictor": [4, 6, 7], "pars": [4, 5], "textual": [4, 5, 6, 7, 12], "identifi": 4, "each": [4, 5, 6, 8, 9, 12], "word": [4, 5, 9, 12], "research": 4, "quickli": 4, "compar": 4, "own": [4, 5], "architectur": [4, 7], "speed": [4, 7], "perform": [4, 6, 8, 9, 11, 12], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 12], "paramet": [4, 5, 6, 7, 8, 9], "friendli": 4, "line": [4, 9, 12], "code": [4, 6], "load": [4, 11], "googlevis": 4, "aw": [4, 12], "textract": [4, 12], "optim": [4, 11], "infer": [4, 7, 8], "both": [4, 5, 8, 12], "cpu": [4, 12], "gpu": 4, "light": 4, "activ": 4, "maintain": 4, "integr": 4, "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 12], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 12], "sar": [4, 7], "show": [4, 6, 7, 9], "attend": [4, 7], "read": [2, 4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 12], "irregular": [4, 7], "crnn": [4, 7], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 7], "network": [4, 7], "imag": [4, 5, 6, 7, 8, 9, 12], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 12], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 12], "multi": [4, 7], "aspect": [4, 7, 8, 12], "non": [4, 5, 
6, 7, 8, 9], "vitstr": [], "vision": 5, "transform": 5, "fast": 5, "parseq": [], "permut": [], "autoregress": [], "funsd": [4, 5, 12], "form": [4, 5, 12], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 12], "consolid": [4, 5], "receipt": [4, 5, 12], "forpost": [4, 5], "sroie": [4, 5], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5], "visual": 4, "geometri": [4, 6, 12], "group": 4, "svhn": [4, 5], "digit": [4, 5], "unsupervis": 4, "ic03": [4, 5], "2003": [4, 5], "ic13": [4, 5], "2013": [4, 5], "imgur5k": [4, 5], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [], "synthet": [], "data": [6, 8, 9], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [6, 9], "dual": [], "modal": [], "graph": 6, "kei": [], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12], "use_polygon": [5, 9], "fals": [5, 6, 7, 8, 9, 11, 12], "recognition_task": [], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9], "document": [5, 7, 9, 10, 12], "import": [5, 6, 7, 8, 9, 11, 12], "train_set": 5, "download": 5, "img": [5, 8], "target": [5, 6, 8, 9], "subset": [5, 12], "polygon": [5, 12], "rotat": [5, 6, 7, 8, 9, 12], "bound": [5, 6, 7, 8, 9, 12], "box": [5, 6, 7, 8, 9, 12], "instead": [5, 6, 7], "straight": [5, 7, 12], "ones": [5, 8, 9], "recognit": [5, 9], "keyword": [5, 7], "argument": [5, 12], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": 5, "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9], "prior": 5, "svt": 5, "ucsd": 5, "comput": [5, 9, 12], "hous": 5, "number": [5, 8, 9, 12], "localis": 5, "repositori": [2, 5], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": 5, "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9], "part": [5, 8, 12], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 11], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": 
[2, 5, 11], "annot": 5, "abstractdataset": 5, "label_path": 5, "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 12], "pure": [], "mnt": [], "ramdisk": [], "max": [8, 9], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 5, "object": [5, 9, 10, 12], "detect": [5, 9, 10], "element": [5, 6, 7, 9, 12], "varieti": 5, "arxiv": 7, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 5, "implement": [5, 6, 8, 9, 11, 12], "d": 5, "abdef": 5, "num_sampl": 5, "100": [5, 8, 9, 11, 12], "vocabulari": 5, "sampl": [5, 12], "iter": [5, 8], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 12], "sample_transform": 5, "wordgener": 5, "min_char": 5, "int": [5, 6, 8, 9], "max_char": 5, "list": [5, 6, 8, 9], "none": [5, 6, 8, 9, 12], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 12], "maximum": [5, 8], "detectiondataset": 5, "recognitiondataset": 5, "labels_path": 5, "contain": [5, 12], "ocrdataset": 5, "label_fil": 5, "jpg": [5, 6], "root": [2, 5], "shuffl": [5, 8], "batch_siz": 5, "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": 5, "train_it": 5, "next": 5, "befor": [5, 7, 8, 12], "pass": [5, 6, 7, 12], "batch": [5, 7, 8, 12], "drop": 5, "isn": 5, "full": [5, 9, 12], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 12], "content": [5, 6, 9, 12], "properli": 5, "model": [5, 9], "interpret": [5, 6], "multipl": [5, 6, 8], "name": [5, 7, 12], "10": [5, 9, 12], "0123456789": 5, "hindi_digit": [], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 5, "52": [5, 12], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 12], "ancient_greek": [], "48": [7, 12], 
"\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 12, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 5, "94": [5, 12], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": 5, "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 12], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 5, "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": [], "101": [], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], 
"0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 12], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 12], "map": 5, "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 12], "option": 5, "start": 5, "case": [5, 9, 12], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 12], 
"modul": [6, 8, 9, 12], "easili": [6, 9, 11, 12], "export": [6, 7, 9, 10, 11, 12], "analysi": 6, "format": [5, 6, 9, 11, 12], "organ": 6, "uninterrupt": [6, 12], "confid": [6, 9, 12], "float": [6, 8, 9, 11], "associ": 6, "predict": [6, 7, 9], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 12], "rel": [6, 8, 9], "collect": 6, "meant": [6, 11], "two": 6, "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": 6, "etc": 6, "artefact_typ": 6, "type": [6, 12], "sever": [6, 8, 12], "its": [5, 6, 8, 9, 12], "titl": [6, 12], "underneath": 6, "page_idx": [6, 12], "dimens": [6, 9, 12], "dict": [6, 9, 12], "numpi": [6, 7, 9, 12], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 12], "raw": [6, 9], "pixel": [6, 8, 12], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 12], "overlai": 6, "displai": [6, 9], "matplotlib": 9, "pyplot": 9, "method": [8, 12], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 12], "scale": [6, 7, 8, 9], "rgb_mode": [], "password": [], "pdf": [6, 7, 10], "convert": [6, 8, 11], "render": 6, "72dpi": 6, "output": [6, 8], "rgb": [6, 8], "bgr": 6, "unlock": [], "encrypt": [], "pypdfium2": 6, "pdfpage": [], "decod": 6, "shape": [6, 7, 8, 9, 11, 12], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [2, 6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 11], "float32": [6, 7, 8, 11], "desir": 6, "relat": [2, 6], "divid": 6, "255": [6, 7, 8, 9, 12], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": [], "documentfil": 6, "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 12], "from_url": 6, "from_imag": 6, "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": 7, "describ": [7, 9], "veri": 7, "deep": 
[7, 12], "convolut": 7, "larg": 7, "modifi": 7, "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 12], "input_tensor": 7, "random": [7, 8, 9, 11, 12], "uniform": [7, 8, 11], "512": 7, "maxval": [7, 8, 11], "imagenet": 7, "extractor": 7, "resnet18": 7, "resnet": 7, "18": [7, 12], "residu": 7, "boolean": [7, 12], "resnet34": 7, "34": [7, 12], "resnet50": 7, "50": [7, 12], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 12], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 11], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8, 11], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": 2, "vit_b": [], "b": 9, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 8, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 7, "arch": 7, "croporientationpredictor": 7, "np": [7, 8, 9, 11, 12], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 11, 12], "600": [7, 9, 12], "800": [7, 9, 12], "astyp": [7, 9, 11, 12], "crop": [7, 8, 12], "dataset": [7, 12], "linknet_resnet18": [7, 12], "1024": [7, 9, 11, 12], "linknet_resnet34": 7, "linknet_resnet50": 7, "db_resnet50": [7, 11, 12], "backbon": 7, "db_mobilenet_v3_larg": [7, 12], "mobilenet": 7, "v3": 7, "detection_predictor": [7, 12], "assume_straight_pag": [7, 12], "detectionpredictor": 7, "input_pag": [7, 9, 12], "itself": [], "fit": [7, 12], "crnn_vgg16_bn": [7, 12], "128": [7, 12], "crnn_mobilenet_v3_smal": [7, 12], "crnn_mobilenet_v3_larg": [7, 12], "sar_resnet31": [7, 12], "31": [7, 12], "64": [7, 8, 12], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [7, 12], "recognitionpredictor": 7, "ocr_predictor": [7, 12], "det_arch": 7, 
"reco_arch": 7, "pretrained_backbon": [], "symmetric_pad": [7, 8, 12], "export_as_straight_box": [7, 12], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 7, "up": [7, 12], "assum": 7, "preserv": [7, 8, 12], "ratio": [7, 8, 12], "symmetr": [7, 8, 12], "bottom": [7, 12], "final": [7, 11], "potenti": 7, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 12, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [5, 11], "configur": 2, "my": [], "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 12], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 12], "done": 8, "mean": [8, 9], "std": 8, "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": 8, "225": 8, "averag": [8, 12], "per": [8, 12], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [3, 8, 12], "rang": [8, 11], "randombright": 8, "max_delta": 8, "adjust": [2, 8], "bright": 8, "delta": 8, "offset": 8, "add": [8, 9], "pick": 8, "p": [8, 9, 12], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9], "param": [8, 12], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9], 
"randomrot": 8, "max_angl": 8, "expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 12], "75": [8, 12], "33": 8, "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": 8, "consecut": [8, 12], "sequenti": [8, 11], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 12], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 12], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 12], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 12], "famili": 9, "synthes": 9, "metric": [9, 12], "assess": 9, "textmatch": 9, "match": [9, 12], "accuraci": 9, "aggreg": [5, 9], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [9, 12], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [9, 11], "updat": 9, "hello": [9, 12], "world": [9, 12], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 12], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 12], "g_": 9, "precis": [9, 12], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 12], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 12], "110": 9, "95": [9, 12], "200": 9, "150": [9, 12], "pair": 9, "broadcast": 9, "consum": 9, "memori": 9, "either": [9, 12], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [9, 
12], "detectionmetr": 9, "c_j": 9, "compil": [10, 12], "better": [10, 12], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 12], "searchabl": 10, "don": 12, "meet": [], "detail": 12, "link": [], "section": [11, 12], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 12, "class_nam": [], "total": [], "date": 12, "preprocessor": 12, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": 12, "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [5, 12], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 12, "after": [2, 12], "python3": [], "train_tensorflow": [], "py": 2, "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [5, 12], "predefin": 5, "prefer": 5, "signific": 5, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 11, "7149": [], "796": [], "handwritten": [], "1268": [], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": 
[], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [5, 12], "sensit": [5, 12], "abl": [5, 12], "howev": 5, "guidanc": 5, "tool": 5, "further": [], "anot": [], "handl": 5, "underli": 5, "defer": 5, "dataload": 5, "good": 11, "achiev": 11, "might": [11, 12], "tune": 11, "thing": [11, 12], "product": 11, "readi": 11, "help": 11, "support": 12, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 12, "layer": [], "metadata": [], "util": 11, "export_model_to_onnx": [], "input_shap": 11, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 12, "onc": 12, "separ": 12, "compon": 12, "charg": 12, "usabl": 12, "backend": 12, "along": 12, "processor": 12, "reusabl": 12, "consist": 12, "delimit": 12, "2d": 12, "corner": 12, "flag": 12, "belong": 12, "skew": 12, "comprehens": 12, "benchmark": 12, "publicli": 12, "sec": [], "25": 12, "84": 12, "39": 12, "85": 12, "86": 12, "93": 12, "83": 12, "24": [], "80": 12, "29": 12, "90": 12, "67": 12, "76": 12, "11": 12, "81": 12, "71": 12, "7": 12, "21": 12, "82": 12, "20": 12, "49": 12, "87": 12, "63": 12, "17": [], "28": [], "51": 12, "46": 12, "db_resnet34": [], "22": [], "89": 12, "74": 12, "56": 12, "68": 12, "92": 12, "61": 12, "41": 12, "00": 12, "79": 12, "38": 12, "88": [], "62": 12, "26": [], "06": 12, "78": 12, "47": 12, "54": [], "abov": 12, "cf": 12, "disclaim": 12, "combin": 12, "199": 12, "second": 12, "warmup": 12, "phase": 12, "measur": 12, "1000": 12, "obtain": 12, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 12, "useabl": 12, "favorit": 12, "dummy_img": 12, "area": 12, "send": 12, "snippet": 12, 
"transcrib": 12, "partial": [], "15": 12, "9": [], "73": 12, "44": [], "14": 12, "55": [], "58": [], "57": 12, "66": 12, "01": 12, "98": 12, "23": [], "69": 12, "99": 12, "91": 12, "05": [], "09": [], "96": 12, "40": [], "53": 12, "most": 12, "print": 12, "cfg": 12, "30595": 12, "45": 12, "72": 12, "43": 12, "65": 12, "77": 12, "30": 12, "07": [], "27": 12, "gvision": 12, "59": 12, "03": 12, "azur": [], "recogn": [], "42": 12, "go": 12, "mention": 12, "still": 12, "return": [5, 6, 7, 9, 12], "nest": 12, "get": 12, "typic": 12, "layout": 12, "340": 12, "json_output": 12, "1357421875": 12, "0361328125": 12, "8564453125": 12, "8603515625": 12, "914085328578949": 12, "5478515625": 12, "06640625": 12, "5810546875": 12, "0966796875": 12, "9949972033500671": 12, "51171875": 12, "1630859375": 12, "9578408598899841": 12, "1396484375": 12, "3232421875": 12, "185546875": 12, "3515625": 12, "outpout": 12, "xml": 12, "hocr": 12, "export_as_xml": 12, "xml_output": 12, "xml_bytes_str": 12, "xml_element": 12, "utf": 12, "xmln": 12, "w3": 12, "1999": 12, "xhtml": 12, "lang": 12, "en": 12, "meta": 12, "equiv": 12, "charset": 12, "system": 12, "ocr_pag": 12, "ocr_carea": 12, "ocr_par": 12, "ocr_lin": 12, "ocrx_word": 12, "div": 12, "id": 12, "page_1": 12, "bbox": 12, "3456": 12, "ppageno": 12, "block_1_1": 12, "857": 12, "529": 12, "2504": 12, "2710": 12, "par_1_1": 12, "span": 12, "line_1_1": 12, "x_size": 12, "x_descend": 12, "x_ascend": 12, "word_1_1": 12, "1552": 12, "540": 12, "1778": 12, "580": 12, "x_wconf": 12, "word_1_2": 12, "1782": 12, "1900": 12, "583": 12, "word_1_3": 12, "1420": 12, "597": 12, "1684": 12, "641": 12, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 11, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 6, "seemlessli": 4, "conda": [], 
"newer": [], "developp": 3, "fp": 12, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 11, "tflite": 11, "conv_sequ": 11, "relu": 11, "kernel_s": 11, "serialized_model": 11, "convert_to_fp16": [], "half": [], "serial": 11, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 11, "abstract": [], "verifi": 2, "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 5, "154": [], "as_imag": [], "convert_page_to_numpi": [], "get_word": [], "fitz": [], "gettextword": [], "get_artefact": [], "entir": [], "fulli": [], "daili": [], "mix": [], "fine": 12, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 12, "feed": [], "warm": [], "c5": 12, "x12larg": 12, "xeon": 12, "platinum": 12, "8275l": 12, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 11, "input_t": 11, "saved_model": 11, "And": 11, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 12, "02": 12, "5m": 12, "1m": 12, "19": [], "invoic": 12, 
"flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": 7, "arg": [5, 7], "bash": [], "tax": 12, "35": 12, "vgg16_bn": [], "mobilenetv3_larg": [], "mobilenetv3_smal": [], "constraint": 11, "tfliteconvert": 11, "from_keras_model": 11, "target_spec": 11, "supported_typ": 11, "float16": 11, "fallback": 11, "oper": [2, 11], "representative_dataset": 11, "yield": 11, "supported_op": 11, "opsset": 11, "tflite_builtins_int8": 11, "inference_input_typ": 11, "int8": 11, "inference_output_typ": 11, "2m": 12, "7m": 12, "look": 12, "variou": 12, "below": 12, "unfortun": 12, "moment": 12, "04": 12, "36": 12, "97": 12, "resum": 12, "road": 12, "get_text_word": [], "get_lin": [], "style": 2, "incom": 2, "pr": 2, "compli": 2, "flake8": 2, "convent": 2, "isort": 2, "reorder": 2, "catch": 2, "cleaner": 2, "mypi": 2, "ini": 2, "keep": 2, "sane": 2, "pydocstyl": 2, "_helper": 6, "pdf_render": 6, "render_pdf_topil": 6, "linknet_resnet18_rot": 12, "db_resnet50_rot": 12, "nb": 12}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", 
"from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "vgg16_bn_r"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models": [[7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, 
"", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": [], "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 12], "recognit": [4, 7, 12], "main": 4, "model": [4, 7, 11, 12], "zoo": [4, 7, 12], "detect": [4, 7, 12], "support": [4, 5, 8], "dataset": [4, 5], "arg": [], "synthet": 5, "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 5, "return": [], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": 7, "factori": [], "transform": 8, "compos": 8, "util": 9, 
"visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": 11, "own": [], "load": 5, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 12, "readi": [], "us": 11, "avail": [5, 12], "object": [], "data": 5, "prepar": 11, "infer": 11, "optim": [], "half": 11, "precis": 11, "export": [], "onnx": [], "right": 12, "architectur": 12, "predictor": 12, "end": 12, "ocr": 12, "two": 12, "stage": 12, "approach": 12, "what": 12, "should": 12, "i": 12, "do": 12, "output": 12, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 11, "build": [], "implement": [], "content": [], "compress": 11, "savedmodel": 11, "note": [], "refer": [], "backbon": [], "tensorflow": 11, "lite": 11, "quantiz": 11, "public": 5, "privat": 5, "lint": 2, "import": 2, "order": 2, "annot": 2, "type": 2, "docstr": 2, "format": 2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], 
"Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Lint verification": [[2, "lint-verification"]], "Import order": [[2, "import-order"]], "Annotation typing": [[2, "annotation-typing"]], "Docstring format": [[2, "docstring-format"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"]], "Available Datasets": [[5, "available-datasets"]], "Public datasets": [[5, "public-datasets"]], "docTR synthetic datasets": [[5, "doctr-synthetic-datasets"]], "docTR private datasets": [[5, "doctr-private-datasets"]], "Data Loading": [[5, "data-loading"]], "Supported 
Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id2"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, "doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Preparing your model for inference": [[11, "preparing-your-model-for-inference"]], "Model compression": [[11, "model-compression"]], "TensorFlow Lite": [[11, "tensorflow-lite"]], "Half-precision": [[11, "half-precision"]], "Post-training quantization": [[11, "post-training-quantization"]], "Using SavedModel": [[11, "using-savedmodel"]], "Choosing the right model": [[12, "choosing-the-right-model"]], "Text Detection": [[12, "text-detection"]], "Available architectures": [[12, "available-architectures"], [12, "id1"], [12, "id3"]], "Detection predictors": [[12, "detection-predictors"]], "Text Recognition": [[12, "text-recognition"]], "Text recognition model zoo": [[12, "id5"]], "Recognition predictors": [[12, "recognition-predictors"]], "End-to-End OCR": [[12, "end-to-end-ocr"]], "Two-stage approaches": [[12, "two-stage-approaches"]], "What should I do with the output?": [[12, "what-should-i-do-with-the-output"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator 
(class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, "doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, 
"doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, 
"doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], "resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, 
"doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() 
(doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 5], "we": [1, 2, 3, 4, 5, 6, 8, 12], "member": 1, "leader": 1, "make": [1, 2, 9, 11, 12], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 12], "size": [1, 5, 6, 8, 9, 12], "visibl": 1, "invis": 1, "disabl": 1, "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 12], "educ": 1, "socio": 1, "econom": 1, "statu": 
1, "nation": 1, "person": [1, 5], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 12], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 5], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 11, 12], "open": [1, 2], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7], "behavior": 1, "posit": [1, 9], "environ": 1, "includ": [1, 3, 5], "demonstr": 1, "empathi": 1, "kind": [1, 12], "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 12], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 12], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 11], "best": 1, "just": [1, 11], "u": [1, 2, 12], "individu": 1, "overal": 1, "unaccept": 1, "The": [1, 2, 5, 6, 9, 12], "us": [1, 2, 3, 5, 7, 9, 12], "languag": [1, 4, 5, 6, 12], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 11, 12], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": [1, 12], "publish": 1, "inform": [1, 2, 4, 5, 12], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 7], "explicit": 1, "permiss": 1, "which": [1, 12], "could": 1, "reason": 1, "consid": [1, 2, 5, 6, 9, 12], "inappropri": 1, "profession": 1, "set": [1, 2, 5, 7, 9, 12], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 12], "clarifi": 1, "take": [1, 5, 11, 12], "appropri": [1, 2, 12], "fair": 1, "action": 1, "thei": [1, 9, 12], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 5, 9, 11, 12], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 12], "space": 1, "also": [1, 12], "offici": 1, "repres": [1, 9, 12], "e": 
[1, 2, 3, 6, 7], "mail": 1, "post": [1, 12], "via": 1, "social": 1, "media": 1, "account": [1, 11], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 12], "abus": 1, "otherwis": [1, 9], "mai": [1, 12], "report": 1, "contact": 1, "minde": [1, 3, 4], "com": [1, 3, 6], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": 1, "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 11], "written": [1, 6], "provid": [1, 2, 4, 5, 11, 12], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 12], "why": 1, "wa": 1, "apologi": 1, "request": 1, "through": [1, 5, 8], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 12], "involv": [1, 12], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 5, 7, 9], "avoid": [1, 3], "well": [1, 11], "extern": 1, "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 12], "adapt": 1, "version": [1, 2, 11, 12], "0": [1, 5, 8, 9, 12], "avail": [1, 4, 8], "http": [1, 3, 6, 7, 12], "www": [1, 6, 12], "org": [1, 7, 12], "_": [1, 5, 7, 11], "html": [1, 2, 12], "were": [1, 6, 12], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 9], "ladder": 1, "For": [1, 2, 3, 12], "answer": 1, "common": [1, 2, 8, 9], "question": 1, "about": [1, 12], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 12], "you": [2, 3, 5, 6, 7, 11, 12], "need": [2, 3, 5, 9], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 5], "packag": [2, 4, 9, 11], "python": 2, "doc": [2, 6, 12], "librari": [2, 3, 10], "build": [2, 3], "script": 2, "refer": [2, 3, 12], "train": [2, 5, 7, 8, 12], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 12], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], 
"ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 12], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 12], "your": [2, 4, 5, 6, 9, 12], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9], "encount": 2, "problem": 2, "suggest": 2, "input": [2, 6, 7, 8, 12], "ha": [2, 5, 9], "valu": [2, 6, 8, 12], "can": [2, 3, 5, 11, 12], "purpos": 2, "advis": 2, "first": 2, "check": [2, 12], "topic": 2, "wasn": 2, "t": [2, 5, 12], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 12], "feel": 2, "new": [2, 9], "one": [2, 5, 7, 8, 12], "do": [2, 3, 11], "so": [2, 3, 5], "whenev": 2, "possibl": [2, 9], "enough": [2, 12], "jump": 2, "wonder": 2, "how": [2, 5], "someth": 2, "more": [2, 9, 12], "gener": [2, 5], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 12], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 5, 9, 12], "stackoverflow": 2, "addit": [2, 6], "depend": [2, 3, 4], "command": 2, "m": [9, 12], "pip": [2, 3], "upgrad": [], "dev": 2, "pre": 7, "docstr": [], "In": [2, 5], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 12], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [5, 6, 8], "same": [2, 5, 6, 9, 12], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 12], "To": [2, 3, 12], "togeth": [2, 6], "current": 12, "built": [], "sphinx": [], "thank": [], "our": [7, 12], "file": [2, 5], "been": [5, 9, 12], "rebuilt": [], "want": [11, 12], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": 2, "directori": [], "addition": [2, 11, 12], "clear": [], "web": 6, "browser": [2, 4], "cach": 5, "modif": 2, "now": 2, "locat": [2, 6], "index": [2, 6], "wish": [2, 11], "somewher": 2, "els": 2, "than": [2, 3, 9], "join": 2, "slack": 2, "where": [2, 6, 8, 9, 12], "find": [2, 3], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 11, 12], "8": [7, 8, 12], "higher": [3, 5], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 12], 
"pytorch": [3, 4, 8, 12], "correspond": [3, 6, 12], "page": [3, 5, 7, 9, 12], "2": [3, 4, 6, 8, 11, 12], "macbook": [], "m1": [], "chip": [], "some": [2, 3, 5, 10], "metal": [], "plugin": [], "1": [5, 6, 7, 8, 9, 11, 12], "12": 12, "anoth": [3, 5, 7], "linux": 3, "few": [3, 11], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 12], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 12], "over": [3, 5, 9, 12], "here": [3, 5, 8, 10, 12], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12], "strive": 3, "reduc": [3, 8], "framework": [3, 5, 12], "minimum": [3, 5, 8, 9], "necessari": 3, "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 11], "torch": [3, 8], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 12], "charact": [4, 5, 6, 9, 12], "made": 4, "seamless": 4, "access": [4, 5, 6, 12], "anyon": 4, "power": 4, "easi": [4, 9], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": 12, "understand": [4, 5, 12], "task": [4, 5, 12], "ocr": [4, 5, 7, 9], "predictor": [4, 6, 7], "pars": [4, 5], "textual": [4, 5, 6, 7, 12], "identifi": 4, "each": [4, 5, 6, 8, 9, 12], "word": [4, 5, 9, 12], "research": 4, "quickli": 4, "compar": 4, "own": [4, 5], "architectur": [4, 7], "speed": [4, 7], "perform": [4, 6, 8, 9, 11, 12], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 12], "paramet": [4, 5, 6, 7, 8, 9], "friendli": 4, "line": [4, 9, 12], "code": [4, 6], "load": [4, 11], "googlevis": 4, "aw": [4, 12], "textract": [4, 12], "optim": [4, 11], "infer": [4, 7, 8], "both": [4, 5, 8, 12], "cpu": [4, 12], "gpu": 4, "light": 4, "activ": 4, "maintain": 4, "integr": 4, "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 12], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 12], "sar": [4, 7], "show": [4, 6, 7, 9], 
"attend": [4, 7], "read": [2, 4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 12], "irregular": [4, 7], "crnn": [4, 7], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 7], "network": [4, 7], "imag": [4, 5, 6, 7, 8, 9, 12], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 12], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 12], "multi": [4, 7], "aspect": [4, 7, 8, 12], "non": [4, 5, 6, 7, 8, 9], "vitstr": [], "vision": 5, "transform": 5, "fast": 5, "parseq": [], "permut": [], "autoregress": [], "funsd": [4, 5, 12], "form": [4, 5, 12], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 12], "consolid": [4, 5], "receipt": [4, 5, 12], "forpost": [4, 5], "sroie": [4, 5], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5], "visual": 4, "geometri": [4, 6, 12], "group": 4, "svhn": [4, 5], "digit": [4, 5], "unsupervis": 4, "ic03": [4, 5], "2003": [4, 5], "ic13": [4, 5], "2013": [4, 5], "imgur5k": [4, 5], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [], "synthet": [], "data": [6, 8, 9], "artifici": [], "iiithw": [], "wildreceipt": [], "spatial": [6, 9], "dual": [], "modal": [], "graph": 6, "kei": [], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12], "use_polygon": [5, 9], "fals": [5, 6, 7, 8, 9, 11, 12], "recognition_task": [], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9], "document": [5, 7, 9, 10, 12], "import": [5, 6, 7, 8, 9, 11, 12], "train_set": 5, "download": 5, "img": [5, 8], "target": [5, 6, 8, 9], "subset": [5, 12], "polygon": [5, 12], "rotat": [5, 6, 7, 8, 9, 12], "bound": [5, 6, 7, 8, 9, 12], "box": [5, 6, 7, 8, 9, 12], "instead": [5, 6, 7], "straight": [5, 7, 12], "ones": [5, 8, 9], "recognit": [5, 9], "keyword": [5, 7], "argument": [5, 12], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": 5, "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9], "prior": 5, "svt": 5, "ucsd": 5, "comput": [5, 9, 12], "hous": 5, "number": [5, 8, 9, 
12], "localis": 5, "repositori": [2, 5], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": 5, "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9], "part": [5, 8, 12], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 11], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": [2, 5, 11], "annot": 5, "abstractdataset": 5, "label_path": 5, "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 12], "pure": [], "mnt": [], "ramdisk": [], "max": [8, 9], "90kdict32px": [], "imlist": [], "txt": [], "hw": [], "images_90k_norm": [], "90k": [], "docartefact": 5, "object": [5, 9, 10, 12], "detect": [5, 9, 10], "element": [5, 6, 7, 9, 12], "varieti": 5, "arxiv": 7, "ab": [], "2103": [], "14470v1": [], "test": [], "charactergener": 5, "implement": [5, 6, 8, 9, 11, 12], "d": 5, "abdef": 5, "num_sampl": 5, "100": [5, 8, 9, 11, 12], "vocabulari": 5, "sampl": [5, 12], "iter": [5, 8], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 12], "sample_transform": 5, "wordgener": 5, "min_char": 5, "int": [5, 6, 8, 9], "max_char": 5, "list": [5, 6, 8, 9], "none": [5, 6, 8, 9, 12], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 12], "maximum": [5, 8], "detectiondataset": 5, "recognitiondataset": 5, "labels_path": 5, "contain": [5, 12], "ocrdataset": 5, "label_fil": 5, "jpg": [5, 6], "root": [2, 5], "shuffl": [5, 8], "batch_siz": 5, "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": 5, "train_it": 5, "next": 5, "befor": [5, 7, 8, 12], "pass": [5, 6, 7, 12], "batch": [5, 7, 8, 12], "drop": 5, "isn": 5, "full": [5, 9, 12], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 12], "content": [5, 6, 9, 12], "properli": 5, "model": [5, 9], "interpret": [5, 6], "multipl": [5, 6, 8], "name": [5, 7, 12], "10": [5, 9, 12], "0123456789": 5, "hindi_digit": 
[], "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "ascii_lett": 5, "52": [5, 12], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 12], "ancient_greek": [], "48": [7, 12], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": [], "arabic_lett": [], "37": 12, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": [], "persian_lett": [], "\u067e\u0686\u06a2\u06a4\u06af": [], "arabic_diacrit": [], "arabic_punctu": [], "latin": 5, "94": [5, 12], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": 5, "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 12], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 5, "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": [], "101": [], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": [], "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": [], "czech": [], "130": [], "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": [], "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": [], "234": [], "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": [], "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 12], 
"dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 12], "map": 5, "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 12], "option": 5, "start": 5, "case": [5, 9, 12], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 12], "modul": [6, 8, 9, 12], "easili": [6, 9, 11, 12], "export": [6, 7, 9, 10, 11, 12], "analysi": 6, "format": [5, 6, 9, 11, 12], "organ": 6, "uninterrupt": [6, 12], "confid": [6, 9, 12], "float": [6, 8, 9, 11], "associ": 6, "predict": [6, 7, 9], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 12], "rel": [6, 8, 9], "collect": 6, "meant": [6, 11], "two": 6, "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": 6, "etc": 6, "artefact_typ": 6, "type": [6, 12], "sever": [6, 8, 12], "its": [5, 6, 8, 9, 12], "titl": [6, 12], "underneath": 6, "page_idx": [6, 12], "dimens": [6, 9, 12], "dict": [6, 9, 12], "numpi": [6, 7, 9, 12], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 12], "raw": [6, 9], "pixel": [6, 8, 12], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 12], "overlai": 6, "displai": [6, 9], "matplotlib": 9, "pyplot": 9, "method": [8, 12], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 12], "scale": [6, 7, 8, 9], "rgb_mode": [], "password": [], "pdf": [6, 7, 10], "convert": [6, 8, 11], "render": 6, "72dpi": 6, "output": [6, 8], "rgb": [6, 8], "bgr": 6, "unlock": [], "encrypt": [], "pypdfium2": 6, "pdfpage": [], "decod": 6, "shape": [6, 7, 8, 9, 11, 12], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [2, 6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 11], "float32": [6, 7, 8, 11], "desir": 6, "relat": [2, 6], "divid": 6, "255": [6, 7, 8, 9, 12], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, 
"url": 6, "yoursit": 6, "weasyprint": [], "documentfil": 6, "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 12], "from_url": 6, "from_imag": 6, "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": 7, "describ": [7, 9], "veri": 7, "deep": [7, 12], "convolut": 7, "larg": 7, "modifi": 7, "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 12], "input_tensor": 7, "random": [7, 8, 9, 11, 12], "uniform": [7, 8, 11], "512": 7, "maxval": [7, 8, 11], "imagenet": 7, "extractor": 7, "resnet18": 7, "resnet": 7, "18": [7, 12], "residu": 7, "boolean": [7, 12], "resnet34": 7, "34": [7, 12], "resnet50": 7, "50": [7, 12], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 12], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 11], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8, 11], "vit_": [], "visiontransform": [], "worth": [], "16x16": [], "patch": [], "unoffici": [], "config": 2, "vit_b": [], "b": 9, "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 8, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 7, "arch": 7, "croporientationpredictor": 7, "np": [7, 8, 9, 11, 12], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 11, 12], "600": [7, 9, 12], "800": [7, 9, 12], "astyp": [7, 9, 11, 12], "crop": [7, 8, 12], "dataset": [7, 12], "linknet_resnet18": [7, 12], "1024": [7, 9, 11, 12], "linknet_resnet34": 7, "linknet_resnet50": 7, "db_resnet50": [7, 11, 12], "backbon": 7, "db_mobilenet_v3_larg": [7, 12], "mobilenet": 7, "v3": 7, "detection_predictor": [7, 12], "assume_straight_pag": [7, 12], "detectionpredictor": 7, "input_pag": [7, 9, 12], "itself": [], "fit": [7, 12], "crnn_vgg16_bn": [7, 12], "128": [7, 12], "crnn_mobilenet_v3_smal": [7, 12], "crnn_mobilenet_v3_larg": [7, 12], 
"sar_resnet31": [7, 12], "31": [7, 12], "64": [7, 8, 12], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": [], "vitstr_smal": [], "vitstr_bas": [], "recognition_predictor": [7, 12], "recognitionpredictor": 7, "ocr_predictor": [7, 12], "det_arch": 7, "reco_arch": 7, "pretrained_backbon": [], "symmetric_pad": [7, 8, 12], "export_as_straight_box": [7, 12], "detect_orient": [], "straighten_pag": [], "detect_languag": [], "ocrpredictor": 7, "up": [7, 12], "assum": 7, "preserv": [7, 8, 12], "ratio": [7, 8, 12], "symmetr": [7, 8, 12], "bottom": [7, 12], "final": [7, 11], "potenti": 7, "estim": [], "slightli": [], "deterior": [], "latenc": [], "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [], "login": [], "huggingfac": [], "hub": [], "from_hub": [], "repo_id": [], "instanti": 12, "hf": [], "fasterrcnn_mobilenet_v3_large_fpn": [], "repo": [], "hf_hub_download": [], "snapshot_download": [], "checkpoint": [], "push_to_hf_hub": [], "model_nam": [], "save": [5, 11], "configur": 2, "my": [], "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 12], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 12], "done": 8, "mean": [8, 9], "std": 8, "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": 8, "225": 8, "averag": [8, 12], "per": [8, 12], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [3, 8, 12], "rang": [8, 11], "randombright": 8, "max_delta": 8, "adjust": [2, 8], "bright": 8, "delta": 8, "offset": 8, "add": [8, 9], "pick": 8, "p": [8, 9, 12], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, 
"max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9], "param": [8, 12], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9], "randomrot": 8, "max_angl": 8, "expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 12], "75": [8, 12], "33": 8, "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": 8, "consecut": [8, 12], "sequenti": [8, 11], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 12], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 12], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 12], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 12], "famili": 9, "synthes": 9, "metric": [9, 12], "assess": 9, "textmatch": 9, "match": [9, 12], "accuraci": 9, "aggreg": [5, 9], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [9, 12], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [9, 11], "updat": 9, "hello": [9, 12], "world": [9, 12], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 12], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 12], "g_": 9, "precis": [9, 12], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 12], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 
12], "110": 9, "95": [9, 12], "200": 9, "150": [9, 12], "pair": 9, "broadcast": 9, "consum": 9, "memori": 9, "either": [9, 12], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [9, 12], "detectionmetr": 9, "c_j": 9, "compil": [10, 12], "better": [10, 12], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 12], "searchabl": 10, "don": 12, "meet": [], "detail": 12, "link": [], "section": [11, 12], "det_model": [], "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": [], "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": 12, "class_nam": [], "total": [], "date": 12, "preprocessor": 12, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": [], "restrict": [], "write": [], "outsid": [], "tmp": [], "work": 12, "step": [], "usag": [], "multiprocess": [], "doctr_multiprocessing_dis": [], "variabl": [], "becaus": [], "shm": [], "share": [5, 12], "chang": [], "By": [], "doctr_cache_dir": [], "focu": [], "love": [], "appreci": [], "interfac": [], "io": [], "custom": [], "felix92": [], "db": [], "vgg16": [], "bn": [], "plug": [], "obj_detect": [], "exist": [], "overwritten": [], "prerequisit": [], "creat": [], "co": [], "instal": [], "git": [], "lf": [], "my_awesome_model": [], "v1": [], "directli": 12, "after": [2, 12], "python3": [], "train_tensorflow": [], "py": 2, "train_pytorch": [], "tabl": [], "pull": [], "dummi": [], "tilman": [], "rassi": [], "fascan": [], "evalu": [5, 12], "predefin": 5, "prefer": 5, "signific": 5, "valid": [], "149": [], "626": [], "360": [], "2000": [], "3000": [], "249": [], "33402": [], "13068": [], "772875": [], "85875": [], "246": [], "233": [], "resourc": 11, "7149": [], "796": [], "handwritten": [], "1268": 
[], "472": [], "21888": [], "8707": [], "33608": [], "19342": [], "uppercas": [], "19370": [], "2186": [], "257": [], "647": [], "73257": [], "26032": [], "7100000": [], "707470": [], "1156": [], "1107": [], "849": [], "1095": [], "207901": [], "22672": [], "7581382": [], "1337891": [], "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": [], "regular": [], "2700": [], "300": [], "background": [], "qr_code": [], "bar_cod": [], "photo": [], "classif": [], "mani": [5, 12], "sensit": [5, 12], "abl": [5, 12], "howev": 5, "guidanc": 5, "tool": 5, "further": [], "anot": [], "handl": 5, "underli": 5, "defer": 5, "dataload": 5, "good": 11, "achiev": 11, "might": [11, 12], "tune": 11, "thing": [11, 12], "product": 11, "readi": 11, "help": 11, "support": 12, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 12, "layer": [], "metadata": [], "util": 11, "export_model_to_onnx": [], "input_shap": 11, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 12, "onc": 12, "separ": 12, "compon": 12, "charg": 12, "usabl": 12, "backend": 12, "along": 12, "processor": 12, "reusabl": 12, "consist": 12, "delimit": 12, "2d": 12, "corner": 12, "flag": 12, "belong": 12, "skew": 12, "comprehens": 12, "benchmark": 12, "publicli": 12, "sec": [], "25": 12, "84": 12, "39": 12, "85": 12, "86": 12, "93": 12, "83": 12, "24": [], "80": 12, "29": 12, "90": 12, "67": 12, "76": 12, "11": 12, "81": 12, "71": 12, "7": 12, "21": 12, "82": 12, "20": 12, "49": 12, "87": 12, "63": 12, "17": [], "28": [], "51": 12, "46": 12, "db_resnet34": [], "22": [], "89": 12, "74": 12, "56": 12, "68": 12, "92": 12, "61": 12, "41": 12, "00": 12, "79": 12, "38": 12, "88": [], "62": 12, "26": [], "06": 12, "78": 12, "47": 12, "54": [], "abov": 12, "cf": 12, "disclaim": 12, "combin": 12, "199": 12, 
"second": 12, "warmup": 12, "phase": 12, "measur": 12, "1000": 12, "obtain": 12, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 12, "useabl": 12, "favorit": 12, "dummy_img": 12, "area": 12, "send": 12, "snippet": 12, "transcrib": 12, "partial": [], "15": 12, "9": [], "73": 12, "44": [], "14": 12, "55": [], "58": [], "57": 12, "66": 12, "01": 12, "98": 12, "23": [], "69": 12, "99": 12, "91": 12, "05": [], "09": [], "96": 12, "40": [], "53": 12, "most": 12, "print": 12, "cfg": 12, "30595": 12, "45": 12, "72": 12, "43": 12, "65": 12, "77": 12, "30": 12, "07": [], "27": 12, "gvision": 12, "59": 12, "03": 12, "azur": [], "recogn": [], "42": 12, "go": 12, "mention": 12, "still": 12, "return": [5, 6, 7, 9, 12], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 12, "get": 12, "typic": 12, "layout": 12, "340": 12, "text_output": [], "json_output": 12, "1357421875": 12, "0361328125": 12, "8564453125": 12, "8603515625": 12, "914085328578949": 12, "5478515625": 12, "06640625": 12, "5810546875": 12, "0966796875": 12, "9949972033500671": 12, "51171875": 12, "1630859375": 12, "9578408598899841": 12, "1396484375": 12, "3232421875": 12, "185546875": 12, "3515625": 12, "outpout": 12, "xml": 12, "hocr": 12, "export_as_xml": 12, "xml_output": 12, "xml_bytes_str": 12, "xml_element": 12, "utf": 12, "xmln": 12, "w3": 12, "1999": 12, "xhtml": 12, "lang": 12, "en": 12, "meta": 12, "equiv": 12, "charset": 12, "system": 12, "ocr_pag": 12, "ocr_carea": 12, "ocr_par": 12, "ocr_lin": 12, "ocrx_word": 12, "div": 12, "id": 12, "page_1": 12, "bbox": 12, "3456": 12, "ppageno": 12, "block_1_1": 12, "857": 12, "529": 12, "2504": 12, "2710": 12, "par_1_1": 12, "span": 12, "line_1_1": 12, "x_size": 12, "x_descend": 12, "x_ascend": 12, "word_1_1": 12, "1552": 12, "540": 12, "1778": 12, "580": 12, "x_wconf": 12, "word_1_2": 12, "1782": 12, "1900": 12, "583": 12, 
"word_1_3": 12, "1420": 12, "597": 12, "1684": 12, "641": 12, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 11, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 6, "seemlessli": 4, "conda": [], "newer": [], "developp": 3, "fp": 12, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 11, "tflite": 11, "conv_sequ": 11, "relu": 11, "kernel_s": 11, "serialized_model": 11, "convert_to_fp16": [], "half": [], "serial": 11, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 11, "abstract": [], "verifi": 2, "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": 5, "154": [], "as_imag": [], "convert_page_to_numpi": [], "get_word": [], "fitz": [], "gettextword": [], "get_artefact": [], "entir": [], "fulli": [], "daili": [], "mix": [], "fine": 12, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 12, "feed": [], "warm": [], "c5": 12, "x12larg": 12, "xeon": 12, "platinum": 12, "8275l": 12, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], 
"702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 11, "input_t": 11, "saved_model": 11, "And": 11, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 12, "02": 12, "5m": 12, "1m": 12, "19": [], "invoic": 12, "flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": 7, "arg": [5, 7], "bash": [], "tax": 12, "35": 12, "vgg16_bn": [], "mobilenetv3_larg": [], "mobilenetv3_smal": [], "constraint": 11, "tfliteconvert": 11, "from_keras_model": 11, "target_spec": 11, "supported_typ": 11, "float16": 11, "fallback": 11, "oper": [2, 11], "representative_dataset": 11, "yield": 11, "supported_op": 11, "opsset": 11, "tflite_builtins_int8": 11, "inference_input_typ": 11, "int8": 11, "inference_output_typ": 11, "2m": 12, "7m": 12, "look": 12, "variou": 12, "below": 12, "unfortun": 12, "moment": 12, "04": 12, "36": 12, "97": 12, "resum": 12, "road": 12, "get_text_word": [], "get_lin": [], "style": 2, "incom": 2, "pr": 2, "compli": 2, "flake8": 2, "convent": 2, "isort": 2, "reorder": 2, "catch": 2, "cleaner": 2, "mypi": 2, "ini": 2, "keep": 2, "sane": 2, "pydocstyl": 2, "_helper": 6, "pdf_render": 6, "render_pdf_topil": 6, "linknet_resnet18_rot": 12, "db_resnet50_rot": 12, "nb": 12}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", 
"Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "vgg16_bn_r"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models": [[7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", 
"RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": [], "29": [], "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": [], "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 
3, "text": [4, 12], "recognit": [4, 7, 12], "main": 4, "model": [4, 7, 11, 12], "zoo": [4, 7, 12], "detect": [4, 7, 12], "support": [4, 5, 8], "dataset": [4, 5], "arg": [], "synthet": 5, "gener": [], "custom": [], "loader": [], "dataload": [], "vocab": 5, "return": [], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": 7, "factori": [], "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": 11, "own": [], "load": 5, "aw": [], "lambda": [], "share": [], "commun": [], "from": [], "huggingfac": [], "hub": [], "push": [], "pretrain": [], "name": [], "convent": [], "choos": 12, "readi": [], "us": 11, "avail": [5, 12], "object": [], "data": 5, "prepar": 11, "infer": 11, "optim": [], "half": 11, "precis": 11, "export": [], "onnx": [], "right": 12, "architectur": 12, "predictor": 12, "end": 12, "ocr": 12, "two": 12, "stage": 12, "approach": 12, "what": 12, "should": 12, "i": 12, "do": 12, "output": 12, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 11, "build": [], "implement": [], "content": [], "compress": 11, "savedmodel": 11, "note": [], "refer": [], "backbon": [], "tensorflow": 11, "lite": 11, "quantiz": 11, "public": 5, "privat": 5, "lint": 2, "import": 2, "order": 2, "annot": 2, "type": 2, "docstr": 2, "format": 2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, 
"v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Lint verification": [[2, "lint-verification"]], "Import order": [[2, "import-order"]], "Annotation typing": [[2, "annotation-typing"]], "Docstring format": [[2, "docstring-format"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, 
"text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"]], "Available Datasets": [[5, "available-datasets"]], "Public datasets": [[5, "public-datasets"]], "docTR synthetic datasets": [[5, "doctr-synthetic-datasets"]], "docTR private datasets": [[5, "doctr-private-datasets"]], "Data Loading": [[5, "data-loading"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id2"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, "doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Preparing your model for inference": [[11, "preparing-your-model-for-inference"]], "Model compression": [[11, "model-compression"]], "TensorFlow Lite": [[11, "tensorflow-lite"]], "Half-precision": [[11, "half-precision"]], "Post-training quantization": [[11, "post-training-quantization"]], "Using SavedModel": [[11, "using-savedmodel"]], "Choosing the right model": [[12, "choosing-the-right-model"]], "Text Detection": [[12, "text-detection"]], "Available architectures": [[12, "available-architectures"], [12, "id1"], [12, "id3"]], "Detection predictors": [[12, 
"detection-predictors"]], "Text Recognition": [[12, "text-recognition"]], "Text recognition model zoo": [[12, "id5"]], "Recognition predictors": [[12, "recognition-predictors"]], "End-to-End OCR": [[12, "end-to-end-ocr"]], "Two-stage approaches": [[12, "two-stage-approaches"]], "What should I do with the output?": [[12, "what-should-i-do-with-the-output"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, "doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in 
doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module 
doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], "resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class 
in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() 
(doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/v0.6.0/searchindex.js b/v0.6.0/searchindex.js index 579ff1d28a..f27342e00e 100644 --- a/v0.6.0/searchindex.js +++ b/v0.6.0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", 
"using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 5, 7, 12], "we": [1, 2, 3, 4, 6, 8, 12, 13, 15], "member": 1, "leader": 1, "make": [1, 2, 9, 11, 12, 14, 15], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 12], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 15], "size": [1, 5, 6, 8, 9, 15], "visibl": 1, "invis": 1, "disabl": [1, 11], "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 15], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 13], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 15], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 13], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 14, 15], "open": [1, 2, 12], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7, 12], "behavior": 1, "posit": [1, 9], "environ": 1, "includ": [1, 3, 5, 13], "demonstr": 1, "empathi": 1, "kind": [1, 15], "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 15], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 11, 12, 13, 14], "best": 1, "just": [1, 14], "u": [1, 2, 15], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 15], "us": [1, 2, 3, 5, 7, 9, 11, 12, 15], "languag": [1, 4, 5, 6, 7, 12, 15], "imageri": 1, "attent": [1, 7], 
"advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 14, 15], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": [1, 15], "publish": 1, "inform": [1, 2, 4, 5, 13, 15], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 11, 13, 15], "could": 1, "reason": 1, "consid": [1, 2, 5, 6, 9, 15], "inappropri": 1, "profession": 1, "set": [1, 2, 5, 7, 9, 11, 15], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 11, 13, 15], "clarifi": 1, "take": [1, 5, 14, 15], "appropri": [1, 2, 15], "fair": 1, "action": 1, "thei": [1, 9, 15], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 9, 12, 13, 14, 15], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2, 12], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 13, 15], "space": 1, "also": [1, 7, 12, 13, 15], "offici": 1, "repres": [1, 9, 15], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 15], "via": 1, "social": 1, "media": 1, "account": [1, 12, 14], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 15], "abus": 1, "otherwis": [1, 6, 9], "mai": [1, 15], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 11, 12, 15], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 11], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 12, 14, 15], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 14], "written": [1, 6], "provid": [1, 2, 4, 12, 13, 14, 15], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 15], "why": 1, "wa": 1, "apologi": 1, "request": [1, 12], "through": [1, 8, 13], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 15], "involv": [1, 15], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 
13], "avoid": [1, 3], "well": [1, 14], "extern": [1, 13], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": [1, 11], "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 15], "adapt": 1, "version": [1, 2, 3, 14, 15], "0": [1, 3, 5, 8, 9, 13, 15], "avail": [1, 4, 8], "http": [1, 3, 6, 7, 11, 12, 15], "www": [1, 6, 15], "org": [1, 7, 15], "_": [1, 5, 7, 14], "html": [1, 2, 15], "were": [1, 6, 15], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 11, 12], "ladder": 1, "For": [1, 2, 3, 15], "answer": 1, "common": [1, 2, 8, 9], "question": 1, "about": [1, 11, 13, 15], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 15], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15], "need": [2, 3, 5, 9, 11, 12], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 13], "packag": [2, 4, 9, 11, 13, 14], "python": 2, "doc": [2, 6, 15], "librari": [2, 3, 10], "build": [2, 3], "script": [2, 13], "refer": [2, 3, 12, 13, 15], "train": [2, 5, 7, 8, 12, 13, 15], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 15], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3, 12], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 12, 15], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 12, 13, 15], "your": [2, 4, 6, 9, 15], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 13], "encount": 2, "problem": 2, "suggest": [2, 12], "input": [2, 6, 7, 8, 15], "ha": [2, 5, 9, 13], "valu": [2, 6, 8, 15], "can": [2, 3, 11, 12, 13, 14, 15], "purpos": 2, "advis": 2, "first": 2, "check": [2, 12, 15], "topic": 2, "wasn": 2, "t": [2, 5, 15], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 14, 15], "feel": [2, 12], "new": [2, 9], "one": [2, 5, 7, 8, 12, 15], "do": [2, 3, 7, 11, 14], "so": [2, 3, 5, 7, 12, 13], "whenev": 2, "possibl": [2, 9, 12], "enough": [2, 15], "jump": 
2, "wonder": 2, "how": [2, 12, 13], "someth": 2, "more": [2, 9, 11, 13, 15], "gener": [2, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 15], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 13, 15], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [9, 15], "pip": [2, 3], "upgrad": [], "dev": [2, 11], "pre": 7, "docstr": [], "In": [2, 5, 13], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 15], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [5, 6, 8], "same": [2, 6, 9, 13, 15], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 13, 15], "To": [2, 3, 12, 15], "togeth": [2, 6], "current": 15, "built": [], "sphinx": [], "thank": [], "our": [7, 15], "file": [2, 5], "been": [9, 13, 15], "rebuilt": [], "want": [14, 15], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": 2, "directori": 11, "addition": [2, 14, 15], "clear": [], "web": 6, "browser": [2, 4], "cach": [5, 11], "modif": 2, "now": 2, "locat": [2, 6], "index": [2, 6], "wish": [2, 14], "somewher": 2, "els": 2, "than": [2, 3, 9, 12], "join": 2, "slack": 2, "where": [2, 6, 8, 9, 15], "find": [2, 3, 13], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 14, 15], "8": [7, 8, 15], "higher": [3, 5], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 12, 15], "pytorch": [3, 4, 7, 8, 12, 15], "correspond": [3, 6, 15], "page": [3, 5, 7, 9, 15], "2": [3, 4, 5, 6, 8, 11, 14, 15], "macbook": 3, "m1": 3, "chip": 3, "some": [2, 3, 10, 12, 13], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 13, 14, 15], "12": [3, 15], "anoth": [3, 7, 13], "linux": 3, "few": [3, 14], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 15], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 15], "over": [3, 5, 9, 15], "here": [3, 8, 10, 13, 15], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 15], "strive": 3, "reduc": [3, 8], "framework": [3, 12, 13, 15], 
"minimum": [3, 5, 8, 9], "necessari": 3, "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 12, 14], "torch": [3, 8, 12], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 15], "charact": [4, 5, 6, 9, 13, 15], "made": 4, "seamless": 4, "access": [4, 6, 13, 15], "anyon": 4, "power": 4, "easi": [4, 9, 12], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 15], "understand": [4, 5, 15], "task": [4, 5, 7, 12, 13, 15], "ocr": [4, 5, 7, 9, 12, 13], "predictor": [4, 6, 7, 12], "pars": [4, 5], "textual": [4, 5, 6, 7, 15], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 13, 15], "word": [4, 5, 7, 9, 15], "research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 12], "speed": [4, 7], "perform": [4, 6, 8, 9, 14, 15], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 14, 15], "paramet": [4, 5, 6, 7, 8, 9], "friendli": 4, "line": [4, 9, 15], "code": [4, 6], "load": [4, 5, 7, 14], "googlevis": 4, "aw": [4, 15], "textract": [4, 15], "optim": [4, 14], "infer": [4, 7, 8], "both": [4, 5, 8, 13, 15], "cpu": [4, 15], "gpu": 4, "light": 4, "activ": 4, "maintain": 4, "integr": [4, 12, 13], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 15], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 15], "sar": [4, 7], "show": [4, 6, 7, 9, 12], "attend": [4, 7], "read": [2, 4, 5, 7, 11], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 15], "irregular": [4, 7, 13], "crnn": [4, 7, 12], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7], "network": [4, 5, 7], "imag": [4, 5, 6, 7, 8, 9, 12, 13, 15], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 15], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 15], "multi": [4, 7], "aspect": [4, 7, 8, 15], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": 7, "permut": [], 
"autoregress": [], "funsd": [4, 5, 13, 15], "form": [4, 5, 15], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 13, 15], "consolid": [4, 5], "receipt": [4, 5, 15], "forpost": [4, 5], "sroie": [4, 5, 13], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 13], "visual": 4, "geometri": [4, 6, 15], "group": 4, "svhn": [4, 5, 13], "digit": [4, 5, 13], "unsupervis": 4, "ic03": [4, 5, 13], "2003": [4, 5], "ic13": [4, 5, 13], "2013": [4, 5], "imgur5k": [4, 5, 13], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 13], "synthet": 4, "data": [4, 5, 6, 8, 9, 12], "artifici": [4, 5], "iiithw": [], "wildreceipt": [], "spatial": [6, 9], "dual": [], "modal": [], "graph": 6, "kei": [], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15], "use_polygon": [5, 9, 13], "fals": [5, 6, 7, 8, 9, 14, 15], "recognition_task": [5, 13], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 12], "document": [5, 7, 9, 10, 13, 15], "import": [5, 6, 7, 8, 9, 12, 13, 14, 15], "train_set": [5, 13], "download": [5, 13], "img": [5, 8, 13], "target": [5, 6, 8, 9, 13], "subset": [5, 15], "polygon": [5, 15], "rotat": [5, 6, 7, 8, 9, 13, 15], "bound": [5, 6, 7, 8, 9, 15], "box": [5, 6, 7, 8, 9, 13, 15], "instead": [5, 6, 7], "straight": [5, 7, 13, 15], "ones": [5, 8, 9], "recognit": [5, 9], "keyword": [5, 7], "argument": [5, 7, 15], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 13], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 13], "prior": 5, "svt": [5, 13], "ucsd": 5, "comput": [5, 9, 15], "hous": 5, "number": [5, 8, 9, 15], "localis": 5, "repositori": [2, 5, 7, 12], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 13], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 13], "part": [5, 8, 15], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 13, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, 
"challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": [2, 5, 14], "annot": 5, "abstractdataset": 5, "label_path": [5, 13], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 13, 15], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [5, 13], "object": [5, 9, 10, 15], "detect": [5, 9, 10], "element": [5, 6, 7, 9, 15], "varieti": 5, "arxiv": 7, "ab": [], "2103": [], "14470v1": [], "test": 13, "charactergener": [5, 13], "implement": [5, 6, 8, 9, 14, 15], "d": [5, 13], "abdef": [5, 13], "num_sampl": [5, 13], "100": [5, 8, 9, 13, 14, 15], "vocabulari": [5, 12], "sampl": [5, 13, 15], "iter": [5, 8, 13], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 15], "sample_transform": 5, "wordgener": [5, 13], "min_char": [5, 13], "int": [5, 6, 8, 9], "max_char": [5, 13], "list": [5, 6, 8, 9, 12], "none": [5, 6, 7, 8, 9, 15], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 13, 14, 15], "maximum": [5, 8], "detectiondataset": [5, 13], "recognitiondataset": [5, 13], "labels_path": [5, 13], "contain": [5, 13, 15], "ocrdataset": [5, 13], "label_fil": [5, 13], "jpg": [5, 6, 12], "root": [2, 5], "shuffl": [5, 8], "batch_siz": [5, 13], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 13], "train_it": [5, 13], "next": [5, 13], "befor": [5, 7, 8, 15], "pass": [5, 6, 7, 15], "batch": [5, 7, 8, 13, 15], "drop": 5, "isn": 5, "full": [5, 9, 15], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 13, 15], "content": [5, 6, 9, 15], "properli": 5, "model": [5, 9, 11, 13], "interpret": [5, 6], "multipl": [5, 6, 8], "name": [5, 7, 15], "10": [5, 9, 15], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 15], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 
5, "currenc": 5, "5": [5, 8, 9, 15], "ancient_greek": 5, "48": [5, 15], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 15], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 15], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 13], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 12, 15], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 5, "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": 5, "234": 5, "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 15], 
"dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 15], "map": 5, "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 15], "option": 5, "start": 5, "case": [5, 9, 15], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 15], "modul": [6, 8, 9, 15], "easili": [6, 9, 12, 13, 14, 15], "export": [6, 7, 9, 10, 14, 15], "analysi": 6, "format": [6, 9, 13, 14, 15], "organ": 6, "uninterrupt": [6, 15], "confid": [6, 9, 15], "float": [6, 8, 9, 14], "associ": 6, "predict": [6, 7, 9], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 15], "rel": [6, 8, 9], "collect": 6, "meant": [6, 14], "two": [6, 11], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 14], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 13], "etc": 6, "artefact_typ": 6, "type": [6, 12, 15], "sever": [6, 8, 15], "its": [6, 7, 8, 9, 13, 15], "titl": [6, 15], "underneath": 6, "page_idx": [6, 15], "dimens": [6, 9, 15], "dict": [6, 9, 15], "numpi": [6, 7, 9, 15], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 15], "raw": [6, 9], "pixel": [6, 8, 15], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 15], "overlai": 6, "displai": [6, 9], "matplotlib": 9, "pyplot": 9, "method": [8, 15], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 15], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8, 14], "render": 6, "72dpi": 6, "output": [6, 8], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": [], "decod": 6, "shape": [6, 7, 8, 9, 14, 15], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [2, 6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 14], "float32": [6, 7, 8, 14], "desir": 6, "relat": [2, 6], "divid": 6, "255": [6, 7, 8, 9, 15], "decode_img_as_tensor": 6, "img_cont": 6, 
"stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": [], "documentfil": [6, 12], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15], "from_url": 6, "from_imag": [6, 12], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": 7, "describ": [7, 9], "veri": 7, "deep": [7, 15], "convolut": 7, "larg": [7, 12], "modifi": 7, "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 15], "input_tensor": 7, "random": [7, 8, 9, 14, 15], "uniform": [7, 8, 14], "512": 7, "maxval": [7, 8, 14], "imagenet": 7, "extractor": 7, "resnet18": [7, 12], "resnet": 7, "18": [7, 15], "residu": 7, "boolean": [7, 15], "resnet34": 7, "34": [7, 15], "resnet50": [7, 12], "50": [7, 13, 15], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 15], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 14], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8, 14], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": 7, "unoffici": 7, "config": [2, 7], "vit_b": 7, "b": [7, 9], "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 8, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 7, "arch": [7, 12], "croporientationpredictor": 7, "np": [7, 8, 9, 14, 15], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 14, 15], "600": [7, 9, 15], "800": [7, 9, 13, 15], "astyp": [7, 9, 14, 15], "crop": [7, 8, 13, 15], "dataset": [7, 15], "linknet_resnet18": [7, 15], "1024": [7, 9, 14, 15], "linknet_resnet34": 7, "linknet_resnet50": 7, "db_resnet50": [7, 12, 14, 15], "backbon": 7, "db_mobilenet_v3_larg": [7, 12, 15], "mobilenet": [7, 12], "v3": [7, 12], "detection_predictor": [7, 15], "assume_straight_pag": [7, 15], "detectionpredictor": 7, "input_pag": [7, 9, 15], "itself": [7, 12], "fit": [7, 15], 
"crnn_vgg16_bn": [7, 12, 15], "128": [7, 15], "crnn_mobilenet_v3_smal": [7, 15], "crnn_mobilenet_v3_larg": [7, 12, 15], "sar_resnet31": [7, 15], "31": [7, 15], "64": [7, 8, 15], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": [], "vitstr_smal": 7, "vitstr_bas": 7, "recognition_predictor": [7, 15], "recognitionpredictor": 7, "ocr_predictor": [7, 12, 15], "det_arch": [7, 12], "reco_arch": [7, 12], "pretrained_backbon": 7, "symmetric_pad": [7, 8, 15], "export_as_straight_box": [7, 15], "detect_orient": 7, "straighten_pag": [], "detect_languag": 7, "ocrpredictor": 7, "up": [7, 15], "assum": 7, "preserv": [7, 8, 15], "ratio": [7, 8, 15], "symmetr": [7, 8, 15], "bottom": [7, 15], "final": [7, 14], "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [7, 12], "login": 7, "huggingfac": 7, "hub": 7, "from_hub": [7, 12], "repo_id": [7, 12], "instanti": [7, 15], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 12], "model_nam": [7, 12], "save": [7, 13, 14], "configur": [2, 7], "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 15], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 15], "done": 8, "mean": [8, 9], "std": 8, "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 13], "225": 8, "averag": [8, 15], "per": [8, 15], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [3, 8, 15], "rang": [8, 14], "randombright": 8, "max_delta": 8, "adjust": [2, 8], "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 12], "pick": 8, "p": [8, 9, 15], "probabl": 8, "randomcontrast": 8, 
"contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9], "param": [8, 15], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9], "randomrot": 8, "max_angl": 8, "expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 15], "75": [8, 15], "33": 8, "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 12], "consecut": [8, 15], "sequenti": [8, 14], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 15], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 15], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 15], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 15], "famili": 9, "synthes": 9, "metric": [9, 15], "assess": 9, "textmatch": 9, "match": [9, 15], "accuraci": 9, "aggreg": [9, 13], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [9, 15], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [9, 14], "updat": 9, "hello": [9, 15], "world": [9, 15], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 15], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 15], "g_": 9, 
"precis": [9, 15], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 15], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 15], "110": 9, "95": [9, 15], "200": 9, "150": [9, 15], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 11], "either": [9, 15], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [9, 15], "detectionmetr": 9, "c_j": 9, "compil": [10, 15], "better": [10, 15], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 15], "searchabl": 10, "don": 15, "meet": [], "detail": 15, "link": [], "section": [12, 14, 15], "det_model": 12, "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": 12, "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [12, 13, 15], "class_nam": [], "total": [], "date": 15, "preprocessor": 15, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": 11, "restrict": [], "write": 11, "outsid": 11, "tmp": 11, "work": [11, 15], "step": [], "usag": 11, "multiprocess": 11, "doctr_multiprocessing_dis": 11, "variabl": 11, "becaus": 11, "shm": 11, "share": [11, 13, 15], "chang": 11, "By": 11, "doctr_cache_dir": 11, "focu": 12, "love": 12, "appreci": 12, "interfac": 12, "io": 12, "custom": 12, "felix92": 12, "db": 12, "vgg16": 12, "bn": 12, "plug": 12, "obj_detect": 12, "exist": 12, "overwritten": 12, "prerequisit": 12, "creat": 12, "co": 12, "instal": 12, "git": 12, "lf": 12, "my_awesome_model": 12, "v1": 12, "directli": [12, 15], "after": [2, 12, 15], "python3": 12, "train_tensorflow": 12, "py": [2, 12], "train_pytorch": 12, "tabl": 12, "pull": 12, "dummi": 12, "tilman": [], "rassi": [], "fascan": [], "evalu": [13, 15], "predefin": 13, "prefer": 13, "signific": 
13, "valid": 13, "149": 13, "626": 13, "360": 13, "2000": 13, "3000": 13, "249": 13, "33402": 13, "13068": 13, "772875": 13, "85875": 13, "246": 13, "233": 13, "resourc": [13, 14], "7149": 13, "796": 13, "handwritten": 13, "1268": [], "472": [], "21888": 13, "8707": 13, "33608": 13, "19342": 13, "uppercas": 13, "19370": 13, "2186": 13, "257": 13, "647": 13, "73257": 13, "26032": 13, "7100000": 13, "707470": 13, "1156": 13, "1107": 13, "849": 13, "1095": 13, "207901": 13, "22672": 13, "7581382": 13, "1337891": 13, "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": 13, "regular": 13, "2700": 13, "300": 13, "background": 13, "qr_code": 13, "bar_cod": 13, "photo": 13, "classif": 13, "mani": [13, 15], "sensit": [13, 15], "abl": [13, 15], "howev": 13, "guidanc": 13, "tool": 13, "further": 13, "anot": 13, "handl": 13, "underli": 13, "defer": 13, "dataload": 13, "good": 14, "achiev": 14, "might": [14, 15], "tune": 14, "thing": [11, 14, 15], "product": 14, "readi": 14, "help": 14, "support": 15, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 15, "layer": [], "metadata": [], "util": 14, "export_model_to_onnx": [], "input_shap": 14, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 15, "onc": 15, "separ": 15, "compon": 15, "charg": 15, "usabl": 15, "backend": 15, "along": 15, "processor": 15, "reusabl": 15, "consist": 15, "delimit": 15, "2d": 15, "corner": 15, "flag": 15, "belong": 15, "skew": 15, "comprehens": 15, "benchmark": 15, "publicli": 15, "sec": [], "25": 15, "84": 15, "39": 15, "85": 15, "86": 15, "93": 15, "83": 15, "24": [], "80": 15, "29": 15, "90": 15, "67": 15, "76": 15, "11": 15, "81": 15, "71": 15, "7": 15, "21": 15, "82": 15, "20": 15, "49": 15, "87": 15, "63": 15, "17": [], "28": [], "51": 15, "46": 15, "db_resnet34": [], 
"22": [], "89": 15, "74": 15, "56": 15, "68": 15, "92": 15, "61": 15, "41": 15, "00": 15, "79": 15, "38": 15, "88": [], "62": 15, "26": [], "06": 15, "78": 15, "47": 15, "54": [], "abov": 15, "cf": 15, "disclaim": 15, "combin": 15, "199": 15, "second": 15, "warmup": 15, "phase": 15, "measur": 15, "1000": 15, "obtain": 15, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 15, "useabl": 15, "favorit": 15, "dummy_img": 15, "area": 15, "send": 15, "snippet": 15, "transcrib": 15, "partial": [], "15": 15, "9": [], "73": 15, "44": [], "14": 15, "55": [], "58": [], "57": 15, "66": 15, "01": 15, "98": 15, "23": [], "69": 15, "99": 15, "91": 15, "05": [], "09": [], "96": 15, "40": [], "53": 15, "most": 15, "print": 15, "cfg": 15, "30595": 15, "45": 15, "72": 15, "43": 15, "65": 15, "77": 15, "30": 15, "07": [], "27": 15, "gvision": 15, "59": 15, "03": 15, "azur": [], "recogn": [], "42": 15, "go": 15, "mention": 15, "still": 15, "return": [5, 6, 7, 9, 15], "nest": 15, "get": 15, "typic": 15, "layout": 15, "340": 15, "json_output": 15, "1357421875": 15, "0361328125": 15, "8564453125": 15, "8603515625": 15, "914085328578949": 15, "5478515625": 15, "06640625": 15, "5810546875": 15, "0966796875": 15, "9949972033500671": 15, "51171875": 15, "1630859375": 15, "9578408598899841": 15, "1396484375": 15, "3232421875": 15, "185546875": 15, "3515625": 15, "outpout": 15, "xml": 15, "hocr": 15, "export_as_xml": 15, "xml_output": 15, "xml_bytes_str": 15, "xml_element": 15, "utf": 15, "xmln": 15, "w3": 15, "1999": 15, "xhtml": 15, "lang": 15, "en": 15, "meta": 15, "equiv": 15, "charset": 15, "system": 15, "ocr_pag": 15, "ocr_carea": 15, "ocr_par": 15, "ocr_lin": 15, "ocrx_word": 15, "div": 15, "id": 15, "page_1": 15, "bbox": 15, "3456": 15, "ppageno": 15, "block_1_1": 15, "857": 15, "529": 15, "2504": 15, "2710": 15, "par_1_1": 15, "span": 15, "line_1_1": 15, "x_size": 15, "x_descend": 15, "x_ascend": 15, "word_1_1": 15, "1552": 15, "540": 
15, "1778": 15, "580": 15, "x_wconf": 15, "word_1_2": 15, "1782": 15, "1900": 15, "583": 15, "word_1_3": 15, "1420": 15, "597": 15, "1684": 15, "641": 15, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 14, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 6, "seemlessli": [], "conda": [], "newer": [], "developp": 3, "fp": 15, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": [], "16bn": [], "convert_to_tflit": [], "tf_model": 14, "tflite": 14, "conv_sequ": 14, "relu": 14, "kernel_s": 14, "serialized_model": 14, "convert_to_fp16": [], "half": [], "serial": 14, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 14, "abstract": [], "verifi": 2, "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": [], "154": [], "as_imag": [], "convert_page_to_numpi": [], "get_word": [], "fitz": [], "gettextword": [], "get_artefact": [], "entir": [], "fulli": [], "daili": [], "mix": [], "fine": 15, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 15, "feed": [], "warm": [], "c5": 15, "x12larg": 15, "xeon": 15, "platinum": 15, "8275l": 15, "913": [], 
"917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 14, "input_t": 14, "saved_model": 14, "And": 14, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 15, "02": 15, "5m": 15, "1m": 15, "19": [], "invoic": 15, "flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": [], "arg": [5, 7], "bash": [], "tax": 15, "35": 15, "vgg16_bn": [], "mobilenetv3_larg": [], "mobilenetv3_smal": [], "constraint": 14, "tfliteconvert": 14, "from_keras_model": 14, "target_spec": 14, "supported_typ": 14, "float16": 14, "fallback": 14, "oper": [2, 14], "representative_dataset": 14, "yield": 14, "supported_op": 14, "opsset": 14, "tflite_builtins_int8": 14, "inference_input_typ": 14, "int8": 14, "inference_output_typ": 14, "2m": 15, "7m": 15, "look": 15, "variou": 15, "below": 15, "unfortun": 15, "moment": 15, "04": 15, "36": 15, "97": 15, "resum": 15, "road": 15, "get_text_word": [], "get_lin": [], "style": 2, "incom": 2, "pr": 2, "compli": 2, "flake8": 2, "convent": 2, "isort": 2, "reorder": 2, "catch": 2, "cleaner": 2, "mypi": 2, "ini": 2, "keep": 2, "sane": 2, "pydocstyl": 2, "_helper": [], "pdf_render": [], "render_pdf_topil": [], "linknet_resnet18_rot": [7, 15], "db_resnet50_rot": 15, "nb": 15, "pdfdocument": 6, "render_to": 6, "amazon": [11, 15], "doe": 11, "anywher": 11, "There": 11, "enivron": 11, "ec2": 15}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", 
"RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", "vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet18_rotation"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 
1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": 0, "29": [], "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, 
"guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": [], "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 15], "recognit": [4, 7, 12, 13, 15], "main": 4, "model": [4, 7, 12, 14, 15], "zoo": [4, 7, 15], "detect": [4, 7, 12, 13, 15], "support": [4, 5, 8], "dataset": [4, 5, 13], "arg": [], "synthet": [5, 13], "gener": [5, 13], "custom": 5, "loader": 5, "dataload": 5, "vocab": 5, "return": [], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 12], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 14, "your": [12, 13, 14], "own": 13, "load": [12, 13], "aw": 11, "lambda": 11, "share": 12, "commun": 12, "from": 12, "huggingfac": 12, "hub": 12, "push": 12, "pretrain": 12, "name": 12, "convent": 12, "choos": [13, 15], "readi": 13, "us": [13, 14], "avail": [13, 15], "object": 13, "data": 13, "prepar": 14, "infer": 14, "optim": [], "half": 14, "precis": 14, "export": [], "onnx": [], "right": 15, "architectur": 15, "predictor": 15, "end": 15, "ocr": 15, "two": 15, "stage": 15, "approach": 15, "what": 15, "should": 15, "i": 15, "do": 15, "output": 15, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 14, "build": [], "implement": [], "content": [], "compress": 14, "savedmodel": 14, "note": [], "refer": [], "backbon": [], "tensorflow": 14, "lite": 14, "quantiz": 14, "public": [], "privat": [], "lint": 2, "import": 2, "order": 2, "annot": 2, "type": 2, "docstr": 2, "format": 
2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. 
Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Lint verification": [[2, "lint-verification"]], "Import order": [[2, "import-order"]], "Annotation typing": [[2, "annotation-typing"]], "Docstring format": [[2, "docstring-format"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [13, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id3"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], 
"doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, "doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "AWS Lambda": [[11, "aws-lambda"]], "Share your model with the community": [[12, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[12, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[12, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[12, "pretrained-community-models"]], "Naming conventions": [[12, "naming-conventions"]], "Classification": [[12, "classification"]], "Detection": [[12, "detection"], [13, "detection"]], "Recognition": [[12, "recognition"], [13, "recognition"]], "Choose a ready to use dataset": [[13, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[13, "available-datasets"]], "Object Detection": [[13, "object-detection"]], "Use your own datasets": [[13, "use-your-own-datasets"]], "Data Loading": [[13, "data-loading"]], "Preparing your model for inference": [[14, "preparing-your-model-for-inference"]], "Model compression": [[14, "model-compression"]], "TensorFlow Lite": [[14, "tensorflow-lite"]], "Half-precision": [[14, "half-precision"]], "Post-training quantization": [[14, "post-training-quantization"]], "Using SavedModel": [[14, "using-savedmodel"]], "Choosing the right model": [[15, "choosing-the-right-model"]], "Text Detection": [[15, "text-detection"]], "Available architectures": [[15, 
"available-architectures"], [15, "id1"], [15, "id2"]], "Detection predictors": [[15, "detection-predictors"]], "Text Recognition": [[15, "text-recognition"]], "Text recognition model zoo": [[15, "id3"]], "Recognition predictors": [[15, "recognition-predictors"]], "End-to-End OCR": [[15, "end-to-end-ocr"]], "Two-stage approaches": [[15, "two-stage-approaches"]], "What should I do with the output?": [[15, "what-should-i-do-with-the-output"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, "doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], 
"document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], 
"linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet18_rotation() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18_rotation"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], "resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in 
module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in 
doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", 
"contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 5, 7, 12], "we": [1, 2, 3, 4, 6, 8, 12, 13, 15], "member": 1, "leader": 1, "make": [1, 2, 9, 11, 12, 14, 15], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 12], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 15], "size": [1, 5, 6, 8, 9, 15], "visibl": 1, "invis": 1, "disabl": [1, 11], "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 15], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 13], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 15], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 13], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 14, 15], "open": [1, 2, 12], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, 
"exampl": [1, 2, 4, 5, 7, 12], "behavior": 1, "posit": [1, 9], "environ": 1, "includ": [1, 3, 5, 13], "demonstr": 1, "empathi": 1, "kind": [1, 15], "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 15], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15], "from": [1, 2, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 11, 12, 13, 14], "best": 1, "just": [1, 14], "u": [1, 2, 15], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 15], "us": [1, 2, 3, 5, 7, 9, 11, 12, 15], "languag": [1, 4, 5, 6, 7, 12, 15], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 14, 15], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": [1, 15], "publish": 1, "inform": [1, 2, 4, 5, 13, 15], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 11, 13, 15], "could": 1, "reason": 1, "consid": [1, 2, 5, 6, 9, 15], "inappropri": 1, "profession": 1, "set": [1, 2, 5, 7, 9, 11, 15], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 11, 13, 15], "clarifi": 1, "take": [1, 5, 14, 15], "appropri": [1, 2, 15], "fair": 1, "action": 1, "thei": [1, 9, 15], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 9, 12, 13, 14, 15], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2, 12], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 13, 15], "space": 1, "also": [1, 7, 12, 13, 15], "offici": 1, "repres": [1, 9, 15], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 15], "via": 1, "social": 1, "media": 1, "account": [1, 12, 14], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 15], 
"abus": 1, "otherwis": [1, 6, 9], "mai": [1, 15], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 11, 12, 15], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 11], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 12, 14, 15], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 14], "written": [1, 6], "provid": [1, 2, 4, 12, 13, 14, 15], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 15], "why": 1, "wa": 1, "apologi": 1, "request": [1, 12], "through": [1, 8, 13], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 15], "involv": [1, 15], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 13], "avoid": [1, 3], "well": [1, 14], "extern": [1, 13], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": [1, 11], "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 15], "adapt": 1, "version": [1, 2, 3, 14, 15], "0": [1, 3, 5, 8, 9, 13, 15], "avail": [1, 4, 8], "http": [1, 3, 6, 7, 11, 12, 15], "www": [1, 6, 15], "org": [1, 7, 15], "_": [1, 5, 7, 14], "html": [1, 2, 15], "were": [1, 6, 15], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 11, 12], "ladder": 1, "For": [1, 2, 3, 15], "answer": 1, "common": [1, 2, 8, 9], "question": 1, "about": [1, 11, 13, 15], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 15], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15], "need": [2, 3, 5, 9, 11, 12], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 13], "packag": [2, 4, 9, 11, 13, 14], "python": 2, "doc": [2, 6, 15], "librari": [2, 3, 10], "build": [2, 3], "script": [2, 13], "refer": [2, 3, 12, 13, 15], "train": [2, 5, 7, 8, 12, 13, 15], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 15], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, 
"mainten": 2, "github": [2, 3, 12], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 12, 15], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 12, 13, 15], "your": [2, 4, 6, 9, 15], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 13], "encount": 2, "problem": 2, "suggest": [2, 12], "input": [2, 6, 7, 8, 15], "ha": [2, 5, 9, 13], "valu": [2, 6, 8, 15], "can": [2, 3, 11, 12, 13, 14, 15], "purpos": 2, "advis": 2, "first": 2, "check": [2, 12, 15], "topic": 2, "wasn": 2, "t": [2, 5, 15], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 14, 15], "feel": [2, 12], "new": [2, 9], "one": [2, 5, 7, 8, 12, 15], "do": [2, 3, 7, 11, 14], "so": [2, 3, 5, 7, 12, 13], "whenev": 2, "possibl": [2, 9, 12], "enough": [2, 15], "jump": 2, "wonder": 2, "how": [2, 12, 13], "someth": 2, "more": [2, 9, 11, 13, 15], "gener": [2, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 15], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 13, 15], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [9, 15], "pip": [2, 3], "upgrad": [], "dev": [2, 11], "pre": 7, "docstr": [], "In": [2, 5, 13], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 15], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [5, 6, 8], "same": [2, 6, 9, 13, 15], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 13, 15], "To": [2, 3, 12, 15], "togeth": [2, 6], "current": 15, "built": [], "sphinx": [], "thank": [], "our": [7, 15], "file": [2, 5], "been": [9, 13, 15], "rebuilt": [], "want": [14, 15], "forc": [], "complet": [], "rebuild": [], "delet": [], "_build": 2, "directori": 11, "addition": [2, 14, 15], "clear": [], "web": 6, "browser": [2, 4], "cach": [5, 11], "modif": 2, "now": 2, "locat": [2, 6], "index": [2, 6], "wish": [2, 14], "somewher": 2, "els": 2, "than": [2, 3, 9, 12], "join": 2, "slack": 2, "where": [2, 6, 8, 9, 15], "find": [2, 3, 13], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 14, 15], "8": [7, 8, 
15], "higher": [3, 5], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 12, 15], "pytorch": [3, 4, 7, 8, 12, 15], "correspond": [3, 6, 15], "page": [3, 5, 7, 9, 15], "2": [3, 4, 5, 6, 8, 11, 14, 15], "macbook": 3, "m1": 3, "chip": 3, "some": [2, 3, 10, 12, 13], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 13, 14, 15], "12": [3, 15], "anoth": [3, 7, 13], "linux": 3, "few": [3, 14], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 15], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 15], "over": [3, 5, 9, 15], "here": [3, 8, 10, 13, 15], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 15], "strive": 3, "reduc": [3, 8], "framework": [3, 12, 13, 15], "minimum": [3, 5, 8, 9], "necessari": 3, "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 12, 14], "torch": [3, 8, 12], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 15], "charact": [4, 5, 6, 9, 13, 15], "made": 4, "seamless": 4, "access": [4, 6, 13, 15], "anyon": 4, "power": 4, "easi": [4, 9, 12], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 15], "understand": [4, 5, 15], "task": [4, 5, 7, 12, 13, 15], "ocr": [4, 5, 7, 9, 12, 13], "predictor": [4, 6, 7, 12], "pars": [4, 5], "textual": [4, 5, 6, 7, 15], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 13, 15], "word": [4, 5, 7, 9, 15], "research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 12], "speed": [4, 7], "perform": [4, 6, 8, 9, 14, 15], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 14, 15], "paramet": [4, 5, 6, 7, 8, 9], "friendli": 4, "line": [4, 9, 15], "code": [4, 6], "load": [4, 5, 7, 14], "googlevis": 4, "aw": [4, 15], "textract": [4, 15], "optim": [4, 14], "infer": [4, 7, 8], "both": [4, 5, 8, 13, 15], "cpu": [4, 15], "gpu": 4, "light": 4, "activ": 4, "maintain": 4, "integr": [4, 12, 13], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], 
"scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 15], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 15], "sar": [4, 7], "show": [4, 6, 7, 9, 12], "attend": [4, 7], "read": [2, 4, 5, 7, 11], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 15], "irregular": [4, 7, 13], "crnn": [4, 7, 12], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7], "network": [4, 5, 7], "imag": [4, 5, 6, 7, 8, 9, 12, 13, 15], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 15], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 15], "multi": [4, 7], "aspect": [4, 7, 8, 15], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": 7, "permut": [], "autoregress": [], "funsd": [4, 5, 13, 15], "form": [4, 5, 15], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 13, 15], "consolid": [4, 5], "receipt": [4, 5, 15], "forpost": [4, 5], "sroie": [4, 5, 13], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 13], "visual": 4, "geometri": [4, 6, 15], "group": 4, "svhn": [4, 5, 13], "digit": [4, 5, 13], "unsupervis": 4, "ic03": [4, 5, 13], "2003": [4, 5], "ic13": [4, 5, 13], "2013": [4, 5], "imgur5k": [4, 5, 13], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 13], "synthet": 4, "data": [4, 5, 6, 8, 9, 12], "artifici": [4, 5], "iiithw": [], "wildreceipt": [], "spatial": [6, 9], "dual": [], "modal": [], "graph": 6, "kei": [], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15], "use_polygon": [5, 9, 13], "fals": [5, 6, 7, 8, 9, 14, 15], "recognition_task": [5, 13], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 12], "document": [5, 7, 9, 10, 13, 15], "import": [5, 6, 7, 8, 9, 12, 13, 14, 15], "train_set": [5, 13], "download": [5, 13], "img": [5, 8, 13], "target": [5, 6, 8, 9, 13], "subset": [5, 15], "polygon": [5, 15], "rotat": [5, 6, 
7, 8, 9, 13, 15], "bound": [5, 6, 7, 8, 9, 15], "box": [5, 6, 7, 8, 9, 13, 15], "instead": [5, 6, 7], "straight": [5, 7, 13, 15], "ones": [5, 8, 9], "recognit": [5, 9], "keyword": [5, 7], "argument": [5, 7, 15], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 13], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 13], "prior": 5, "svt": [5, 13], "ucsd": 5, "comput": [5, 9, 15], "hous": 5, "number": [5, 8, 9, 15], "localis": 5, "repositori": [2, 5, 7, 12], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 13], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 13], "part": [5, 8, 15], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 13, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": [2, 5, 14], "annot": 5, "abstractdataset": 5, "label_path": [5, 13], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 13, 15], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": [], "images_90k_norm": [], "90k": [], "docartefact": [5, 13], "object": [5, 9, 10, 15], "detect": [5, 9, 10], "element": [5, 6, 7, 9, 15], "varieti": 5, "arxiv": 7, "ab": [], "2103": [], "14470v1": [], "test": 13, "charactergener": [5, 13], "implement": [5, 6, 8, 9, 14, 15], "d": [5, 13], "abdef": [5, 13], "num_sampl": [5, 13], "100": [5, 8, 9, 13, 14, 15], "vocabulari": [5, 12], "sampl": [5, 13, 15], "iter": [5, 8, 13], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 15], "sample_transform": 5, "wordgener": [5, 13], "min_char": [5, 13], "int": [5, 6, 8, 9], "max_char": [5, 13], "list": [5, 6, 8, 9, 12], "none": [5, 6, 7, 8, 9, 15], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 13, 14, 15], "maximum": [5, 8], "detectiondataset": [5, 13], "recognitiondataset": [5, 13], "labels_path": [5, 13], "contain": [5, 13, 15], 
"ocrdataset": [5, 13], "label_fil": [5, 13], "jpg": [5, 6, 12], "root": [2, 5], "shuffl": [5, 8], "batch_siz": [5, 13], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 13], "train_it": [5, 13], "next": [5, 13], "befor": [5, 7, 8, 15], "pass": [5, 6, 7, 15], "batch": [5, 7, 8, 13, 15], "drop": 5, "isn": 5, "full": [5, 9, 15], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 13, 15], "content": [5, 6, 9, 15], "properli": 5, "model": [5, 9, 11, 13], "interpret": [5, 6], "multipl": [5, 6, 8], "name": [5, 7, 15], "10": [5, 9, 15], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 15], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 15], "ancient_greek": 5, "48": [5, 15], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 15], "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 15], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 13], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 12, 15], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, 
"portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": [], "120": [], "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": [], "german": 5, "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": [], "118": [], "\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": [], "dutch": [], "114": [], "norwegian": [], "106": [], "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": [], "danish": [], "finnish": [], "104": [], "\u00e4\u00f6\u00e4\u00f6": [], "swedish": [], "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": [], "vietnames": 5, "234": 5, 
"\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": [], "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": [], "multilingu": [], "195": [], "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 15], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 15], "map": 5, "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 15], "option": 5, "start": 5, "case": [5, 9, 15], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 15], "modul": [6, 8, 9, 15], "easili": [6, 9, 12, 13, 14, 15], "export": [6, 7, 9, 10, 14, 15], "analysi": 6, "format": [6, 9, 13, 14, 15], "organ": 6, "uninterrupt": [6, 15], "confid": [6, 9, 15], "float": [6, 8, 9, 14], "associ": 6, "predict": [6, 7, 9], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 15], "rel": [6, 8, 9], "collect": 6, "meant": [6, 14], "two": [6, 11], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 14], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 13], "etc": 6, "artefact_typ": 6, "type": [6, 12, 15], "sever": [6, 8, 15], "its": [6, 7, 8, 9, 13, 15], "titl": [6, 15], 
"underneath": 6, "page_idx": [6, 15], "dimens": [6, 9, 15], "dict": [6, 9, 15], "numpi": [6, 7, 9, 15], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 15], "raw": [6, 9], "pixel": [6, 8, 15], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 15], "overlai": 6, "displai": [6, 9], "matplotlib": 9, "pyplot": 9, "method": [8, 15], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 15], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8, 14], "render": 6, "72dpi": 6, "output": [6, 8], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": [], "decod": 6, "shape": [6, 7, 8, 9, 14, 15], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [2, 6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 14], "float32": [6, 7, 8, 14], "desir": 6, "relat": [2, 6], "divid": 6, "255": [6, 7, 8, 9, 15], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": [], "documentfil": [6, 12], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15], "from_url": 6, "from_imag": [6, 12], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": 7, "describ": [7, 9], "veri": 7, "deep": [7, 15], "convolut": 7, "larg": [7, 12], "modifi": 7, "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 15], "input_tensor": 7, "random": [7, 8, 9, 14, 15], "uniform": [7, 8, 14], "512": 7, "maxval": [7, 8, 14], "imagenet": 7, "extractor": 7, "resnet18": [7, 12], "resnet": 7, "18": [7, 15], "residu": 7, "boolean": [7, 15], "resnet34": 7, "34": [7, 15], "resnet50": [7, 12], "50": [7, 13, 15], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 15], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 14], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, 
"mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8, 14], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": 7, "unoffici": 7, "config": [2, 7], "vit_b": 7, "b": [7, 9], "textnet_tini": [], "textnet": [], "faster": [], "arbitrarili": [], "detector": [], "minimalist": [], "kernel": 8, "czczup": [], "tini": [], "textnet_smal": [], "textnet_bas": [], "crop_orientation_predictor": 7, "arch": [7, 12], "croporientationpredictor": 7, "np": [7, 8, 9, 14, 15], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 14, 15], "600": [7, 9, 15], "800": [7, 9, 13, 15], "astyp": [7, 9, 14, 15], "crop": [7, 8, 13, 15], "dataset": [7, 15], "linknet_resnet18": [7, 15], "1024": [7, 9, 14, 15], "linknet_resnet34": 7, "linknet_resnet50": 7, "db_resnet50": [7, 12, 14, 15], "backbon": 7, "db_mobilenet_v3_larg": [7, 12, 15], "mobilenet": [7, 12], "v3": [7, 12], "detection_predictor": [7, 15], "assume_straight_pag": [7, 15], "detectionpredictor": 7, "input_pag": [7, 9, 15], "itself": [7, 12], "fit": [7, 15], "crnn_vgg16_bn": [7, 12, 15], "128": [7, 15], "crnn_mobilenet_v3_smal": [7, 15], "crnn_mobilenet_v3_larg": [7, 12, 15], "sar_resnet31": [7, 15], "31": [7, 15], "64": [7, 8, 15], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": [], "vitstr_smal": 7, "vitstr_bas": 7, "recognition_predictor": [7, 15], "recognitionpredictor": 7, "ocr_predictor": [7, 12, 15], "det_arch": [7, 12], "reco_arch": [7, 12], "pretrained_backbon": 7, "symmetric_pad": [7, 8, 15], "export_as_straight_box": [7, 15], "detect_orient": 7, "straighten_pag": [], "detect_languag": 7, "ocrpredictor": 7, "up": [7, 15], "assum": 7, "preserv": [7, 8, 15], "ratio": [7, 8, 15], "symmetr": [7, 8, 15], "bottom": [7, 15], "final": [7, 14], "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": [], "Then": [], "again": [], "improv": [], "kie_predictor": [], "kiepredictor": [], "kie": [], "login_to_hub": [7, 12], "login": 7, 
"huggingfac": 7, "hub": 7, "from_hub": [7, 12], "repo_id": [7, 12], "instanti": [7, 15], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 12], "model_nam": [7, 12], "save": [7, 13, 14], "configur": [2, 7], "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 15], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 15], "done": 8, "mean": [8, 9], "std": 8, "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 13], "225": 8, "averag": [8, 15], "per": [8, 15], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [3, 8, 15], "rang": [8, 14], "randombright": 8, "max_delta": 8, "adjust": [2, 8], "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 12], "pick": 8, "p": [8, 9, 15], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9], "param": [8, 15], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9], "randomrot": 8, "max_angl": 8, "expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 15], "75": [8, 15], "33": 8, "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 12], "consecut": [8, 15], "sequenti": [8, 14], "oneof": 8, "jpegqual": 8, 
"randomappli": 8, "regroup": 9, "core": 9, "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 15], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 15], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 15], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 15], "famili": 9, "synthes": 9, "metric": [9, 15], "assess": 9, "textmatch": 9, "match": [9, 15], "accuraci": 9, "aggreg": [9, 13], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": 9, "f_a": 9, "left": [9, 15], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": [9, 14], "updat": 9, "hello": [9, 15], "world": [9, 15], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 15], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 15], "g_": 9, "precis": [9, 15], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 15], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 15], "110": 9, "95": [9, 15], "200": 9, "150": [9, 15], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 11], "either": [9, 15], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": [9, 15], "detectionmetr": 9, "c_j": 9, "compil": [10, 15], "better": [10, 15], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 15], "searchabl": 10, "don": 15, "meet": [], "detail": 15, "link": [], "section": [12, 14, 15], "det_model": 12, "load_weight": [], "path_to_checkpoint": [], "weight": [], "reco_model": 12, "det_param": [], "path_to_pt": [], "map_loc": [], "load_state_dict": [], "reco_param": [], "vocab": [12, 13, 15], 
"class_nam": [], "total": [], "date": 15, "preprocessor": 15, "det_predictor": [], "798": [], "785": [], "772": [], "264": [], "2749": [], "287": [], "reco_predictor": [], "694": [], "695": [], "693": [], "299": [], "296": [], "301": [], "polici": 11, "restrict": [], "write": 11, "outsid": 11, "tmp": 11, "work": [11, 15], "step": [], "usag": 11, "multiprocess": 11, "doctr_multiprocessing_dis": 11, "variabl": 11, "becaus": 11, "shm": 11, "share": [11, 13, 15], "chang": 11, "By": 11, "doctr_cache_dir": 11, "focu": 12, "love": 12, "appreci": 12, "interfac": 12, "io": 12, "custom": 12, "felix92": 12, "db": 12, "vgg16": 12, "bn": 12, "plug": 12, "obj_detect": 12, "exist": 12, "overwritten": 12, "prerequisit": 12, "creat": 12, "co": 12, "instal": 12, "git": 12, "lf": 12, "my_awesome_model": 12, "v1": 12, "directli": [12, 15], "after": [2, 12, 15], "python3": 12, "train_tensorflow": 12, "py": [2, 12], "train_pytorch": 12, "tabl": 12, "pull": 12, "dummi": 12, "tilman": [], "rassi": [], "fascan": [], "evalu": [13, 15], "predefin": 13, "prefer": 13, "signific": 13, "valid": 13, "149": 13, "626": 13, "360": 13, "2000": 13, "3000": 13, "249": 13, "33402": 13, "13068": 13, "772875": 13, "85875": 13, "246": 13, "233": 13, "resourc": [13, 14], "7149": 13, "796": 13, "handwritten": 13, "1268": [], "472": [], "21888": 13, "8707": 13, "33608": 13, "19342": 13, "uppercas": 13, "19370": 13, "2186": 13, "257": 13, "647": 13, "73257": 13, "26032": 13, "7100000": 13, "707470": 13, "1156": 13, "1107": 13, "849": 13, "1095": 13, "207901": 13, "22672": 13, "7581382": 13, "1337891": 13, "7141797": [], "793533": [], "49377": [], "19598": [], "alwai": 13, "regular": 13, "2700": 13, "300": 13, "background": 13, "qr_code": 13, "bar_cod": 13, "photo": 13, "classif": 13, "mani": [13, 15], "sensit": [13, 15], "abl": [13, 15], "howev": 13, "guidanc": 13, "tool": 13, "further": 13, "anot": 13, "handl": 13, "underli": 13, "defer": 13, "dataload": 13, "good": 14, "achiev": 14, "might": [14, 15], 
"tune": 14, "thing": [11, 14, 15], "product": 14, "readi": 14, "help": 14, "support": 15, "devic": [], "fp16": [], "point": [], "occupi": [], "bit": [], "advantag": [], "less": [], "mixed_precis": [], "set_global_polici": [], "mixed_float16": [], "cuda": [], "re": [], "exchang": [], "interoper": [], "machin": [], "structur": 15, "layer": [], "metadata": [], "util": 14, "export_model_to_onnx": [], "input_shap": 14, "dummy_input": [], "tensorspec": [], "model_path": [], "come": [], "soon": [], "seen": 15, "onc": 15, "separ": 15, "compon": 15, "charg": 15, "usabl": 15, "backend": 15, "along": 15, "processor": 15, "reusabl": 15, "consist": 15, "delimit": 15, "2d": 15, "corner": 15, "flag": 15, "belong": 15, "skew": 15, "comprehens": 15, "benchmark": 15, "publicli": 15, "sec": [], "25": 15, "84": 15, "39": 15, "85": 15, "86": 15, "93": 15, "83": 15, "24": [], "80": 15, "29": 15, "90": 15, "67": 15, "76": 15, "11": 15, "81": 15, "71": 15, "7": 15, "21": 15, "82": 15, "20": 15, "49": 15, "87": 15, "63": 15, "17": [], "28": [], "51": 15, "46": 15, "db_resnet34": [], "22": [], "89": 15, "74": 15, "56": 15, "68": 15, "92": 15, "61": 15, "41": 15, "00": 15, "79": 15, "38": 15, "88": [], "62": 15, "26": [], "06": 15, "78": 15, "47": 15, "54": [], "abov": 15, "cf": 15, "disclaim": 15, "combin": 15, "199": 15, "second": 15, "warmup": 15, "phase": 15, "measur": 15, "1000": 15, "obtain": 15, "11th": [], "gen": [], "intel": [], "r": [], "tm": [], "i7": [], "11800h": [], "30ghz": [], "wrap": 15, "useabl": 15, "favorit": 15, "dummy_img": 15, "area": 15, "send": 15, "snippet": 15, "transcrib": 15, "partial": [], "15": 15, "9": [], "73": 15, "44": [], "14": 15, "55": [], "58": [], "57": 15, "66": 15, "01": 15, "98": 15, "23": [], "69": 15, "99": 15, "91": 15, "05": [], "09": [], "96": 15, "40": [], "53": 15, "most": 15, "print": 15, "cfg": 15, "30595": 15, "45": 15, "72": 15, "43": 15, "65": 15, "77": 15, "30": 15, "07": [], "27": 15, "gvision": 15, "59": 15, "03": 15, "azur": [], 
"recogn": [], "42": 15, "go": 15, "mention": 15, "still": 15, "return": [5, 6, 7, 9, 15], "documentbuild": [], "resolve_lin": [], "automat": [], "resolve_block": [], "paragraph_break": [], "paragraph": [], "035": [], "nest": 15, "get": 15, "typic": 15, "layout": 15, "340": 15, "text_output": [], "json_output": 15, "1357421875": 15, "0361328125": 15, "8564453125": 15, "8603515625": 15, "914085328578949": 15, "5478515625": 15, "06640625": 15, "5810546875": 15, "0966796875": 15, "9949972033500671": 15, "51171875": 15, "1630859375": 15, "9578408598899841": 15, "1396484375": 15, "3232421875": 15, "185546875": 15, "3515625": 15, "outpout": 15, "xml": 15, "hocr": 15, "export_as_xml": 15, "xml_output": 15, "xml_bytes_str": 15, "xml_element": 15, "utf": 15, "xmln": 15, "w3": 15, "1999": 15, "xhtml": 15, "lang": 15, "en": 15, "meta": 15, "equiv": 15, "charset": 15, "system": 15, "ocr_pag": 15, "ocr_carea": 15, "ocr_par": 15, "ocr_lin": 15, "ocrx_word": 15, "div": 15, "id": 15, "page_1": 15, "bbox": 15, "3456": 15, "ppageno": 15, "block_1_1": 15, "857": 15, "529": 15, "2504": 15, "2710": 15, "par_1_1": 15, "span": 15, "line_1_1": 15, "x_size": 15, "x_descend": 15, "x_ascend": 15, "word_1_1": 15, "1552": 15, "540": 15, "1778": 15, "580": 15, "x_wconf": 15, "word_1_2": 15, "1782": 15, "1900": 15, "583": 15, "word_1_3": 15, "1420": 15, "597": 15, "1684": 15, "641": 15, "threshold": [], "region": [], "accur": [], "postprocessor": [], "bin_thresh": [], "box_thresh": [], "hook": [], "manipul": [], "customhook": [], "def": 14, "__call__": [], "self": [], "loc_pr": [], "Be": [], "awar": [], "my_hook": [], "middl": [], "pipelin": [], "add_hook": [], "execut": [], "file_path": [], "read_img": 6, "seemlessli": [], "conda": [], "newer": [], "developp": 3, "fp": 15, "scheme": [], "deform": [], "statist": [], "turn": [], "easier": [], "let": [], "db_resnet50_predictor": [], "sar_vgg16_bn": [], "rnn": [], "enhanc": [], "symbol": [], "crnn_vgg16_bn_predictor": [], "sar_vgg16_bn_predictor": 
[], "16bn": [], "convert_to_tflit": [], "tf_model": 14, "tflite": 14, "conv_sequ": 14, "relu": 14, "kernel_s": 14, "serialized_model": 14, "convert_to_fp16": [], "half": [], "serial": 14, "quantize_model": [], "quantiz": [], "exclud": [], "inherit": 14, "abstract": [], "verifi": 2, "file_nam": [], "file_hash": [], "extract_arch": [], "overwrit": [], "sha256": [], "archiv": [], "disk": [], "775": [], "856": [], "860": [], "862": [], "863": [], "sar_resnet31_predictor": [], "ocr_db_crnn_vgg": [], "652": [], "721": [], "ocr_db_sar_vgg": [], "653": [], "ocr_db_sar_resnet": [], "665": [], "735": [], "595": [], "625": [], "781": [], "830": [], "exactmatch": [], "ignore_cas": [], "ignore_acc": [], "ignor": [], "letter": [], "accent": [], "error": [], "max_dist": [], "levenshtein": [], "distanc": [], "autoclass": [], "loader": [], "154": [], "as_imag": [], "convert_page_to_numpi": [], "get_word": [], "fitz": [], "gettextword": [], "get_artefact": [], "entir": [], "fulli": [], "daili": [], "mix": [], "fine": 15, "scratch": [], "special": [], "recurr": [], "733": [], "817": [], "745": [], "875": [], "frame": 15, "feed": [], "warm": [], "c5": 15, "x12larg": 15, "xeon": 15, "platinum": 15, "8275l": 15, "913": [], "917": [], "921": [], "crnn_resnet31": [], "629": [], "701": [], "664": [], "780": [], "630": [], "702": [], "666": [], "783": [], "640": [], "713": [], "672": [], "789": [], "na": [], "753": [], "700": [], "533": [], "689": [], "611": [], "660": [], "db_sar_vgg": [], "db_sar_resnet": [], "db_crnn_vgg": [], "db_crnn_resnet": [], "properti": 14, "input_t": 14, "saved_model": 14, "And": 14, "nestedobject": [], "changelog": [], "v0": [], "2021": [], "8m": 15, "02": 15, "5m": 15, "1m": 15, "19": [], "invoic": 15, "flexibl": [], "rotated_bbox": [], "beta": [], "linknet16": [], "160": [], "arg": [5, 7], "bash": [], "tax": 15, "35": 15, "vgg16_bn": [], "mobilenetv3_larg": [], "mobilenetv3_smal": [], "constraint": 14, "tfliteconvert": 14, "from_keras_model": 14, 
"target_spec": 14, "supported_typ": 14, "float16": 14, "fallback": 14, "oper": [2, 14], "representative_dataset": 14, "yield": 14, "supported_op": 14, "opsset": 14, "tflite_builtins_int8": 14, "inference_input_typ": 14, "int8": 14, "inference_output_typ": 14, "2m": 15, "7m": 15, "look": 15, "variou": 15, "below": 15, "unfortun": 15, "moment": 15, "04": 15, "36": 15, "97": 15, "resum": 15, "road": 15, "get_text_word": [], "get_lin": [], "style": 2, "incom": 2, "pr": 2, "compli": 2, "flake8": 2, "convent": 2, "isort": 2, "reorder": 2, "catch": 2, "cleaner": 2, "mypi": 2, "ini": 2, "keep": 2, "sane": 2, "pydocstyl": 2, "_helper": [], "pdf_render": [], "render_pdf_topil": [], "linknet_resnet18_rot": [7, 15], "db_resnet50_rot": 15, "nb": 15, "pdfdocument": 6, "render_to": 6, "amazon": [11, 15], "doe": 11, "anywher": 11, "There": 11, "enivron": 11, "ec2": 15}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 
1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", "vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet18_rotation"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", 
"DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": [], "0": 0, "2024": [], "09": [], "6": [], "2022": 0, "29": [], "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": [], "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 15], "recognit": [4, 7, 12, 13, 15], "main": 4, "model": [4, 7, 12, 14, 15], "zoo": [4, 7, 15], "detect": [4, 7, 12, 13, 15], "support": [4, 5, 8], "dataset": [4, 5, 13], "arg": [], "synthet": [5, 13], "gener": [5, 13], 
"custom": 5, "loader": 5, "dataload": 5, "vocab": 5, "return": [], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 12], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 14, "your": [12, 13, 14], "own": 13, "load": [12, 13], "aw": 11, "lambda": 11, "share": 12, "commun": 12, "from": 12, "huggingfac": 12, "hub": 12, "push": 12, "pretrain": 12, "name": 12, "convent": 12, "choos": [13, 15], "readi": 13, "us": [13, 14], "avail": [13, 15], "object": 13, "data": 13, "prepar": 14, "infer": 14, "optim": [], "half": 14, "precis": 14, "export": [], "onnx": [], "right": 15, "architectur": 15, "predictor": 15, "end": 15, "ocr": 15, "two": 15, "stage": 15, "approach": 15, "what": 15, "should": 15, "i": 15, "do": 15, "output": 15, "advanc": [], "option": [], "get": [], "start": [], "conda": [], "pre": [], "process": [], "post": 14, "build": [], "implement": [], "content": [], "compress": 14, "savedmodel": 14, "note": [], "refer": [], "backbon": [], "tensorflow": 14, "lite": 14, "quantiz": 14, "public": [], "privat": [], "lint": 2, "import": 2, "order": 2, "annot": 2, "type": 2, "docstr": 2, "format": 2}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 
(2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Lint verification": [[2, "lint-verification"]], "Import order": [[2, "import-order"]], "Annotation typing": [[2, "annotation-typing"]], "Docstring format": [[2, "docstring-format"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], 
"doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [13, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id3"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, "doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "AWS Lambda": [[11, "aws-lambda"]], "Share your model with the community": [[12, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[12, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[12, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[12, "pretrained-community-models"]], "Naming conventions": [[12, "naming-conventions"]], "Classification": [[12, "classification"]], "Detection": [[12, "detection"], [13, "detection"]], "Recognition": [[12, "recognition"], [13, "recognition"]], "Choose a ready to use dataset": [[13, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[13, "available-datasets"]], "Object Detection": [[13, 
"object-detection"]], "Use your own datasets": [[13, "use-your-own-datasets"]], "Data Loading": [[13, "data-loading"]], "Preparing your model for inference": [[14, "preparing-your-model-for-inference"]], "Model compression": [[14, "model-compression"]], "TensorFlow Lite": [[14, "tensorflow-lite"]], "Half-precision": [[14, "half-precision"]], "Post-training quantization": [[14, "post-training-quantization"]], "Using SavedModel": [[14, "using-savedmodel"]], "Choosing the right model": [[15, "choosing-the-right-model"]], "Text Detection": [[15, "text-detection"]], "Available architectures": [[15, "available-architectures"], [15, "id1"], [15, "id2"]], "Detection predictors": [[15, "detection-predictors"]], "Text Recognition": [[15, "text-recognition"]], "Text recognition model zoo": [[15, "id3"]], "Recognition predictors": [[15, "recognition-predictors"]], "End-to-End OCR": [[15, "end-to-end-ocr"]], "Two-stage approaches": [[15, "two-stage-approaches"]], "What should I do with the output?": [[15, "what-should-i-do-with-the-output"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in 
doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, "doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": 
[[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet18_rotation() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18_rotation"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() 
(in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], "resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in 
doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], 
"update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file