diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index 046e0ba834..b4e556c016 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/latest/_sources/using_doctr/using_models.rst.txt b/latest/_sources/using_doctr/using_models.rst.txt index 208e0956bb..27c087096a 100644 --- a/latest/_sources/using_doctr/using_models.rst.txt +++ b/latest/_sources/using_doctr/using_models.rst.txt @@ -279,6 +279,19 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with from doctr.model import ocr_predictor model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True) +To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`: + +* `resolve_lines`: whether words should be automatically grouped into lines (default: True) +* `resolve_blocks`: whether lines should be automatically grouped into blocks (default: True) +* `paragraph_break`: relative length of the minimum space separating paragraphs (default: 0.035) + +For example to disable the automatic grouping of lines into blocks: + +.. code:: python3 + + from doctr.model import ocr_predictor + model = ocr_predictor(pretrained=True, resolve_blocks=False) + What should I do with the output? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -304,6 +317,14 @@ Here is a typical `Document` layout:: )] ) +To get only the text content of the `Document`, you can use the `render` method:: + + text_output = result.render() + +For reference, here is the output for the `Document` above:: + + No. 
RECEIPT DATE + You can also export them as a nested dict, more appropriate for JSON format:: json_output = result.export() diff --git a/latest/searchindex.js b/latest/searchindex.js index 9a783a0a2f..df9dfd3265 100644 --- a/latest/searchindex.js +++ b/latest/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/custom_models_training", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/custom_models_training.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Train your own model", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 2, 5, 7, 13, 15], "we": [1, 2, 3, 4, 6, 8, 13, 14, 15, 16], "member": 1, "leader": 1, "make": [1, 2, 9, 12, 13, 15, 16], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 13], "experi": 1, "everyon": 1, "regardless": 1, "ag": 1, "bodi": [1, 16], "size": [1, 5, 6, 8, 9, 16], "visibl": 1, "invis": 1, "disabl": [1, 12], 
"ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 16], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 14], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 16], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 14], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 15, 16], "open": [1, 2, 13, 15], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7, 13], "behavior": [1, 16], "posit": [1, 9], "environ": [1, 12], "includ": [1, 3, 5, 14, 15], "demonstr": 1, "empathi": 1, "kind": 1, "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 16], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15, 16], "from": [1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 12, 13, 14, 15], "best": 1, "just": 1, "u": [1, 2], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 12, 16], "us": [1, 2, 3, 5, 7, 9, 11, 12, 13, 16], "languag": [1, 4, 5, 6, 7, 13, 16], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 16], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": 1, "publish": 1, "inform": [1, 2, 4, 5, 14], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 12, 14, 16], "could": 1, "reason": [1, 4, 5], "consid": [1, 2, 5, 6, 9, 16], "inappropri": 1, "profession": 1, "set": [1, 5, 7, 9, 12, 16], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 14, 16], "clarifi": 1, "take": [1, 5, 16], "appropri": [1, 2, 16], "fair": 1, "action": 1, "thei": [1, 9], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 9, 11, 13, 14, 16], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": 
[1, 2, 13], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15, 16], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 14, 16], "space": 1, "also": [1, 7, 13, 14, 16], "offici": [1, 7], "repres": [1, 9, 15, 16], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 16], "via": 1, "social": 1, "media": 1, "account": [1, 13], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 16], "abus": 1, "otherwis": [1, 6, 9], "mai": [1, 2], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 7, 13], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 12], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12, 13, 16], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 15], "written": [1, 6], "provid": [1, 2, 4, 13, 14, 16], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 16], "why": 1, "wa": 1, "apologi": 1, "request": [1, 13], "through": [1, 8, 14], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 16], "involv": [1, 16], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 14], "avoid": [1, 3], "well": [1, 15], "extern": [1, 14], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 16], "adapt": 1, "version": [1, 2, 3, 15, 16], "0": [1, 3, 5, 8, 9, 11, 14, 16], "avail": [1, 4, 8], "http": [1, 3, 5, 6, 7, 13, 16], "www": [1, 6, 16], "org": [1, 5, 7, 16], "_": [1, 5, 7], "html": [1, 2, 6, 16], "were": [1, 6, 16], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 13], "ladder": 1, "For": [1, 2, 3, 11, 16], "answer": 1, "common": [1, 8, 9, 15], "question": 1, "about": [1, 14, 16], "see": [1, 2], "faq": 1, "translat": 1, "everyth": [2, 16], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15, 16], "need": [2, 3, 5, 9, 11, 12, 
13, 16], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 14], "packag": [2, 4, 9, 12, 14], "python": 2, "doc": [2, 6, 15, 16], "librari": [2, 3, 10, 11], "build": [2, 3], "script": [2, 14], "refer": [2, 3, 11, 13, 14, 16], "train": [2, 5, 7, 8, 13, 14, 15, 16], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 16], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3, 7, 13], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 13, 16], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 13, 14, 15, 16], "your": [2, 4, 6, 9, 16], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 14], "encount": 2, "problem": 2, "suggest": [2, 13], "input": [2, 6, 7, 8, 15, 16], "ha": [2, 5, 9, 14], "valu": [2, 6, 8, 16], "can": [2, 3, 11, 12, 13, 14, 16], "purpos": 2, "advis": 2, "first": [2, 5], "check": [2, 13, 16], "topic": 2, "wasn": 2, "t": [2, 5, 11, 16], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 16], "feel": [2, 13], "new": [2, 9], "one": [2, 5, 7, 8, 11, 13, 16], "do": [2, 3, 7], "so": [2, 3, 5, 7, 13, 14], "whenev": 2, "possibl": [2, 9, 13, 16], "enough": [2, 16], "jump": 2, "wonder": 2, "how": [2, 11, 13, 14], "someth": 2, "more": [2, 9, 14, 16], "gener": [2, 4, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 16], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 11, 14, 16], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [2, 9, 16], "pip": [2, 3], "upgrad": 2, "dev": [2, 12], "pre": [2, 7], "docstr": 2, "In": [2, 5, 14], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 11, 16], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [2, 5, 6, 8], "same": [2, 6, 9, 14, 16], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 14, 16], "To": [2, 3, 12, 13, 16], "togeth": [2, 6], "current": [2, 16], "built": 2, "sphinx": 2, "thank": 2, "our": [2, 
7, 16], "file": [2, 5], "been": [2, 9, 14, 16], "rebuilt": 2, "want": [2, 15, 16], "forc": 2, "complet": 2, "rebuild": 2, "delet": 2, "_build": 2, "directori": [2, 12], "addition": [2, 16], "clear": 2, "web": [2, 6], "browser": [2, 4], "cach": [2, 5, 12], "modif": 2, "now": 2, "locat": [2, 6, 16], "index": [2, 6], "wish": 2, "somewher": 2, "els": 2, "than": [2, 3, 9, 13], "join": 2, "slack": 2, "where": [2, 6, 8, 9], "find": [2, 3, 14], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 15, 16], "8": [3, 7, 8, 16], "higher": [3, 5, 16], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 11, 13, 15, 16], "pytorch": [3, 4, 7, 8, 11, 13, 15, 16], "correspond": [3, 6, 16], "page": [3, 5, 7, 9, 16], "2": [3, 4, 5, 6, 8, 16], "macbook": 3, "m1": 3, "chip": 3, "some": [3, 10, 13, 14], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 14, 16], "12": [3, 16], "anoth": [3, 7, 11, 14], "linux": 3, "few": [3, 15, 16], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 16], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 16], "over": [3, 5, 9, 16], "here": [3, 8, 10, 14, 16], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 16], "strive": 3, "reduc": [3, 8], "framework": [3, 13, 14, 16], "minimum": [3, 5, 8, 9], "necessari": [3, 11, 12], "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 13, 15], "torch": [3, 8, 11, 13, 15], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 16], "charact": [4, 5, 6, 9, 14, 16], "made": 4, "seamless": 4, "access": [4, 6, 14, 16], "anyon": 4, "power": 4, "easi": [4, 9, 13], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 16], "understand": [4, 5, 16], "task": [4, 5, 7, 13, 14, 16], "ocr": [4, 5, 7, 9, 13, 14], "predictor": [4, 6, 7, 11, 13, 15], "pars": [4, 5], "textual": [4, 5, 6, 7, 16], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 14, 16], "word": [4, 5, 7, 9, 16], 
"research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 13], "speed": [4, 7], "perform": [4, 6, 7, 8, 9, 12, 15, 16], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 15, 16], "paramet": [4, 6, 7, 15], "friendli": 4, "line": [4, 7, 9, 16], "code": [4, 6], "load": [4, 5, 7], "googlevis": 4, "aw": [4, 16], "textract": [4, 16], "optim": 4, "infer": [4, 7, 8], "both": [4, 5, 8, 14, 16], "cpu": [4, 11], "gpu": [4, 15], "light": 4, "activ": 4, "maintain": 4, "integr": [4, 13, 14], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7, 16], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 16], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 16], "sar": [4, 7], "show": [4, 6, 7, 9, 11, 13], "attend": [4, 7], "read": [4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 16], "irregular": [4, 7, 14], "crnn": [4, 7, 13], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7, 15], "network": [4, 5, 7, 15], "imag": [4, 5, 6, 7, 8, 9, 13, 14, 16], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 16], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 16], "multi": [4, 7], "aspect": [4, 7, 8, 16], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7, 15], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": [4, 7, 13, 16], "permut": [4, 7], "autoregress": [4, 7], "funsd": [4, 5, 14, 16], "form": [4, 5, 16], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 14, 16], "consolid": [4, 5], "receipt": [4, 5, 16], "forpost": [4, 5], "sroie": [4, 5, 14], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 14], "visual": 4, "geometri": [4, 6, 16], "group": 4, "svhn": [4, 5, 14], "digit": [4, 5, 14], "unsupervis": 4, "ic03": [4, 5, 14], "2003": [4, 5], "ic13": [4, 5, 14], "2013": [4, 5], "imgur5k": [4, 5, 14], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 14], "synthet": 
4, "data": [4, 5, 6, 8, 9, 11, 13], "artifici": [4, 5], "iiithw": [4, 5, 14], "wildreceipt": [4, 5, 14], "spatial": [4, 5, 6, 9], "dual": [4, 5], "modal": [4, 5], "graph": [4, 5, 6], "kei": [4, 5], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], "use_polygon": [5, 9, 14], "fals": [5, 6, 7, 8, 9, 11, 16], "recognition_task": [5, 14], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 13], "document": [5, 7, 9, 10, 14, 16], "import": [5, 6, 7, 8, 9, 11, 13, 14, 15, 16], "train_set": [5, 14], "download": [5, 14], "img": [5, 8, 14], "target": [5, 6, 8, 9, 14], "subset": [5, 16], "polygon": [5, 9, 16], "rotat": [5, 6, 7, 8, 9, 14, 16], "bound": [5, 6, 7, 8, 9, 16], "box": [5, 6, 7, 8, 9, 14, 16], "instead": [5, 6, 7], "straight": [5, 7, 14, 16], "ones": [5, 8, 9], "recognit": [5, 9, 11], "keyword": [5, 6, 7, 9], "argument": [5, 6, 7, 9, 16], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 14], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 14], "prior": 5, "svt": [5, 14], "ucsd": 5, "comput": [5, 9, 15, 16], "hous": 5, "number": [5, 8, 9, 16], "localis": 5, "repositori": [5, 7, 13], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 14], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 14], "part": [5, 8, 16], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": 5, "annot": 5, "abstractdataset": 5, "label_path": [5, 14], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 14, 16], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": 5, "images_90k_norm": 5, "90k": 5, "docartefact": [5, 14], "object": [5, 9, 10, 16], "detect": [5, 9, 10, 11], "element": [5, 6, 7, 9, 16], "varieti": 5, "arxiv": [5, 7], "ab": 5, "2103": 5, "14470v1": 5, "test": [5, 14], "charactergener": [5, 14], 
"implement": [5, 6, 7, 8, 9, 16], "d": [5, 14], "abdef": [5, 14], "num_sampl": [5, 14], "100": [5, 8, 9, 14, 16], "vocabulari": [5, 11, 13], "sampl": [5, 14, 16], "iter": [5, 8, 14, 16], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 16], "sample_transform": 5, "wordgener": [5, 14], "min_char": [5, 14], "int": [5, 6, 8, 9], "max_char": [5, 14], "list": [5, 6, 8, 9, 13], "none": [5, 6, 7, 8, 9, 16], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 14, 15, 16], "maximum": [5, 8], "detectiondataset": [5, 14], "recognitiondataset": [5, 14], "labels_path": [5, 14], "contain": [5, 14], "ocrdataset": [5, 14], "label_fil": [5, 14], "jpg": [5, 6, 13], "root": 5, "shuffl": [5, 8], "batch_siz": [5, 11, 14, 15], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 14], "train_it": [5, 14], "next": [5, 14], "befor": [5, 7, 8, 16], "pass": [5, 6, 7, 16], "batch": [5, 7, 8, 14, 16], "drop": 5, "isn": 5, "full": [5, 9, 16], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 14], "content": [5, 6, 9, 16], "properli": 5, "model": [5, 9, 12, 14], "interpret": [5, 6], "multipl": [5, 6, 8, 16], "name": [5, 7, 15, 16], "10": [5, 9, 16], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 16], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 16], "ancient_greek": 5, "48": [5, 16], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 16], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 16], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 14], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 11, 13, 16], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": 5, "120": 5, "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": 5, "german": [5, 11, 13], "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": 5, "118": 5, 
"\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": 5, "dutch": 5, "114": 5, "norwegian": 5, "106": 5, "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": 5, "danish": 5, "finnish": 5, "104": 5, "\u00e4\u00f6\u00e4\u00f6": 5, "swedish": 5, "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": 5, "vietnames": 5, "234": 5, "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": 5, "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": 5, "multilingu": [5, 13], "195": 5, "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 16], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 16], "map": [5, 7], "n": [5, 9], "length": 5, "Of": 5, "string": [5, 6, 9, 16], "option": [5, 7, 11], "start": 5, "case": [5, 9], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 16], "modul": [6, 7, 8, 9, 16], "easili": [6, 9, 11, 13, 14, 16], "export": [6, 7, 9, 10, 16], "analysi": 6, "format": [6, 9, 11, 14, 15, 16], "organ": 6, "uninterrupt": [6, 16], "confid": [6, 9, 16], "float": [6, 8, 9, 15], "associ": 6, "predict": [6, 7, 9, 16], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": [6, 
16], "rel": [6, 8, 9, 16], "collect": 6, "meant": [6, 15], "two": [6, 12], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 12], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 14], "etc": 6, "artefact_typ": 6, "type": [6, 9, 13, 15, 16], "sever": [6, 8, 16], "its": [6, 7, 8, 9, 14, 16], "titl": [6, 16], "underneath": 6, "page_idx": [6, 16], "dimens": [6, 9, 16], "dict": [6, 9, 16], "numpi": [6, 7, 9, 16], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 16], "raw": [6, 9], "pixel": [6, 8, 16], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 11, 16], "overlai": 6, "displai": [6, 9], "matplotlib": [6, 9], "pyplot": [6, 9], "method": [6, 8, 16], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 16], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8], "render": 6, "72dpi": 6, "output": [6, 8, 15], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": 6, "decod": 6, "shape": [6, 7, 8, 9, 16], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 15], "float32": [6, 7, 8, 15], "desir": 6, "relat": 6, "divid": 6, "255": [6, 7, 8, 9, 16], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": 6, "documentfil": [6, 13], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15, 16], "from_url": 6, "from_imag": [6, 13], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": [7, 15, 16], "describ": [7, 9], "veri": 7, "deep": [7, 16], "convolut": 7, "larg": [7, 13], "modifi": [7, 12, 16], "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 16], "input_tensor": 7, "random": [7, 8, 9, 16], "uniform": [7, 8], "512": 7, "maxval": [7, 8], "imagenet": 
7, "extractor": 7, "resnet18": [7, 13], "resnet": 7, "18": 7, "residu": 7, "boolean": [7, 16], "resnet34": 7, "34": [7, 16], "resnet50": [7, 13], "50": [7, 14, 16], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 16], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 15], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": [7, 9], "unoffici": 7, "config": 7, "vit_b": 7, "b": [7, 9, 16], "textnet_tini": 7, "textnet": 7, "faster": [7, 15], "arbitrarili": 7, "detector": 7, "minimalist": 7, "kernel": [7, 8], "czczup": 7, "tini": 7, "textnet_smal": 7, "textnet_bas": 7, "crop_orientation_predictor": 7, "arch": [7, 13], "croporientationpredictor": 7, "np": [7, 8, 9, 16], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 15, 16], "600": [7, 9, 16], "800": [7, 9, 14, 16], "astyp": [7, 9, 16], "crop": [7, 8, 14, 16], "dataset": [7, 11, 16], "linknet_resnet18": [7, 11, 16], "1024": [7, 9, 11, 16], "linknet_resnet34": [7, 15, 16], "linknet_resnet50": [7, 16], "db_resnet50": [7, 11, 13, 16], "backbon": 7, "db_mobilenet_v3_larg": [7, 13, 16], "mobilenet": [7, 13], "v3": [7, 13, 16], "detection_predictor": [7, 16], "assume_straight_pag": [7, 16], "detectionpredictor": [7, 11], "input_pag": [7, 9, 16], "itself": [7, 13], "fit": [7, 16], "crnn_vgg16_bn": [7, 11, 13, 16], "128": [7, 11, 15, 16], "crnn_mobilenet_v3_smal": [7, 15, 16], "crnn_mobilenet_v3_larg": [7, 13, 16], "sar_resnet31": [7, 16], "31": 7, "64": [7, 8, 16], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": 7, "vitstr_smal": [7, 11, 15, 16], "vitstr_bas": [7, 16], "recognition_predictor": [7, 16], "recognitionpredictor": [7, 11], "ocr_predictor": [7, 11, 13, 15, 16], "det_arch": [7, 11, 13, 15], "reco_arch": [7, 11, 13, 15], "pretrained_backbon": [7, 11], "symmetric_pad": [7, 8, 16], 
"export_as_straight_box": [7, 16], "detect_orient": 7, "straighten_pag": 7, "detect_languag": 7, "ocrpredictor": [7, 11], "up": [7, 16], "assum": 7, "preserv": [7, 8, 16], "ratio": [7, 8, 16], "symmetr": [7, 8, 16], "bottom": [7, 16], "final": 7, "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": 7, "Then": 7, "again": 7, "improv": 7, "kie_predictor": [7, 11], "kiepredictor": 7, "kie": [7, 11], "login_to_hub": [7, 13], "login": 7, "huggingfac": 7, "hub": 7, "from_hub": [7, 13], "repo_id": [7, 13], "instanti": [7, 16], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 13], "model_nam": [7, 13, 15], "save": [7, 14], "configur": 7, "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 16], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 16], "done": 8, "mean": [8, 9, 11], "std": [8, 11], "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 14], "225": 8, "averag": [8, 16], "per": [8, 16], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [8, 16], "rang": 8, "randombright": 8, "max_delta": 8, "adjust": 8, "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 13, 16], "pick": 8, "p": [8, 9, 16], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9, 16], "param": [8, 16], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9, 16], "randomrot": 8, "max_angl": 8, 
"expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 16], "75": [8, 16], "33": [8, 16], "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 13, 15], "consecut": [8, 16], "sequenti": [8, 16], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": [9, 16], "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 16], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 16], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 16], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 16], "famili": 9, "synthes": 9, "metric": [9, 16], "assess": 9, "textmatch": 9, "match": [9, 16], "accuraci": 9, "aggreg": [9, 14], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": [9, 15], "f_a": 9, "left": [9, 16], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": 9, "updat": 9, "hello": [9, 16], "world": [9, 16], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 16], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 16], "g_": 9, "precis": [9, 16], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 16], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 16], "110": 9, "95": [9, 16], "200": 9, "150": [9, 16], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 12, 15], "either": [9, 16], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": 
[9, 16], "detectionmetr": 9, "c_j": 9, "compil": [10, 16], "better": [10, 16], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 16], "searchabl": 10, "don": [11, 16], "meet": 11, "detail": [11, 16], "link": 11, "section": [11, 13, 15, 16], "det_model": [11, 13], "load_weight": 11, "path_to_checkpoint": 11, "weight": 11, "reco_model": [11, 13], "det_param": 11, "path_to_pt": 11, "map_loc": 11, "load_state_dict": 11, "reco_param": 11, "vocab": [11, 13, 14, 16], "class_nam": 11, "total": 11, "date": [11, 16], "preprocessor": [11, 16], "det_predictor": [11, 16], "798": 11, "785": 11, "772": 11, "264": 11, "2749": 11, "287": 11, "reco_predictor": 11, "694": 11, "695": 11, "693": 11, "299": 11, "296": 11, "301": 11, "polici": 12, "restrict": 12, "write": 12, "outsid": 12, "tmp": 12, "work": [12, 16], "step": 12, "usag": [12, 15], "multiprocess": 12, "doctr_multiprocessing_dis": 12, "variabl": 12, "becaus": 12, "shm": 12, "share": [12, 14], "chang": 12, "By": 12, "doctr_cache_dir": 12, "focu": 13, "love": 13, "appreci": 13, "interfac": 13, "io": 13, "custom": [13, 16], "felix92": 13, "db": 13, "vgg16": 13, "bn": 13, "plug": 13, "obj_detect": 13, "exist": 13, "overwritten": 13, "prerequisit": 13, "creat": 13, "co": 13, "instal": 13, "git": 13, "lf": 13, "my_awesome_model": 13, "v1": 13, "directli": [13, 16], "after": [13, 16], "python3": 13, "train_tensorflow": 13, "py": 13, "train_pytorch": 13, "tabl": 13, "pull": 13, "dummi": 13, "tilman": 13, "rassi": 13, "fascan": 13, "evalu": [14, 16], "predefin": 14, "prefer": 14, "signific": 14, "valid": 14, "149": 14, "626": 14, "360": 14, "2000": 14, "3000": 14, "249": 14, "33402": 14, "13068": 14, "772875": 14, "85875": 14, "246": 14, "233": 14, "resourc": 14, "7149": 14, "796": 14, "handwritten": 14, "1268": 14, "472": 14, "21888": 14, "8707": 14, "33608": 14, "19342": 14, "uppercas": 14, "19370": 14, "2186": 14, "257": 14, "647": 14, "73257": 14, "26032": 14, "7100000": 14, 
"707470": 14, "1156": 14, "1107": 14, "849": 14, "1095": 14, "207901": 14, "22672": 14, "7581382": 14, "1337891": 14, "7141797": 14, "793533": 14, "49377": 14, "19598": 14, "alwai": 14, "regular": 14, "2700": 14, "300": 14, "background": 14, "qr_code": 14, "bar_cod": 14, "photo": 14, "classif": 14, "mani": [14, 16], "sensit": 14, "abl": [14, 16], "howev": 14, "guidanc": 14, "tool": 14, "further": 14, "anot": 14, "handl": 14, "underli": 14, "defer": 14, "dataload": 14, "good": 15, "achiev": 15, "might": [15, 16], "tune": 15, "thing": [15, 16], "product": 15, "readi": 15, "help": 15, "support": [15, 16], "devic": 15, "fp16": 15, "point": 15, "occupi": 15, "bit": 15, "advantag": 15, "less": [15, 16], "mixed_precis": 15, "set_global_polici": 15, "mixed_float16": 15, "cuda": 15, "re": 15, "exchang": 15, "interoper": 15, "machin": 15, "structur": [15, 16], "layer": 15, "metadata": 15, "util": 15, "export_model_to_onnx": 15, "input_shap": 15, "dummy_input": 15, "tensorspec": 15, "model_path": 15, "come": 15, "soon": 15, "seen": 16, "onc": 16, "separ": 16, "compon": 16, "charg": 16, "usabl": 16, "backend": 16, "along": 16, "processor": 16, "reusabl": 16, "consist": 16, "delimit": 16, "2d": 16, "corner": 16, "flag": 16, "belong": 16, "skew": 16, "comprehens": 16, "benchmark": 16, "publicli": 16, "sec": 16, "25": 16, "84": 16, "39": 16, "85": 16, "86": 16, "93": 16, "83": 16, "24": 16, "80": 16, "29": 16, "90": 16, "67": 16, "76": 16, "11": 16, "81": 16, "71": 16, "7": 16, "21": 16, "82": 16, "20": 16, "49": 16, "87": 16, "63": 16, "17": 16, "28": 16, "51": 16, "46": 16, "db_resnet34": 16, "22": 16, "89": 16, "74": 16, "56": 16, "68": 16, "92": 16, "61": 16, "41": 16, "00": 16, "79": 16, "38": 16, "88": 16, "62": 16, "26": 16, "06": 16, "78": 16, "47": 16, "54": 16, "abov": 16, "cf": 16, "disclaim": 16, "combin": 16, "199": 16, "second": 16, "warmup": 16, "phase": 16, "measur": 16, "1000": 16, "obtain": 16, "11th": 16, "gen": 16, "intel": 16, "r": 16, "tm": 16, "i7": 16, 
"11800h": 16, "30ghz": 16, "wrap": 16, "useabl": 16, "favorit": 16, "dummy_img": 16, "area": 16, "send": 16, "snippet": 16, "transcrib": 16, "partial": 16, "15": 16, "9": 16, "73": 16, "44": 16, "14": 16, "55": 16, "58": 16, "57": 16, "66": 16, "01": 16, "98": 16, "23": 16, "69": 16, "99": 16, "91": 16, "05": 16, "09": 16, "96": 16, "40": 16, "53": 16, "most": 16, "print": 16, "cfg": 16, "30595": 16, "45": 16, "72": 16, "43": 16, "65": 16, "77": 16, "30": 16, "07": 16, "27": 16, "gvision": 16, "59": 16, "03": 16, "azur": 16, "recogn": 16, "42": 16, "go": 16, "mention": 16, "still": 16, "return": 16, "nest": 16, "get": 16, "typic": 16, "layout": 16, "340": 16, "json_output": 16, "1357421875": 16, "0361328125": 16, "8564453125": 16, "8603515625": 16, "914085328578949": 16, "5478515625": 16, "06640625": 16, "5810546875": 16, "0966796875": 16, "9949972033500671": 16, "51171875": 16, "1630859375": 16, "9578408598899841": 16, "1396484375": 16, "3232421875": 16, "185546875": 16, "3515625": 16, "outpout": 16, "xml": 16, "hocr": 16, "export_as_xml": 16, "xml_output": 16, "xml_bytes_str": 16, "xml_element": 16, "utf": 16, "xmln": 16, "w3": 16, "1999": 16, "xhtml": 16, "lang": 16, "en": 16, "meta": 16, "equiv": 16, "charset": 16, "system": 16, "ocr_pag": 16, "ocr_carea": 16, "ocr_par": 16, "ocr_lin": 16, "ocrx_word": 16, "div": 16, "id": 16, "page_1": 16, "bbox": 16, "3456": 16, "ppageno": 16, "block_1_1": 16, "857": 16, "529": 16, "2504": 16, "2710": 16, "par_1_1": 16, "span": 16, "line_1_1": 16, "x_size": 16, "x_descend": 16, "x_ascend": 16, "word_1_1": 16, "1552": 16, "540": 16, "1778": 16, "580": 16, "x_wconf": 16, "word_1_2": 16, "1782": 16, "1900": 16, "583": 16, "word_1_3": 16, "1420": 16, "597": 16, "1684": 16, "641": 16, "threshold": 16, "region": 16, "accur": 16, "postprocessor": 16, "bin_thresh": 16, "box_thresh": 16, "hook": 16, "manipul": 16, "customhook": 16, "def": 16, "__call__": 16, "self": 16, "loc_pr": 16, "Be": 16, "awar": 16, "my_hook": 16, "middl": 16, 
"pipelin": 16, "add_hook": 16, "execut": 16}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IIITHWS"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WILDRECEIPT"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "textnet_base"], [7, 1, 1, "", "textnet_small"], [7, 1, 1, "", "textnet_tiny"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", "vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", 
"linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "kie_predictor"], [7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "parseq"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": [[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": 
["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": 0, "0": 0, "2024": 0, "09": 0, "6": 0, "2022": 0, "29": 0, "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10, 15], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": 2, "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 16], "recognit": [4, 7, 13, 14, 16], "main": 4, "model": [4, 7, 11, 13, 15, 16], "zoo": [4, 7], "detect": [4, 7, 13, 14, 16], "support": [4, 5, 8], "dataset": [4, 5, 14], "arg": [5, 6, 7, 8, 9], "synthet": [5, 14], "gener": [5, 14], "custom": [5, 11], "loader": 5, "dataload": 5, "vocab": 5, "return": [5, 6, 7, 9], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 13], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": [11, 13, 14, 15], "own": [11, 14], "load": [11, 13, 14], "aw": 12, "lambda": 12, "share": 13, "commun": 13, "from": 13, "huggingfac": 13, "hub": 13, "push": 13, "pretrain": 13, "name": 13, "convent": 13, "choos": [14, 16], "readi": 14, "us": [14, 15], "avail": [14, 16], "object": 14, "data": 14, "prepar": 15, "infer": 15, "optim": 
15, "half": 15, "precis": 15, "export": 15, "onnx": 15, "right": 16, "architectur": 16, "predictor": 16, "end": 16, "ocr": 16, "two": 16, "stage": 16, "approach": 16, "what": 16, "should": 16, "i": 16, "do": 16, "output": 16, "advanc": 16, "option": 16}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.7.0 (2024-09-09)": [[0, "v0-7-0-2024-09-09"]], "v0.6.0 (2022-09-29)": [[0, "v0-6-0-2022-09-29"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. 
Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Code style verification": [[2, "code-style-verification"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Args:": [[5, "args"], [5, "id4"], [5, "id7"], [5, "id10"], [5, "id13"], [5, "id16"], [5, "id19"], [5, "id22"], [5, "id25"], [5, "id29"], [5, "id32"], [5, "id37"], [5, "id40"], [5, "id46"], [5, "id49"], [5, "id50"], [5, "id51"], [5, "id54"], [5, "id57"], [5, "id60"], [5, "id61"], [6, "args"], [6, "id2"], [6, "id3"], [6, "id4"], [6, "id5"], [6, "id6"], [6, "id7"], [6, "id10"], [6, "id12"], [6, "id14"], [6, "id16"], [6, "id20"], [6, "id24"], [6, "id28"], [7, "args"], [7, "id3"], [7, "id8"], [7, "id13"], [7, "id17"], [7, "id21"], [7, "id26"], [7, "id31"], [7, "id36"], [7, "id41"], [7, "id45"], [7, "id49"], [7, "id54"], [7, "id58"], [7, "id63"], [7, "id68"], [7, "id72"], [7, 
"id76"], [7, "id81"], [7, "id86"], [7, "id90"], [7, "id95"], [7, "id99"], [7, "id103"], [7, "id108"], [7, "id113"], [7, "id118"], [7, "id122"], [7, "id126"], [7, "id131"], [7, "id135"], [7, "id139"], [7, "id143"], [7, "id145"], [7, "id147"], [7, "id149"], [8, "args"], [8, "id1"], [8, "id2"], [8, "id3"], [8, "id4"], [8, "id5"], [8, "id6"], [8, "id7"], [8, "id8"], [8, "id9"], [8, "id10"], [8, "id11"], [8, "id12"], [8, "id13"], [8, "id14"], [8, "id15"], [8, "id16"], [8, "id17"], [8, "id18"], [9, "args"], [9, "id3"], [9, "id5"], [9, "id6"], [9, "id7"], [9, "id8"], [9, "id9"], [9, "id10"], [9, "id11"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [14, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id62"]], "Returns:": [[5, "returns"], [6, "returns"], [6, "id11"], [6, "id13"], [6, "id15"], [6, "id19"], [6, "id23"], [6, "id27"], [6, "id31"], [7, "returns"], [7, "id6"], [7, "id11"], [7, "id16"], [7, "id20"], [7, "id24"], [7, "id29"], [7, "id34"], [7, "id39"], [7, "id44"], [7, "id48"], [7, "id52"], [7, "id57"], [7, "id61"], [7, "id66"], [7, "id71"], [7, "id75"], [7, "id79"], [7, "id84"], [7, "id89"], [7, "id93"], [7, "id98"], [7, "id102"], [7, "id106"], [7, "id111"], [7, "id116"], [7, "id121"], [7, "id125"], [7, "id129"], [7, "id134"], [7, "id138"], [7, "id142"], [7, "id144"], [7, "id146"], [7, "id148"], [9, "returns"], [9, "id4"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, 
"doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Train your own model": [[11, "train-your-own-model"]], "Loading your custom trained model": [[11, "loading-your-custom-trained-model"]], "AWS Lambda": [[12, "aws-lambda"]], "Share your model with the community": [[13, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[13, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[13, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[13, "pretrained-community-models"]], "Naming conventions": [[13, "naming-conventions"]], "Classification": [[13, "classification"]], "Detection": [[13, "detection"], [14, "detection"]], "Recognition": [[13, "recognition"], [14, "recognition"]], "Choose a ready to use dataset": [[14, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[14, "available-datasets"]], "Object Detection": [[14, "object-detection"]], "Use your own datasets": [[14, "use-your-own-datasets"]], "Data Loading": [[14, "data-loading"]], "Preparing your model for inference": [[15, "preparing-your-model-for-inference"]], "Model optimization": [[15, "model-optimization"]], "Half-precision": [[15, "half-precision"]], "Export to ONNX": [[15, "export-to-onnx"]], "Using your ONNX exported model in docTR": [[15, "using-your-onnx-exported-model-in-doctr"]], "Choosing the right model": [[16, "choosing-the-right-model"]], "Text Detection": [[16, "text-detection"]], "Available architectures": [[16, "available-architectures"], [16, "id1"], [16, "id2"]], "Detection predictors": [[16, 
"detection-predictors"]], "Text Recognition": [[16, "text-recognition"]], "Recognition predictors": [[16, "recognition-predictors"]], "End-to-End OCR": [[16, "end-to-end-ocr"]], "Two-stage approaches": [[16, "two-stage-approaches"]], "What should I do with the output?": [[16, "what-should-i-do-with-the-output"]], "Advanced options": [[16, "advanced-options"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "iiithws (class in doctr.datasets)": [[5, "doctr.datasets.IIITHWS"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wildreceipt (class in doctr.datasets)": [[5, "doctr.datasets.WILDRECEIPT"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, 
"doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module 
doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], "kie_predictor() (in module doctr.models)": [[7, "doctr.models.kie_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "parseq() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.parseq"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], 
"resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "textnet_base() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_base"]], "textnet_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_small"]], "textnet_tiny() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_tiny"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class 
in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, 
"doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["changelog", "contributing/code_of_conduct", "contributing/contributing", "getting_started/installing", "index", "modules/datasets", "modules/io", "modules/models", "modules/transforms", "modules/utils", "notebooks", "using_doctr/custom_models_training", "using_doctr/running_on_aws", "using_doctr/sharing_models", "using_doctr/using_datasets", "using_doctr/using_model_export", "using_doctr/using_models"], "filenames": ["changelog.rst", "contributing/code_of_conduct.md", "contributing/contributing.md", "getting_started/installing.rst", "index.rst", "modules/datasets.rst", "modules/io.rst", "modules/models.rst", "modules/transforms.rst", "modules/utils.rst", "notebooks.rst", "using_doctr/custom_models_training.rst", "using_doctr/running_on_aws.rst", "using_doctr/sharing_models.rst", "using_doctr/using_datasets.rst", "using_doctr/using_model_export.rst", "using_doctr/using_models.rst"], "titles": ["Changelog", "Contributor Covenant Code of Conduct", "Contributing to docTR", "Installation", "docTR: Document Text Recognition", "doctr.datasets", "doctr.io", "doctr.models", "doctr.transforms", "doctr.utils", "docTR Notebooks", "Train your own model", "AWS Lambda", "Share your model with the community", "Choose a ready to use dataset", "Preparing your model for inference", "Choosing the right model"], "terms": {"releas": [0, 3], "note": [0, 2, 5, 7, 13, 15], "we": [1, 2, 3, 4, 6, 8, 13, 14, 15, 16], "member": 1, "leader": 1, "make": [1, 2, 9, 12, 13, 15, 16], "particip": 1, "commun": 1, "harass": 1, "free": [1, 2, 13], "experi": 1, "everyon": 1, "regardless": 1, 
"ag": 1, "bodi": [1, 16], "size": [1, 5, 6, 8, 9, 16], "visibl": 1, "invis": 1, "disabl": [1, 12, 16], "ethnic": 1, "sex": 1, "characterist": 1, "gender": 1, "ident": 1, "express": [1, 8], "level": [1, 5, 9, 16], "educ": 1, "socio": 1, "econom": 1, "statu": 1, "nation": 1, "person": [1, 14], "appear": 1, "race": 1, "religion": 1, "sexual": 1, "orient": [1, 6, 7, 16], "act": 1, "interact": [1, 6, 9], "wai": [1, 4, 14], "contribut": 1, "an": [1, 2, 4, 5, 6, 7, 9, 15, 16], "open": [1, 2, 13, 15], "welcom": 1, "divers": 1, "inclus": 1, "healthi": 1, "exampl": [1, 2, 4, 5, 7, 13, 16], "behavior": [1, 16], "posit": [1, 9], "environ": [1, 12], "includ": [1, 3, 5, 14, 15], "demonstr": 1, "empathi": 1, "kind": 1, "toward": [1, 3], "other": [1, 2], "peopl": 1, "Being": 1, "respect": 1, "differ": 1, "opinion": 1, "viewpoint": 1, "give": 1, "gracefulli": 1, "accept": 1, "construct": 1, "feedback": 1, "apolog": 1, "those": [1, 3, 6, 16], "affect": 1, "mistak": 1, "learn": [1, 4, 7, 15, 16], "from": [1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16], "focus": [1, 5], "what": 1, "i": [1, 2, 5, 6, 7, 8, 9, 12, 13, 14, 15], "best": 1, "just": 1, "u": [1, 2], "individu": 1, "overal": [1, 7], "unaccept": 1, "The": [1, 2, 5, 6, 9, 12, 16], "us": [1, 2, 3, 5, 7, 9, 11, 12, 13, 16], "languag": [1, 4, 5, 6, 7, 13, 16], "imageri": 1, "attent": [1, 7], "advanc": 1, "ani": [1, 5, 6, 7, 8, 9, 16], "troll": 1, "insult": 1, "derogatori": 1, "comment": 1, "polit": 1, "attack": 1, "public": [1, 4], "privat": 1, "publish": 1, "inform": [1, 2, 4, 5, 14], "physic": [1, 6], "email": 1, "address": [1, 6], "without": [1, 5, 7], "explicit": 1, "permiss": 1, "which": [1, 7, 12, 14, 16], "could": 1, "reason": [1, 4, 5], "consid": [1, 2, 5, 6, 9, 16], "inappropri": 1, "profession": 1, "set": [1, 5, 7, 9, 12, 16], "ar": [1, 2, 3, 5, 6, 8, 9, 10, 14, 16], "clarifi": 1, "take": [1, 5, 16], "appropri": [1, 2, 16], "fair": 1, "action": 1, "thei": [1, 9], "deem": 1, "threaten": 1, "offens": 1, "harm": 1, "have": [1, 2, 
9, 11, 13, 14, 16], "right": [1, 7, 9], "remov": 1, "edit": 1, "reject": 1, "commit": 1, "wiki": 1, "issu": [1, 2, 13], "align": [1, 6], "thi": [1, 2, 3, 5, 9, 11, 12, 13, 14, 15, 16], "moder": 1, "decis": 1, "when": [1, 2, 7], "appli": [1, 5, 8], "within": 1, "all": [1, 2, 5, 6, 8, 9, 14, 16], "space": [1, 16], "also": [1, 7, 13, 14, 16], "offici": [1, 7], "repres": [1, 9, 15, 16], "e": [1, 2, 3, 6, 7], "mail": 1, "post": [1, 16], "via": 1, "social": 1, "media": 1, "account": [1, 13], "appoint": 1, "onlin": 1, "offlin": 1, "event": 1, "instanc": [1, 16], "abus": 1, "otherwis": [1, 6, 9], "mai": [1, 2], "report": 1, "contact": 1, "minde": [1, 3, 4, 7], "com": [1, 3, 6, 7, 13], "complaint": 1, "review": 1, "investig": 1, "promptli": 1, "fairli": 1, "oblig": 1, "privaci": 1, "secur": [1, 12], "incid": 1, "follow": [1, 2, 3, 5, 8, 9, 11, 12, 13, 16], "impact": 1, "determin": 1, "consequ": 1, "violat": 1, "unprofession": 1, "unwelcom": 1, "A": [1, 2, 4, 5, 6, 7, 10, 15], "written": [1, 6], "provid": [1, 2, 4, 13, 14, 16], "clariti": 1, "around": 1, "natur": [1, 4, 5], "explan": [1, 16], "why": 1, "wa": 1, "apologi": 1, "request": [1, 13], "through": [1, 8, 14], "singl": [1, 2, 4, 5], "seri": 1, "continu": 1, "No": [1, 16], "involv": [1, 16], "unsolicit": 1, "specifi": [1, 5, 6], "period": 1, "time": [1, 4, 7, 9, 14], "avoid": [1, 3], "well": [1, 15], "extern": [1, 14], "channel": [1, 2, 6, 8], "like": 1, "term": 1, "lead": 1, "seriou": 1, "sustain": 1, "sort": 1, "allow": 1, "dure": 1, "pattern": 1, "aggress": 1, "disparag": 1, "class": [1, 5, 6, 8, 9, 16], "adapt": 1, "version": [1, 2, 3, 15, 16], "0": [1, 3, 5, 8, 9, 11, 14, 16], "avail": [1, 4, 8], "http": [1, 3, 5, 6, 7, 13, 16], "www": [1, 6, 16], "org": [1, 5, 7, 16], "_": [1, 5, 7], "html": [1, 2, 6, 16], "were": [1, 6, 16], "inspir": [1, 8], "mozilla": 1, "": [1, 6, 7, 9, 13], "ladder": 1, "For": [1, 2, 3, 11, 16], "answer": 1, "common": [1, 8, 9, 15], "question": 1, "about": [1, 14, 16], "see": [1, 2], "faq": 
1, "translat": 1, "everyth": [2, 16], "you": [2, 3, 5, 6, 7, 11, 12, 13, 14, 15, 16], "need": [2, 3, 5, 9, 11, 12, 13, 16], "know": 2, "effici": [2, 4, 5, 7], "project": [2, 14], "packag": [2, 4, 9, 12, 14], "python": 2, "doc": [2, 6, 15, 16], "librari": [2, 3, 10, 11], "build": [2, 3], "script": [2, 14], "refer": [2, 3, 11, 13, 14, 16], "train": [2, 5, 7, 8, 13, 14, 15, 16], "demo": [2, 4], "small": [2, 7], "app": 2, "showcas": 2, "capabl": [2, 10, 16], "api": [2, 4], "minim": [2, 4], "templat": [2, 4], "deploi": 2, "rest": [2, 8, 9], "ensur": 2, "proper": 2, "mainten": 2, "github": [2, 3, 7, 13], "worklow": 2, "run": [2, 3, 7], "job": 2, "coverag": 2, "codecov": 2, "back": 2, "result": [2, 5, 6, 10, 13, 16], "As": 2, "contributor": 2, "onli": [2, 7, 8, 9, 13, 14, 15, 16], "your": [2, 4, 6, 9, 16], "ad": [2, 7, 8], "whether": [2, 5, 6, 8, 9, 14, 16], "encount": 2, "problem": 2, "suggest": [2, 13], "input": [2, 6, 7, 8, 15, 16], "ha": [2, 5, 9, 14], "valu": [2, 6, 8, 16], "can": [2, 3, 11, 12, 13, 14, 16], "purpos": 2, "advis": 2, "first": [2, 5], "check": [2, 13, 16], "topic": 2, "wasn": 2, "t": [2, 5, 11, 16], "alreadi": 2, "cover": 2, "close": 2, "If": [2, 3, 6, 7, 11, 16], "feel": [2, 13], "new": [2, 9], "one": [2, 5, 7, 8, 11, 13, 16], "do": [2, 3, 7], "so": [2, 3, 5, 7, 13, 14], "whenev": 2, "possibl": [2, 9, 13, 16], "enough": [2, 16], "jump": 2, "wonder": 2, "how": [2, 11, 13, 14], "someth": 2, "more": [2, 9, 14, 16], "gener": [2, 4, 7], "should": [2, 5, 6, 8, 9], "out": [2, 7, 8, 9, 16], "discuss": 2, "q": 2, "forum": 2, "specif": [2, 3, 9, 11, 14, 16], "stackoverflow": 2, "addit": [2, 3, 6], "depend": [2, 3, 4], "command": 2, "m": [2, 9, 16], "pip": [2, 3], "upgrad": 2, "dev": [2, 12], "pre": [2, 7], "docstr": 2, "In": [2, 5, 14], "pleas": 2, "googl": 2, "eas": 2, "process": [2, 4, 6, 11, 16], "later": 2, "messag": 2, "udac": 2, "guid": 2, "order": [2, 5, 6, 8], "same": [2, 6, 9, 14, 16], "ci": 2, "workflow": 2, "unittest": 2, "local": [2, 4, 5, 7, 9, 14, 
16], "To": [2, 3, 12, 13, 16], "togeth": [2, 6], "current": [2, 16], "built": 2, "sphinx": 2, "thank": 2, "our": [2, 7, 16], "file": [2, 5], "been": [2, 9, 14, 16], "rebuilt": 2, "want": [2, 15, 16], "forc": 2, "complet": 2, "rebuild": 2, "delet": 2, "_build": 2, "directori": [2, 12], "addition": [2, 16], "clear": 2, "web": [2, 6], "browser": [2, 4], "cach": [2, 5, 12], "modif": 2, "now": 2, "locat": [2, 6, 16], "index": [2, 6], "wish": 2, "somewher": 2, "els": 2, "than": [2, 3, 9, 13], "join": 2, "slack": 2, "where": [2, 6, 8, 9], "find": [2, 3, 14], "requir": [3, 8], "3": [3, 4, 6, 7, 8, 9, 15, 16], "8": [3, 7, 8, 16], "higher": [3, 5, 16], "whichev": 3, "o": 3, "least": 3, "tensorflow": [3, 4, 6, 7, 8, 11, 13, 15, 16], "pytorch": [3, 4, 7, 8, 11, 13, 15, 16], "correspond": [3, 6, 16], "page": [3, 5, 7, 9, 16], "2": [3, 4, 5, 6, 8, 16], "macbook": 3, "m1": 3, "chip": 3, "some": [3, 10, 13, 14], "metal": 3, "plugin": 3, "1": [3, 5, 6, 7, 8, 9, 11, 14, 16], "12": [3, 16], "anoth": [3, 7, 11, 14], "linux": 3, "few": [3, 15, 16], "extra": 3, "maco": 3, "user": [3, 4, 6, 10], "them": [3, 5, 16], "homebrew": 3, "brew": 3, "cairo": 3, "pango": 3, "gdk": 3, "pixbuf": 3, "libffi": 3, "window": [3, 7, 9], "gtk": 3, "latest": [3, 16], "over": [3, 5, 9, 16], "here": [3, 8, 10, 14, 16], "last": [3, 5], "stabl": 3, "doctr": [3, 11, 12, 13, 14, 16], "strive": 3, "reduc": [3, 8], "framework": [3, 13, 14, 16], "minimum": [3, 5, 8, 9, 16], "necessari": [3, 11, 12], "featur": [3, 7, 9, 10], "develop": 3, "third": 3, "parti": 3, "miss": 3, "tf": [3, 6, 7, 8, 13, 15], "torch": [3, 8, 11, 13, 15], "mode": 3, "clone": 3, "state": [4, 9], "art": 4, "optic": [4, 16], "charact": [4, 5, 6, 9, 14, 16], "made": 4, "seamless": 4, "access": [4, 6, 14, 16], "anyon": 4, "power": 4, "easi": [4, 9, 13], "extract": [4, 5], "valuabl": 4, "autom": 4, "seamlessli": [4, 16], "understand": [4, 5, 16], "task": [4, 5, 7, 13, 14, 16], "ocr": [4, 5, 7, 9, 13, 14], "predictor": [4, 6, 7, 11, 13, 15], "pars": 
[4, 5], "textual": [4, 5, 6, 7, 16], "identifi": 4, "each": [4, 5, 6, 7, 8, 9, 14, 16], "word": [4, 5, 7, 9, 16], "research": 4, "quickli": 4, "compar": 4, "own": 4, "architectur": [4, 7, 13], "speed": [4, 7], "perform": [4, 6, 7, 8, 9, 12, 15, 16], "robust": [4, 5], "stage": 4, "pretrain": [4, 7, 9, 11, 15, 16], "paramet": [4, 6, 7, 15], "friendli": 4, "line": [4, 7, 9, 16], "code": [4, 6], "load": [4, 5, 7], "googlevis": 4, "aw": [4, 16], "textract": [4, 16], "optim": 4, "infer": [4, 7, 8], "both": [4, 5, 8, 14, 16], "cpu": [4, 11], "gpu": [4, 15], "light": 4, "activ": 4, "maintain": 4, "integr": [4, 13, 14], "deploy": 4, "dbnet": [4, 7], "real": [4, 7, 8], "scene": [4, 5, 7], "differenti": [4, 7], "binar": [4, 7, 16], "linknet": [4, 7], "exploit": [4, 7], "encod": [4, 5, 6, 7, 16], "represent": [4, 7], "semant": [4, 7], "segment": [4, 7, 16], "sar": [4, 7], "show": [4, 6, 7, 9, 11, 13], "attend": [4, 7], "read": [4, 5, 7], "simpl": [4, 7], "strong": [4, 7], "baselin": [4, 7, 16], "irregular": [4, 7, 14], "crnn": [4, 7, 13], "end": [4, 5, 7, 9], "trainabl": [4, 7], "neural": [4, 5, 7, 15], "network": [4, 5, 7, 15], "imag": [4, 5, 6, 7, 8, 9, 13, 14, 16], "base": [4, 7], "sequenc": [4, 5, 6, 7, 9, 16], "Its": [4, 7], "applic": [4, 7], "master": [4, 7, 16], "multi": [4, 7], "aspect": [4, 7, 8, 16], "non": [4, 5, 6, 7, 8, 9], "vitstr": [4, 7, 15], "vision": [4, 5, 7], "transform": [4, 5, 7], "fast": [4, 5, 7], "parseq": [4, 7, 13, 16], "permut": [4, 7], "autoregress": [4, 7], "funsd": [4, 5, 14, 16], "form": [4, 5, 16], "noisi": [4, 5], "scan": [4, 5], "cord": [4, 5, 14, 16], "consolid": [4, 5], "receipt": [4, 5, 16], "forpost": [4, 5], "sroie": [4, 5, 14], "icdar": [4, 5], "2019": 4, "iiit": [4, 5], "5k": [4, 5], "cvit": 4, "street": [4, 5], "view": [4, 5], "synthtext": [4, 5, 14], "visual": 4, "geometri": [4, 6, 16], "group": [4, 16], "svhn": [4, 5, 14], "digit": [4, 5, 14], "unsupervis": 4, "ic03": [4, 5, 14], "2003": [4, 5], "ic13": [4, 5, 14], "2013": [4, 5], 
"imgur5k": [4, 5, 14], "textstylebrush": [4, 5], "transfer": [4, 5], "aesthet": [4, 5], "mjsynth": [4, 5, 14], "synthet": 4, "data": [4, 5, 6, 8, 9, 11, 13], "artifici": [4, 5], "iiithw": [4, 5, 14], "wildreceipt": [4, 5, 14], "spatial": [4, 5, 6, 9], "dual": [4, 5], "modal": [4, 5], "graph": [4, 5, 6], "kei": [4, 5], "bool": [5, 6, 7, 8, 9], "true": [5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], "use_polygon": [5, 9, 14], "fals": [5, 6, 7, 8, 9, 11, 16], "recognition_task": [5, 14], "kwarg": [5, 6, 7, 9], "sourc": [5, 6, 7, 8, 9, 13], "document": [5, 7, 9, 10, 14, 16], "import": [5, 6, 7, 8, 9, 11, 13, 14, 15, 16], "train_set": [5, 14], "download": [5, 14], "img": [5, 8, 14], "target": [5, 6, 8, 9, 14], "subset": [5, 16], "polygon": [5, 9, 16], "rotat": [5, 6, 7, 8, 9, 14, 16], "bound": [5, 6, 7, 8, 9, 16], "box": [5, 6, 7, 8, 9, 14, 16], "instead": [5, 6, 7], "straight": [5, 7, 14, 16], "ones": [5, 8, 9], "recognit": [5, 9, 11], "keyword": [5, 6, 7, 9], "argument": [5, 6, 7, 9, 16], "visiondataset": 5, "icdar2019": 5, "competit": 5, "iiit5k": [5, 14], "bmvc": 5, "2012": 5, "text": [5, 6, 7, 9, 14], "prior": 5, "svt": [5, 14], "ucsd": 5, "comput": [5, 9, 15, 16], "hous": 5, "number": [5, 8, 9, 16], "localis": 5, "repositori": [5, 7, 13], "websit": 5, "entri": 5, "futur": 5, "direct": 5, "img_fold": [5, 14], "str": [5, 6, 7, 8, 9], "label_fold": 5, "label": [5, 8, 9, 14], "part": [5, 8, 16], "challeng": 5, "task2": 5, "2015": 5, "path": [5, 6, 14], "challenge2_training_task12_imag": 5, "challenge2_training_task1_gt": 5, "test_set": 5, "challenge2_test_task12_imag": 5, "challenge2_test_task1_gt": 5, "folder": 5, "annot": 5, "abstractdataset": 5, "label_path": [5, 14], "handwrit": 5, "dataset_info": 5, "imgur5k_annot": 5, "json": [5, 14, 16], "pure": 5, "mnt": 5, "ramdisk": 5, "max": [5, 8, 9], "90kdict32px": 5, "imlist": 5, "txt": 5, "hw": 5, "images_90k_norm": 5, "90k": 5, "docartefact": [5, 14], "object": [5, 9, 10, 16], "detect": [5, 9, 10, 11], "element": [5, 6, 7, 9, 
16], "varieti": 5, "arxiv": [5, 7], "ab": 5, "2103": 5, "14470v1": 5, "test": [5, 14], "charactergener": [5, 14], "implement": [5, 6, 7, 8, 9, 16], "d": [5, 14], "abdef": [5, 14], "num_sampl": [5, 14], "100": [5, 8, 9, 14, 16], "vocabulari": [5, 11, 13], "sampl": [5, 14, 16], "iter": [5, 8, 14, 16], "cache_sampl": 5, "firsthand": 5, "font_famili": [5, 9], "font": [5, 9], "img_transform": 5, "compos": [5, 16], "sample_transform": 5, "wordgener": [5, 14], "min_char": [5, 14], "int": [5, 6, 8, 9], "max_char": [5, 14], "list": [5, 6, 8, 9, 13], "none": [5, 6, 7, 8, 9, 16], "callabl": [5, 8], "tupl": [5, 6, 8, 9], "32": [5, 7, 8, 11, 14, 15, 16], "maximum": [5, 8], "detectiondataset": [5, 14], "recognitiondataset": [5, 14], "labels_path": [5, 14], "contain": [5, 14], "ocrdataset": [5, 14], "label_fil": [5, 14], "jpg": [5, 6, 13], "root": 5, "shuffl": [5, 8], "batch_siz": [5, 11, 14, 15], "drop_last": 5, "num_work": 5, "collate_fn": 5, "wrapper": [5, 8], "train_load": [5, 14], "train_it": [5, 14], "next": [5, 14], "befor": [5, 7, 8, 16], "pass": [5, 6, 7, 16], "batch": [5, 7, 8, 14, 16], "drop": 5, "isn": 5, "full": [5, 9, 16], "worker": 5, "function": [5, 8, 9], "merg": 5, "sinc": [5, 14], "content": [5, 6, 9, 16], "properli": 5, "model": [5, 9, 12, 14], "interpret": [5, 6], "multipl": [5, 6, 8, 16], "name": [5, 7, 15, 16], "10": [5, 9, 16], "0123456789": 5, "hindi_digit": 5, "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "ascii_lett": 5, "52": [5, 16], "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "punctuat": 5, "currenc": 5, "5": [5, 8, 9, 16], "ancient_greek": 5, "48": [5, 16], "\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9\u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03bf\u03c0\u03c1\u03c3\u03c4\u03c5\u03c6\u03c7\u03c8\u03c9": 5, "arabic_lett": 5, "37": [5, 16], 
"\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a": 5, "persian_lett": 5, "\u067e\u0686\u06a2\u06a4\u06af": 5, "arabic_diacrit": 5, "arabic_punctu": 5, "latin": 5, "94": [5, 16], "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz": 5, "english": [5, 14], "legacy_french": 5, "123": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7\u00e0\u00e2\u00e9\u00e8\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00e7": 5, "french": [5, 11, 13, 16], "126": 5, "\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7\u00e0\u00e2\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00e7": 5, "portugues": 5, "131": 5, "\u00e1\u00e0\u00e2\u00e3\u00e9\u00ea\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7\u00e1\u00e0\u00e2\u00e3\u00e9\u00eb\u00ed\u00ef\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7": 5, "spanish": 5, "116": 5, "\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00f1": 5, "italian": 5, "120": 5, "\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa\u00e0\u00e8\u00e9\u00ec\u00ed\u00ee\u00f2\u00f3\u00f9\u00fa": 5, "german": [5, 11, 13], "108": 5, "\u00e4\u00f6\u00fc\u00df\u00e4\u00f6\u00fc\u00df": 5, "arab": 5, "101": 5, "\u0621\u0622\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u067e\u0686\u06a2\u06a4\u06af": 5, "0123456789\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669": 5, "czech": 5, "130": 5, "\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e\u00e1\u010d\u010f\u00e9\u011b\u00ed\u0148\u00f3\u0159\u0161\u0165\u00fa\u016f\u00fd\u017e": 5, "polish": 5, "118": 5, 
"\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c\u0105\u0107\u0119\u0142\u0144\u00f3\u015b\u017a\u017c": 5, "dutch": 5, "114": 5, "norwegian": 5, "106": 5, "\u00e6\u00f8\u00e5\u00e6\u00f8\u00e5": 5, "danish": 5, "finnish": 5, "104": 5, "\u00e4\u00f6\u00e4\u00f6": 5, "swedish": 5, "\u00e5\u00e4\u00f6\u00e5\u00e4\u00f6": 5, "vietnames": 5, "234": 5, "\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5\u00e1\u00e0\u1ea3\u1ea1\u00e3\u0103\u1eaf\u1eb1\u1eb3\u1eb5\u1eb7\u00e2\u1ea5\u1ea7\u1ea9\u1eab\u1ead\u00e9\u00e8\u1ebb\u1ebd\u1eb9\u00ea\u1ebf\u1ec1\u1ec3\u1ec5\u1ec7\u00f3\u00f2\u1ecf\u00f5\u1ecd\u00f4\u1ed1\u1ed3\u1ed5\u1ed9\u1ed7\u01a1\u1edb\u1edd\u1edf\u1ee3\u1ee1\u00fa\u00f9\u1ee7\u0169\u1ee5\u01b0\u1ee9\u1eeb\u1eed\u1eef\u1ef1i\u00ed\u00ec\u1ec9\u0129\u1ecb\u00fd\u1ef3\u1ef7\u1ef9\u1ef5": 5, "hebrew": 5, "\u05d0\u05d1\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6\u05e7\u05e8\u05e9\u05ea": 5, "multilingu": [5, 13], "195": 5, "encode_sequ": 5, "target_s": 5, "eo": 5, "pad": [5, 7, 8, 16], "dynamic_seq_length": 5, "ndarrai": [5, 6, 8, 9], "given": [5, 6, 8, 9, 16], "map": [5, 7], "n": [5, 9], "length": [5, 16], "Of": 5, "string": [5, 6, 9, 16], "option": [5, 7, 11], "start": 5, "case": [5, 9], "upper": [5, 8], "enabl": [5, 6], "dynam": 5, "tensor": [5, 6, 8, 16], "modul": [6, 7, 8, 9, 16], "easili": [6, 9, 11, 13, 14, 16], "export": [6, 7, 9, 10, 16], "analysi": 6, "format": [6, 9, 11, 14, 15, 16], "organ": 6, "uninterrupt": [6, 16], "confid": [6, 9, 16], "float": [6, 8, 9, 15], "associ": 6, "predict": [6, 7, 9, 16], "xmin": 6, "ymin": 6, "xmax": 6, "ymax": 6, "coordin": 
[6, 16], "rel": [6, 8, 9, 16], "collect": 6, "meant": [6, 15], "two": [6, 12], "column": 6, "horizont": [6, 8], "resolv": 6, "default": [6, 9, 11, 12, 16], "smallest": 6, "enclos": 6, "g": [6, 7], "qr": 6, "pictur": 6, "chart": 6, "signatur": 6, "logo": [6, 14], "etc": 6, "artefact_typ": 6, "type": [6, 9, 13, 15, 16], "sever": [6, 8, 16], "its": [6, 7, 8, 9, 14, 16], "titl": [6, 16], "underneath": 6, "page_idx": [6, 16], "dimens": [6, 9, 16], "dict": [6, 9, 16], "numpi": [6, 7, 9, 16], "arrai": [6, 8, 9], "uint8": [6, 7, 9, 16], "raw": [6, 9], "pixel": [6, 8, 16], "height": 6, "width": 6, "dictionari": [6, 9], "angl": [6, 8], "degress": 6, "preserve_aspect_ratio": [6, 7, 8, 11, 16], "overlai": 6, "displai": [6, 9], "matplotlib": [6, 9], "pyplot": [6, 9], "method": [6, 8, 16], "high": 6, "convers": 6, "read_pdf": 6, "byte": [6, 16], "scale": [6, 7, 8, 9], "rgb_mode": 6, "password": 6, "pdf": [6, 7, 10], "convert": [6, 8], "render": [6, 16], "72dpi": 6, "output": [6, 8, 15], "rgb": [6, 8], "bgr": 6, "unlock": 6, "encrypt": 6, "pypdfium2": 6, "pdfpage": 6, "decod": 6, "shape": [6, 7, 8, 9, 16], "h": [6, 7, 8], "x": [6, 8, 9], "w": [6, 7, 8, 9], "c": [6, 9], "read_img_as_numpi": 6, "output_s": [6, 8], "rgb_output": 6, "expect": [6, 8, 9], "read_img_as_tensor": 6, "img_path": 6, "dtype": [6, 7, 8, 9, 15], "float32": [6, 7, 8, 15], "desir": 6, "relat": 6, "divid": 6, "255": [6, 7, 8, 9, 16], "decode_img_as_tensor": 6, "img_cont": 6, "stream": 6, "read_html": 6, "url": 6, "yoursit": 6, "weasyprint": 6, "documentfil": [6, 13], "extens": 6, "classmethod": 6, "from_pdf": 6, "binari": [6, 15, 16], "from_url": 6, "from_imag": [6, 13], "page1": 6, "png": 6, "page2": 6, "vgg16_bn_r": 7, "vgg": 7, "16": [7, 15, 16], "describ": [7, 9], "veri": 7, "deep": [7, 16], "convolut": 7, "larg": [7, 13], "modifi": [7, 12, 16], "normal": [7, 8], "rectangular": 7, "pool": 7, "simpler": 7, "head": [7, 16], "input_tensor": 7, "random": [7, 8, 9, 16], "uniform": [7, 8], "512": 7, "maxval": [7, 
8], "imagenet": 7, "extractor": 7, "resnet18": [7, 13], "resnet": 7, "18": 7, "residu": 7, "boolean": [7, 16], "resnet34": 7, "34": [7, 16], "resnet50": [7, 13], "50": [7, 14, 16], "resnet31": 7, "downsiz": 7, "4": [7, 8, 9, 16], "mobilenet_v3_smal": 7, "mobilenetv3": 7, "search": 7, "kera": [7, 15], "mobilenet_v3_larg": 7, "mobilenet_v3_small_r": 7, "mobilenet_v3_large_r": 7, "mobilenet_v3_small_orient": 7, "magc_resnet31": 7, "global": 7, "context": 7, "224": [7, 8], "vit_": 7, "visiontransform": 7, "worth": 7, "16x16": 7, "patch": [7, 9], "unoffici": 7, "config": 7, "vit_b": 7, "b": [7, 9, 16], "textnet_tini": 7, "textnet": 7, "faster": [7, 15], "arbitrarili": 7, "detector": 7, "minimalist": 7, "kernel": [7, 8], "czczup": 7, "tini": 7, "textnet_smal": 7, "textnet_bas": 7, "crop_orientation_predictor": 7, "arch": [7, 13], "croporientationpredictor": 7, "np": [7, 8, 9, 16], "classif_mobilenet_v3_smal": 7, "input_crop": 7, "rand": [7, 8, 9, 15, 16], "600": [7, 9, 16], "800": [7, 9, 14, 16], "astyp": [7, 9, 16], "crop": [7, 8, 14, 16], "dataset": [7, 11, 16], "linknet_resnet18": [7, 11, 16], "1024": [7, 9, 11, 16], "linknet_resnet34": [7, 15, 16], "linknet_resnet50": [7, 16], "db_resnet50": [7, 11, 13, 16], "backbon": 7, "db_mobilenet_v3_larg": [7, 13, 16], "mobilenet": [7, 13], "v3": [7, 13, 16], "detection_predictor": [7, 16], "assume_straight_pag": [7, 16], "detectionpredictor": [7, 11], "input_pag": [7, 9, 16], "itself": [7, 13], "fit": [7, 16], "crnn_vgg16_bn": [7, 11, 13, 16], "128": [7, 11, 15, 16], "crnn_mobilenet_v3_smal": [7, 15, 16], "crnn_mobilenet_v3_larg": [7, 13, 16], "sar_resnet31": [7, 16], "31": 7, "64": [7, 8, 16], "256": 7, "paper": 7, "1910": 7, "02562": 7, "keywoard": 7, "vitstr_smal": [7, 11, 15, 16], "vitstr_bas": [7, 16], "recognition_predictor": [7, 16], "recognitionpredictor": [7, 11], "ocr_predictor": [7, 11, 13, 15, 16], "det_arch": [7, 11, 13, 15], "reco_arch": [7, 11, 13, 15], "pretrained_backbon": [7, 11], "symmetric_pad": [7, 8, 16], 
"export_as_straight_box": [7, 16], "detect_orient": 7, "straighten_pag": 7, "detect_languag": 7, "ocrpredictor": [7, 11], "up": [7, 16], "assum": 7, "preserv": [7, 8, 16], "ratio": [7, 8, 16], "symmetr": [7, 8, 16], "bottom": [7, 16], "final": 7, "potenti": 7, "estim": 7, "slightli": 7, "deterior": 7, "latenc": 7, "median": 7, "Then": 7, "again": 7, "improv": 7, "kie_predictor": [7, 11], "kiepredictor": 7, "kie": [7, 11], "login_to_hub": [7, 13], "login": 7, "huggingfac": 7, "hub": 7, "from_hub": [7, 13], "repo_id": [7, 13], "instanti": [7, 16], "hf": 7, "fasterrcnn_mobilenet_v3_large_fpn": 7, "repo": 7, "hf_hub_download": 7, "snapshot_download": 7, "checkpoint": 7, "push_to_hf_hub": [7, 13], "model_nam": [7, 13, 15], "save": [7, 14], "configur": 7, "my": 7, "procedur": 8, "draw": [8, 9], "design": 8, "torchvis": 8, "resiz": [8, 16], "bilinear": 8, "transfo": 8, "minval": 8, "interpol": 8, "zero": [8, 9], "while": [8, 16], "done": 8, "mean": [8, 9, 11], "std": [8, 11], "gaussian": 8, "distribut": 8, "485": 8, "456": 8, "406": 8, "229": [8, 14], "225": 8, "averag": [8, 16], "per": [8, 16], "standard": 8, "deviat": 8, "lambdatransform": 8, "fn": 8, "lambda": 8, "tograi": 8, "num_output_channel": 8, "grayscal": 8, "colorinvers": 8, "min_val": 8, "tranform": 8, "color": [8, 9], "shift": 8, "randomli": 8, "invert": 8, "6": [8, 16], "rang": 8, "randombright": 8, "max_delta": 8, "adjust": 8, "bright": 8, "delta": 8, "offset": 8, "add": [8, 9, 13, 16], "pick": 8, "p": [8, 9, 16], "probabl": 8, "randomcontrast": 8, "contrast": 8, "contrast_factor": 8, "factor": 8, "randomsatur": 8, "satur": 8, "hsv": 8, "increas": 8, "randomhu": 8, "hue": 8, "randomgamma": 8, "min_gamma": 8, "max_gamma": 8, "min_gain": 8, "max_gain": 8, "gamma": 8, "correct": 8, "neg": 8, "lower": [8, 9, 16], "param": [8, 16], "constant": 8, "multipli": 8, "randomjpegqu": 8, "min_qual": 8, "60": 8, "max_qual": 8, "jpeg": 8, "qualiti": 8, "dimension": 8, "between": [8, 9, 16], "randomrot": 8, "max_angl": 8, 
"expand": 8, "degre": 8, "uniformli": 8, "randomcrop": 8, "08": [8, 16], "75": [8, 16], "33": [8, 16], "min_area": 8, "max_area": 8, "min_ratio": 8, "max_ratio": 8, "gaussianblur": 8, "kernel_shap": 8, "blur": 8, "min": 8, "channelshuffl": 8, "gaussiannois": 8, "nois": 8, "randomhorizontalflip": 8, "flip": 8, "int64": [8, 9], "randomshadow": 8, "opacity_rang": 8, "shade": 8, "opac": 8, "It": [8, 13, 15], "consecut": [8, 16], "sequenti": [8, 16], "oneof": 8, "jpegqual": 8, "randomappli": 8, "regroup": 9, "core": [9, 16], "complementari": 9, "sens": 9, "visualize_pag": 9, "words_onli": 9, "display_artefact": 9, "add_label": 9, "figur": 9, "block": [9, 16], "plt": 9, "ocr_db_crnn": 9, "artefact": [9, 10, 16], "figsiz": 9, "largest": 9, "side": 9, "plot": 9, "static": 9, "top": [9, 16], "synthesize_pag": 9, "draw_proba": 9, "respons": 9, "blank": 9, "blue": 9, "red": 9, "font_siz": 9, "13": [9, 16], "famili": 9, "synthes": 9, "metric": [9, 16], "assess": 9, "textmatch": 9, "match": [9, 16], "accuraci": 9, "aggreg": [9, 14], "foral": 9, "y": 9, "mathcal": 9, "frac": 9, "sum": 9, "limits_": 9, "f_": 9, "y_i": 9, "x_i": 9, "indic": 9, "defin": [9, 15], "f_a": 9, "left": [9, 16], "begin": 9, "ll": 9, "mbox": 9, "strictli": 9, "integ": 9, "updat": 9, "hello": [9, 16], "world": [9, 16], "summari": 9, "gt": 9, "pred": 9, "groung": 9, "truth": 9, "exact": [9, 16], "score": 9, "counterpart": 9, "unidecod": 9, "localizationconfus": 9, "iou_thresh": 9, "mask_shap": 9, "use_broadcast": 9, "confus": 9, "iou": 9, "recal": [9, 16], "g_": 9, "precis": [9, 16], "meaniou": 9, "j": 9, "y_j": 9, "being": [9, 16], "intersect": 9, "union": 9, "g_x": 9, "assign": 9, "_i": 9, "geq": 9, "ground": 9, "asarrai": 9, "70": [9, 16], "110": 9, "95": [9, 16], "200": 9, "150": [9, 16], "pair": 9, "broadcast": 9, "consum": 9, "memori": [9, 12, 15], "either": [9, 16], "ocrmetr": 9, "l": 9, "hat": 9, "h_": 9, "b_j": 9, "l_j": 9, "gt_box": 9, "pred_box": 9, "gt_label": 9, "pred_label": 9, "comparison": 
[9, 16], "detectionmetr": 9, "c_j": 9, "compil": [10, 16], "better": [10, 16], "leverag": 10, "descript": 10, "colab": 10, "quicktour": 10, "present": 10, "main": 10, "produc": [10, 16], "searchabl": 10, "don": [11, 16], "meet": 11, "detail": [11, 16], "link": 11, "section": [11, 13, 15, 16], "det_model": [11, 13], "load_weight": 11, "path_to_checkpoint": 11, "weight": 11, "reco_model": [11, 13], "det_param": 11, "path_to_pt": 11, "map_loc": 11, "load_state_dict": 11, "reco_param": 11, "vocab": [11, 13, 14, 16], "class_nam": 11, "total": 11, "date": [11, 16], "preprocessor": [11, 16], "det_predictor": [11, 16], "798": 11, "785": 11, "772": 11, "264": 11, "2749": 11, "287": 11, "reco_predictor": 11, "694": 11, "695": 11, "693": 11, "299": 11, "296": 11, "301": 11, "polici": 12, "restrict": 12, "write": 12, "outsid": 12, "tmp": 12, "work": [12, 16], "step": 12, "usag": [12, 15], "multiprocess": 12, "doctr_multiprocessing_dis": 12, "variabl": 12, "becaus": 12, "shm": 12, "share": [12, 14], "chang": 12, "By": 12, "doctr_cache_dir": 12, "focu": 13, "love": 13, "appreci": 13, "interfac": 13, "io": 13, "custom": [13, 16], "felix92": 13, "db": 13, "vgg16": 13, "bn": 13, "plug": 13, "obj_detect": 13, "exist": 13, "overwritten": 13, "prerequisit": 13, "creat": 13, "co": 13, "instal": 13, "git": 13, "lf": 13, "my_awesome_model": 13, "v1": 13, "directli": [13, 16], "after": [13, 16], "python3": 13, "train_tensorflow": 13, "py": 13, "train_pytorch": 13, "tabl": 13, "pull": 13, "dummi": 13, "tilman": 13, "rassi": 13, "fascan": 13, "evalu": [14, 16], "predefin": 14, "prefer": 14, "signific": 14, "valid": 14, "149": 14, "626": 14, "360": 14, "2000": 14, "3000": 14, "249": 14, "33402": 14, "13068": 14, "772875": 14, "85875": 14, "246": 14, "233": 14, "resourc": 14, "7149": 14, "796": 14, "handwritten": 14, "1268": 14, "472": 14, "21888": 14, "8707": 14, "33608": 14, "19342": 14, "uppercas": 14, "19370": 14, "2186": 14, "257": 14, "647": 14, "73257": 14, "26032": 14, "7100000": 14, 
"707470": 14, "1156": 14, "1107": 14, "849": 14, "1095": 14, "207901": 14, "22672": 14, "7581382": 14, "1337891": 14, "7141797": 14, "793533": 14, "49377": 14, "19598": 14, "alwai": 14, "regular": 14, "2700": 14, "300": 14, "background": 14, "qr_code": 14, "bar_cod": 14, "photo": 14, "classif": 14, "mani": [14, 16], "sensit": 14, "abl": [14, 16], "howev": 14, "guidanc": 14, "tool": 14, "further": 14, "anot": 14, "handl": [14, 16], "underli": [14, 16], "defer": 14, "dataload": 14, "good": 15, "achiev": 15, "might": [15, 16], "tune": 15, "thing": [15, 16], "product": 15, "readi": 15, "help": 15, "support": [15, 16], "devic": 15, "fp16": 15, "point": 15, "occupi": 15, "bit": 15, "advantag": 15, "less": [15, 16], "mixed_precis": 15, "set_global_polici": 15, "mixed_float16": 15, "cuda": 15, "re": 15, "exchang": 15, "interoper": 15, "machin": 15, "structur": [15, 16], "layer": 15, "metadata": 15, "util": 15, "export_model_to_onnx": 15, "input_shap": 15, "dummy_input": 15, "tensorspec": 15, "model_path": 15, "come": 15, "soon": 15, "seen": 16, "onc": 16, "separ": 16, "compon": 16, "charg": 16, "usabl": 16, "backend": 16, "along": 16, "processor": 16, "reusabl": 16, "consist": 16, "delimit": 16, "2d": 16, "corner": 16, "flag": 16, "belong": 16, "skew": 16, "comprehens": 16, "benchmark": 16, "publicli": 16, "sec": 16, "25": 16, "84": 16, "39": 16, "85": 16, "86": 16, "93": 16, "83": 16, "24": 16, "80": 16, "29": 16, "90": 16, "67": 16, "76": 16, "11": 16, "81": 16, "71": 16, "7": 16, "21": 16, "82": 16, "20": 16, "49": 16, "87": 16, "63": 16, "17": 16, "28": 16, "51": 16, "46": 16, "db_resnet34": 16, "22": 16, "89": 16, "74": 16, "56": 16, "68": 16, "92": 16, "61": 16, "41": 16, "00": 16, "79": 16, "38": 16, "88": 16, "62": 16, "26": 16, "06": 16, "78": 16, "47": 16, "54": 16, "abov": 16, "cf": 16, "disclaim": 16, "combin": 16, "199": 16, "second": 16, "warmup": 16, "phase": 16, "measur": 16, "1000": 16, "obtain": 16, "11th": 16, "gen": 16, "intel": 16, "r": 16, "tm": 16, 
"i7": 16, "11800h": 16, "30ghz": 16, "wrap": 16, "useabl": 16, "favorit": 16, "dummy_img": 16, "area": 16, "send": 16, "snippet": 16, "transcrib": 16, "partial": 16, "15": 16, "9": 16, "73": 16, "44": 16, "14": 16, "55": 16, "58": 16, "57": 16, "66": 16, "01": 16, "98": 16, "23": 16, "69": 16, "99": 16, "91": 16, "05": 16, "09": 16, "96": 16, "40": 16, "53": 16, "most": 16, "print": 16, "cfg": 16, "30595": 16, "45": 16, "72": 16, "43": 16, "65": 16, "77": 16, "30": 16, "07": 16, "27": 16, "gvision": 16, "59": 16, "03": 16, "azur": 16, "recogn": 16, "42": 16, "go": 16, "mention": 16, "still": 16, "return": 16, "documentbuild": 16, "resolve_lin": 16, "automat": 16, "resolve_block": 16, "paragraph_break": 16, "paragraph": 16, "035": 16, "nest": 16, "get": 16, "typic": 16, "layout": 16, "340": 16, "text_output": 16, "json_output": 16, "1357421875": 16, "0361328125": 16, "8564453125": 16, "8603515625": 16, "914085328578949": 16, "5478515625": 16, "06640625": 16, "5810546875": 16, "0966796875": 16, "9949972033500671": 16, "51171875": 16, "1630859375": 16, "9578408598899841": 16, "1396484375": 16, "3232421875": 16, "185546875": 16, "3515625": 16, "outpout": 16, "xml": 16, "hocr": 16, "export_as_xml": 16, "xml_output": 16, "xml_bytes_str": 16, "xml_element": 16, "utf": 16, "xmln": 16, "w3": 16, "1999": 16, "xhtml": 16, "lang": 16, "en": 16, "meta": 16, "equiv": 16, "charset": 16, "system": 16, "ocr_pag": 16, "ocr_carea": 16, "ocr_par": 16, "ocr_lin": 16, "ocrx_word": 16, "div": 16, "id": 16, "page_1": 16, "bbox": 16, "3456": 16, "ppageno": 16, "block_1_1": 16, "857": 16, "529": 16, "2504": 16, "2710": 16, "par_1_1": 16, "span": 16, "line_1_1": 16, "x_size": 16, "x_descend": 16, "x_ascend": 16, "word_1_1": 16, "1552": 16, "540": 16, "1778": 16, "580": 16, "x_wconf": 16, "word_1_2": 16, "1782": 16, "1900": 16, "583": 16, "word_1_3": 16, "1420": 16, "597": 16, "1684": 16, "641": 16, "threshold": 16, "region": 16, "accur": 16, "postprocessor": 16, "bin_thresh": 16, 
"box_thresh": 16, "hook": 16, "manipul": 16, "customhook": 16, "def": 16, "__call__": 16, "self": 16, "loc_pr": 16, "Be": 16, "awar": 16, "my_hook": 16, "middl": 16, "pipelin": 16, "add_hook": 16, "execut": 16}, "objects": {"doctr.datasets": [[5, 0, 1, "", "CORD"], [5, 0, 1, "", "CharacterGenerator"], [5, 0, 1, "", "DetectionDataset"], [5, 0, 1, "", "DocArtefacts"], [5, 0, 1, "", "FUNSD"], [5, 0, 1, "", "IC03"], [5, 0, 1, "", "IC13"], [5, 0, 1, "", "IIIT5K"], [5, 0, 1, "", "IIITHWS"], [5, 0, 1, "", "IMGUR5K"], [5, 0, 1, "", "MJSynth"], [5, 0, 1, "", "OCRDataset"], [5, 0, 1, "", "RecognitionDataset"], [5, 0, 1, "", "SROIE"], [5, 0, 1, "", "SVHN"], [5, 0, 1, "", "SVT"], [5, 0, 1, "", "SynthText"], [5, 0, 1, "", "WILDRECEIPT"], [5, 0, 1, "", "WordGenerator"], [5, 1, 1, "", "encode_sequences"]], "doctr.datasets.loader": [[5, 0, 1, "", "DataLoader"]], "doctr.io": [[6, 0, 1, "", "Artefact"], [6, 0, 1, "", "Block"], [6, 0, 1, "", "Document"], [6, 0, 1, "", "DocumentFile"], [6, 0, 1, "", "Line"], [6, 0, 1, "", "Page"], [6, 0, 1, "", "Word"], [6, 1, 1, "", "decode_img_as_tensor"], [6, 1, 1, "", "read_html"], [6, 1, 1, "", "read_img_as_numpy"], [6, 1, 1, "", "read_img_as_tensor"], [6, 1, 1, "", "read_pdf"]], "doctr.io.Document": [[6, 2, 1, "", "show"]], "doctr.io.DocumentFile": [[6, 2, 1, "", "from_images"], [6, 2, 1, "", "from_pdf"], [6, 2, 1, "", "from_url"]], "doctr.io.Page": [[6, 2, 1, "", "show"]], "doctr.models.classification": [[7, 1, 1, "", "crop_orientation_predictor"], [7, 1, 1, "", "magc_resnet31"], [7, 1, 1, "", "mobilenet_v3_large"], [7, 1, 1, "", "mobilenet_v3_large_r"], [7, 1, 1, "", "mobilenet_v3_small"], [7, 1, 1, "", "mobilenet_v3_small_orientation"], [7, 1, 1, "", "mobilenet_v3_small_r"], [7, 1, 1, "", "resnet18"], [7, 1, 1, "", "resnet31"], [7, 1, 1, "", "resnet34"], [7, 1, 1, "", "resnet50"], [7, 1, 1, "", "textnet_base"], [7, 1, 1, "", "textnet_small"], [7, 1, 1, "", "textnet_tiny"], [7, 1, 1, "", "vgg16_bn_r"], [7, 1, 1, "", "vit_b"], [7, 1, 1, "", 
"vit_s"]], "doctr.models.detection": [[7, 1, 1, "", "db_mobilenet_v3_large"], [7, 1, 1, "", "db_resnet50"], [7, 1, 1, "", "detection_predictor"], [7, 1, 1, "", "linknet_resnet18"], [7, 1, 1, "", "linknet_resnet34"], [7, 1, 1, "", "linknet_resnet50"]], "doctr.models.factory": [[7, 1, 1, "", "from_hub"], [7, 1, 1, "", "login_to_hub"], [7, 1, 1, "", "push_to_hf_hub"]], "doctr.models": [[7, 1, 1, "", "kie_predictor"], [7, 1, 1, "", "ocr_predictor"]], "doctr.models.recognition": [[7, 1, 1, "", "crnn_mobilenet_v3_large"], [7, 1, 1, "", "crnn_mobilenet_v3_small"], [7, 1, 1, "", "crnn_vgg16_bn"], [7, 1, 1, "", "master"], [7, 1, 1, "", "parseq"], [7, 1, 1, "", "recognition_predictor"], [7, 1, 1, "", "sar_resnet31"], [7, 1, 1, "", "vitstr_base"], [7, 1, 1, "", "vitstr_small"]], "doctr.transforms": [[8, 0, 1, "", "ChannelShuffle"], [8, 0, 1, "", "ColorInversion"], [8, 0, 1, "", "Compose"], [8, 0, 1, "", "GaussianBlur"], [8, 0, 1, "", "GaussianNoise"], [8, 0, 1, "", "LambdaTransformation"], [8, 0, 1, "", "Normalize"], [8, 0, 1, "", "OneOf"], [8, 0, 1, "", "RandomApply"], [8, 0, 1, "", "RandomBrightness"], [8, 0, 1, "", "RandomContrast"], [8, 0, 1, "", "RandomCrop"], [8, 0, 1, "", "RandomGamma"], [8, 0, 1, "", "RandomHorizontalFlip"], [8, 0, 1, "", "RandomHue"], [8, 0, 1, "", "RandomJpegQuality"], [8, 0, 1, "", "RandomRotate"], [8, 0, 1, "", "RandomSaturation"], [8, 0, 1, "", "RandomShadow"], [8, 0, 1, "", "Resize"], [8, 0, 1, "", "ToGray"]], "doctr.utils.metrics": [[9, 0, 1, "", "DetectionMetric"], [9, 0, 1, "", "LocalizationConfusion"], [9, 0, 1, "", "OCRMetric"], [9, 0, 1, "", "TextMatch"]], "doctr.utils.metrics.DetectionMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.LocalizationConfusion": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.OCRMetric": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.metrics.TextMatch": [[9, 2, 1, "", "summary"], [9, 2, 1, "", "update"]], "doctr.utils.visualization": 
[[9, 1, 1, "", "synthesize_page"], [9, 1, 1, "", "visualize_page"]]}, "objtypes": {"0": "py:class", "1": "py:function", "2": "py:method"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "function", "Python function"], "2": ["py", "method", "Python method"]}, "titleterms": {"changelog": 0, "v0": 0, "7": 0, "0": 0, "2024": 0, "09": 0, "6": 0, "2022": 0, "29": 0, "5": 0, "1": [0, 1], "03": 0, "22": 0, "2021": 0, "12": 0, "31": 0, "4": [0, 1], "11": 0, "10": 0, "01": 0, "3": [0, 1], "08": 0, "27": 0, "07": 0, "02": 0, "2": [0, 1], "05": 0, "28": 0, "18": 0, "contributor": 1, "coven": 1, "code": [1, 2], "conduct": 1, "our": 1, "pledg": 1, "standard": 1, "enforc": 1, "respons": 1, "scope": 1, "guidelin": 1, "correct": 1, "warn": 1, "temporari": 1, "ban": 1, "perman": 1, "attribut": 1, "contribut": 2, "doctr": [2, 4, 5, 6, 7, 8, 9, 10, 15], "codebas": 2, "structur": [2, 6], "continu": 2, "integr": 2, "feedback": 2, "featur": [2, 4], "request": 2, "bug": 2, "report": 2, "question": 2, "develop": 2, "mode": 2, "instal": [2, 3], "commit": 2, "unit": 2, "test": 2, "qualiti": 2, "style": 2, "verif": 2, "modifi": 2, "document": [2, 4, 6], "let": 2, "": 2, "connect": 2, "prerequisit": 3, "via": 3, "python": 3, "packag": 3, "git": 3, "text": [4, 16], "recognit": [4, 7, 13, 14, 16], "main": 4, "model": [4, 7, 11, 13, 15, 16], "zoo": [4, 7], "detect": [4, 7, 13, 14, 16], "support": [4, 5, 8], "dataset": [4, 5, 14], "arg": [5, 6, 7, 8, 9], "synthet": [5, 14], "gener": [5, 14], "custom": [5, 11], "loader": 5, "dataload": 5, "vocab": 5, "return": [5, 6, 7, 9], "io": 6, "word": 6, "line": 6, "artefact": 6, "block": 6, "page": 6, "file": 6, "read": 6, "classif": [7, 13], "factori": 7, "transform": 8, "compos": 8, "util": 9, "visual": 9, "task": 9, "evalu": 9, "notebook": 10, "train": 11, "your": [11, 13, 14, 15], "own": [11, 14], "load": [11, 13, 14], "aw": 12, "lambda": 12, "share": 13, "commun": 13, "from": 13, "huggingfac": 13, "hub": 13, "push": 13, "pretrain": 13, 
"name": 13, "convent": 13, "choos": [14, 16], "readi": 14, "us": [14, 15], "avail": [14, 16], "object": 14, "data": 14, "prepar": 15, "infer": 15, "optim": 15, "half": 15, "precis": 15, "export": 15, "onnx": 15, "right": 16, "architectur": 16, "predictor": 16, "end": 16, "ocr": 16, "two": 16, "stage": 16, "approach": 16, "what": 16, "should": 16, "i": 16, "do": 16, "output": 16, "advanc": 16, "option": 16}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 58}, "alltitles": {"Changelog": [[0, "changelog"]], "v0.7.0 (2024-09-09)": [[0, "v0-7-0-2024-09-09"]], "v0.6.0 (2022-09-29)": [[0, "v0-6-0-2022-09-29"]], "v0.5.1 (2022-03-22)": [[0, "v0-5-1-2022-03-22"]], "v0.5.0 (2021-12-31)": [[0, "v0-5-0-2021-12-31"]], "v0.4.1 (2021-11-22)": [[0, "v0-4-1-2021-11-22"]], "v0.4.0 (2021-10-01)": [[0, "v0-4-0-2021-10-01"]], "v0.3.1 (2021-08-27)": [[0, "v0-3-1-2021-08-27"]], "v0.3.0 (2021-07-02)": [[0, "v0-3-0-2021-07-02"]], "v0.2.1 (2021-05-28)": [[0, "v0-2-1-2021-05-28"]], "v0.2.0 (2021-05-11)": [[0, "v0-2-0-2021-05-11"]], "v0.1.1 (2021-03-18)": [[0, "v0-1-1-2021-03-18"]], "v0.1.0 (2021-03-05)": [[0, "v0-1-0-2021-03-05"]], "Contributor Covenant Code of Conduct": [[1, "contributor-covenant-code-of-conduct"]], "Our Pledge": [[1, "our-pledge"]], "Our Standards": [[1, "our-standards"]], "Enforcement Responsibilities": [[1, "enforcement-responsibilities"]], "Scope": [[1, "scope"]], "Enforcement": [[1, "enforcement"]], "Enforcement Guidelines": [[1, "enforcement-guidelines"]], "1. Correction": [[1, "correction"]], "2. Warning": [[1, "warning"]], "3. Temporary Ban": [[1, "temporary-ban"]], "4. 
Permanent Ban": [[1, "permanent-ban"]], "Attribution": [[1, "attribution"]], "Contributing to docTR": [[2, "contributing-to-doctr"]], "Codebase structure": [[2, "codebase-structure"]], "Continuous Integration": [[2, "continuous-integration"]], "Feedback": [[2, "feedback"]], "Feature requests & bug report": [[2, "feature-requests-bug-report"]], "Questions": [[2, "questions"]], "Developing docTR": [[2, "developing-doctr"]], "Developer mode installation": [[2, "developer-mode-installation"]], "Commits": [[2, "commits"]], "Unit tests": [[2, "unit-tests"]], "Code quality": [[2, "code-quality"]], "Code style verification": [[2, "code-style-verification"]], "Modifying the documentation": [[2, "modifying-the-documentation"]], "Let\u2019s connect": [[2, "let-s-connect"]], "Installation": [[3, "installation"]], "Prerequisites": [[3, "prerequisites"]], "Via Python Package": [[3, "via-python-package"]], "Via Git": [[3, "via-git"]], "docTR: Document Text Recognition": [[4, "doctr-document-text-recognition"]], "Main Features": [[4, "main-features"]], "Model zoo": [[4, "model-zoo"]], "Text detection models": [[4, "text-detection-models"]], "Text recognition models": [[4, "text-recognition-models"]], "Supported datasets": [[4, "supported-datasets"]], "doctr.datasets": [[5, "doctr-datasets"], [5, "datasets"]], "Args:": [[5, "args"], [5, "id4"], [5, "id7"], [5, "id10"], [5, "id13"], [5, "id16"], [5, "id19"], [5, "id22"], [5, "id25"], [5, "id29"], [5, "id32"], [5, "id37"], [5, "id40"], [5, "id46"], [5, "id49"], [5, "id50"], [5, "id51"], [5, "id54"], [5, "id57"], [5, "id60"], [5, "id61"], [6, "args"], [6, "id2"], [6, "id3"], [6, "id4"], [6, "id5"], [6, "id6"], [6, "id7"], [6, "id10"], [6, "id12"], [6, "id14"], [6, "id16"], [6, "id20"], [6, "id24"], [6, "id28"], [7, "args"], [7, "id3"], [7, "id8"], [7, "id13"], [7, "id17"], [7, "id21"], [7, "id26"], [7, "id31"], [7, "id36"], [7, "id41"], [7, "id45"], [7, "id49"], [7, "id54"], [7, "id58"], [7, "id63"], [7, "id68"], [7, "id72"], [7, 
"id76"], [7, "id81"], [7, "id86"], [7, "id90"], [7, "id95"], [7, "id99"], [7, "id103"], [7, "id108"], [7, "id113"], [7, "id118"], [7, "id122"], [7, "id126"], [7, "id131"], [7, "id135"], [7, "id139"], [7, "id143"], [7, "id145"], [7, "id147"], [7, "id149"], [8, "args"], [8, "id1"], [8, "id2"], [8, "id3"], [8, "id4"], [8, "id5"], [8, "id6"], [8, "id7"], [8, "id8"], [8, "id9"], [8, "id10"], [8, "id11"], [8, "id12"], [8, "id13"], [8, "id14"], [8, "id15"], [8, "id16"], [8, "id17"], [8, "id18"], [9, "args"], [9, "id3"], [9, "id5"], [9, "id6"], [9, "id7"], [9, "id8"], [9, "id9"], [9, "id10"], [9, "id11"]], "Synthetic dataset generator": [[5, "synthetic-dataset-generator"], [14, "synthetic-dataset-generator"]], "Custom dataset loader": [[5, "custom-dataset-loader"]], "Dataloader": [[5, "dataloader"]], "Supported Vocabs": [[5, "supported-vocabs"]], "docTR Vocabs": [[5, "id62"]], "Returns:": [[5, "returns"], [6, "returns"], [6, "id11"], [6, "id13"], [6, "id15"], [6, "id19"], [6, "id23"], [6, "id27"], [6, "id31"], [7, "returns"], [7, "id6"], [7, "id11"], [7, "id16"], [7, "id20"], [7, "id24"], [7, "id29"], [7, "id34"], [7, "id39"], [7, "id44"], [7, "id48"], [7, "id52"], [7, "id57"], [7, "id61"], [7, "id66"], [7, "id71"], [7, "id75"], [7, "id79"], [7, "id84"], [7, "id89"], [7, "id93"], [7, "id98"], [7, "id102"], [7, "id106"], [7, "id111"], [7, "id116"], [7, "id121"], [7, "id125"], [7, "id129"], [7, "id134"], [7, "id138"], [7, "id142"], [7, "id144"], [7, "id146"], [7, "id148"], [9, "returns"], [9, "id4"]], "doctr.io": [[6, "doctr-io"]], "Document structure": [[6, "document-structure"]], "Word": [[6, "word"]], "Line": [[6, "line"]], "Artefact": [[6, "artefact"]], "Block": [[6, "block"]], "Page": [[6, "page"]], "Document": [[6, "document"]], "File reading": [[6, "file-reading"]], "doctr.models": [[7, "doctr-models"]], "doctr.models.classification": [[7, "doctr-models-classification"]], "doctr.models.detection": [[7, "doctr-models-detection"]], "doctr.models.recognition": [[7, 
"doctr-models-recognition"]], "doctr.models.zoo": [[7, "doctr-models-zoo"]], "doctr.models.factory": [[7, "doctr-models-factory"]], "doctr.transforms": [[8, "doctr-transforms"]], "Supported transformations": [[8, "supported-transformations"]], "Composing transformations": [[8, "composing-transformations"]], "doctr.utils": [[9, "doctr-utils"]], "Visualization": [[9, "visualization"]], "Task evaluation": [[9, "task-evaluation"]], "docTR Notebooks": [[10, "doctr-notebooks"]], "Train your own model": [[11, "train-your-own-model"]], "Loading your custom trained model": [[11, "loading-your-custom-trained-model"]], "AWS Lambda": [[12, "aws-lambda"]], "Share your model with the community": [[13, "share-your-model-with-the-community"]], "Loading from Huggingface Hub": [[13, "loading-from-huggingface-hub"]], "Pushing to the Huggingface Hub": [[13, "pushing-to-the-huggingface-hub"]], "Pretrained community models": [[13, "pretrained-community-models"]], "Naming conventions": [[13, "naming-conventions"]], "Classification": [[13, "classification"]], "Detection": [[13, "detection"], [14, "detection"]], "Recognition": [[13, "recognition"], [14, "recognition"]], "Choose a ready to use dataset": [[14, "choose-a-ready-to-use-dataset"]], "Available Datasets": [[14, "available-datasets"]], "Object Detection": [[14, "object-detection"]], "Use your own datasets": [[14, "use-your-own-datasets"]], "Data Loading": [[14, "data-loading"]], "Preparing your model for inference": [[15, "preparing-your-model-for-inference"]], "Model optimization": [[15, "model-optimization"]], "Half-precision": [[15, "half-precision"]], "Export to ONNX": [[15, "export-to-onnx"]], "Using your ONNX exported model in docTR": [[15, "using-your-onnx-exported-model-in-doctr"]], "Choosing the right model": [[16, "choosing-the-right-model"]], "Text Detection": [[16, "text-detection"]], "Available architectures": [[16, "available-architectures"], [16, "id1"], [16, "id2"]], "Detection predictors": [[16, 
"detection-predictors"]], "Text Recognition": [[16, "text-recognition"]], "Recognition predictors": [[16, "recognition-predictors"]], "End-to-End OCR": [[16, "end-to-end-ocr"]], "Two-stage approaches": [[16, "two-stage-approaches"]], "What should I do with the output?": [[16, "what-should-i-do-with-the-output"]], "Advanced options": [[16, "advanced-options"]]}, "indexentries": {"cord (class in doctr.datasets)": [[5, "doctr.datasets.CORD"]], "charactergenerator (class in doctr.datasets)": [[5, "doctr.datasets.CharacterGenerator"]], "dataloader (class in doctr.datasets.loader)": [[5, "doctr.datasets.loader.DataLoader"]], "detectiondataset (class in doctr.datasets)": [[5, "doctr.datasets.DetectionDataset"]], "docartefacts (class in doctr.datasets)": [[5, "doctr.datasets.DocArtefacts"]], "funsd (class in doctr.datasets)": [[5, "doctr.datasets.FUNSD"]], "ic03 (class in doctr.datasets)": [[5, "doctr.datasets.IC03"]], "ic13 (class in doctr.datasets)": [[5, "doctr.datasets.IC13"]], "iiit5k (class in doctr.datasets)": [[5, "doctr.datasets.IIIT5K"]], "iiithws (class in doctr.datasets)": [[5, "doctr.datasets.IIITHWS"]], "imgur5k (class in doctr.datasets)": [[5, "doctr.datasets.IMGUR5K"]], "mjsynth (class in doctr.datasets)": [[5, "doctr.datasets.MJSynth"]], "ocrdataset (class in doctr.datasets)": [[5, "doctr.datasets.OCRDataset"]], "recognitiondataset (class in doctr.datasets)": [[5, "doctr.datasets.RecognitionDataset"]], "sroie (class in doctr.datasets)": [[5, "doctr.datasets.SROIE"]], "svhn (class in doctr.datasets)": [[5, "doctr.datasets.SVHN"]], "svt (class in doctr.datasets)": [[5, "doctr.datasets.SVT"]], "synthtext (class in doctr.datasets)": [[5, "doctr.datasets.SynthText"]], "wildreceipt (class in doctr.datasets)": [[5, "doctr.datasets.WILDRECEIPT"]], "wordgenerator (class in doctr.datasets)": [[5, "doctr.datasets.WordGenerator"]], "encode_sequences() (in module doctr.datasets)": [[5, "doctr.datasets.encode_sequences"]], "artefact (class in doctr.io)": [[6, 
"doctr.io.Artefact"]], "block (class in doctr.io)": [[6, "doctr.io.Block"]], "document (class in doctr.io)": [[6, "doctr.io.Document"]], "documentfile (class in doctr.io)": [[6, "doctr.io.DocumentFile"]], "line (class in doctr.io)": [[6, "doctr.io.Line"]], "page (class in doctr.io)": [[6, "doctr.io.Page"]], "word (class in doctr.io)": [[6, "doctr.io.Word"]], "decode_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.decode_img_as_tensor"]], "from_images() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_images"]], "from_pdf() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_pdf"]], "from_url() (doctr.io.documentfile class method)": [[6, "doctr.io.DocumentFile.from_url"]], "read_html() (in module doctr.io)": [[6, "doctr.io.read_html"]], "read_img_as_numpy() (in module doctr.io)": [[6, "doctr.io.read_img_as_numpy"]], "read_img_as_tensor() (in module doctr.io)": [[6, "doctr.io.read_img_as_tensor"]], "read_pdf() (in module doctr.io)": [[6, "doctr.io.read_pdf"]], "show() (doctr.io.document method)": [[6, "doctr.io.Document.show"]], "show() (doctr.io.page method)": [[6, "doctr.io.Page.show"]], "crnn_mobilenet_v3_large() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_large"]], "crnn_mobilenet_v3_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_mobilenet_v3_small"]], "crnn_vgg16_bn() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.crnn_vgg16_bn"]], "crop_orientation_predictor() (in module doctr.models.classification)": [[7, "doctr.models.classification.crop_orientation_predictor"]], "db_mobilenet_v3_large() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_mobilenet_v3_large"]], "db_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.db_resnet50"]], "detection_predictor() (in module doctr.models.detection)": [[7, "doctr.models.detection.detection_predictor"]], "from_hub() (in module 
doctr.models.factory)": [[7, "doctr.models.factory.from_hub"]], "kie_predictor() (in module doctr.models)": [[7, "doctr.models.kie_predictor"]], "linknet_resnet18() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet18"]], "linknet_resnet34() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet34"]], "linknet_resnet50() (in module doctr.models.detection)": [[7, "doctr.models.detection.linknet_resnet50"]], "login_to_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.login_to_hub"]], "magc_resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.magc_resnet31"]], "master() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.master"]], "mobilenet_v3_large() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large"]], "mobilenet_v3_large_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_large_r"]], "mobilenet_v3_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small"]], "mobilenet_v3_small_orientation() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_orientation"]], "mobilenet_v3_small_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.mobilenet_v3_small_r"]], "ocr_predictor() (in module doctr.models)": [[7, "doctr.models.ocr_predictor"]], "parseq() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.parseq"]], "push_to_hf_hub() (in module doctr.models.factory)": [[7, "doctr.models.factory.push_to_hf_hub"]], "recognition_predictor() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.recognition_predictor"]], "resnet18() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet18"]], "resnet31() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet31"]], 
"resnet34() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet34"]], "resnet50() (in module doctr.models.classification)": [[7, "doctr.models.classification.resnet50"]], "sar_resnet31() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.sar_resnet31"]], "textnet_base() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_base"]], "textnet_small() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_small"]], "textnet_tiny() (in module doctr.models.classification)": [[7, "doctr.models.classification.textnet_tiny"]], "vgg16_bn_r() (in module doctr.models.classification)": [[7, "doctr.models.classification.vgg16_bn_r"]], "vit_b() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_b"]], "vit_s() (in module doctr.models.classification)": [[7, "doctr.models.classification.vit_s"]], "vitstr_base() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_base"]], "vitstr_small() (in module doctr.models.recognition)": [[7, "doctr.models.recognition.vitstr_small"]], "channelshuffle (class in doctr.transforms)": [[8, "doctr.transforms.ChannelShuffle"]], "colorinversion (class in doctr.transforms)": [[8, "doctr.transforms.ColorInversion"]], "compose (class in doctr.transforms)": [[8, "doctr.transforms.Compose"]], "gaussianblur (class in doctr.transforms)": [[8, "doctr.transforms.GaussianBlur"]], "gaussiannoise (class in doctr.transforms)": [[8, "doctr.transforms.GaussianNoise"]], "lambdatransformation (class in doctr.transforms)": [[8, "doctr.transforms.LambdaTransformation"]], "normalize (class in doctr.transforms)": [[8, "doctr.transforms.Normalize"]], "oneof (class in doctr.transforms)": [[8, "doctr.transforms.OneOf"]], "randomapply (class in doctr.transforms)": [[8, "doctr.transforms.RandomApply"]], "randombrightness (class in doctr.transforms)": [[8, "doctr.transforms.RandomBrightness"]], "randomcontrast (class 
in doctr.transforms)": [[8, "doctr.transforms.RandomContrast"]], "randomcrop (class in doctr.transforms)": [[8, "doctr.transforms.RandomCrop"]], "randomgamma (class in doctr.transforms)": [[8, "doctr.transforms.RandomGamma"]], "randomhorizontalflip (class in doctr.transforms)": [[8, "doctr.transforms.RandomHorizontalFlip"]], "randomhue (class in doctr.transforms)": [[8, "doctr.transforms.RandomHue"]], "randomjpegquality (class in doctr.transforms)": [[8, "doctr.transforms.RandomJpegQuality"]], "randomrotate (class in doctr.transforms)": [[8, "doctr.transforms.RandomRotate"]], "randomsaturation (class in doctr.transforms)": [[8, "doctr.transforms.RandomSaturation"]], "randomshadow (class in doctr.transforms)": [[8, "doctr.transforms.RandomShadow"]], "resize (class in doctr.transforms)": [[8, "doctr.transforms.Resize"]], "togray (class in doctr.transforms)": [[8, "doctr.transforms.ToGray"]], "detectionmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.DetectionMetric"]], "localizationconfusion (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.LocalizationConfusion"]], "ocrmetric (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.OCRMetric"]], "textmatch (class in doctr.utils.metrics)": [[9, "doctr.utils.metrics.TextMatch"]], "summary() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.summary"]], "summary() (doctr.utils.metrics.localizationconfusion method)": [[9, "doctr.utils.metrics.LocalizationConfusion.summary"]], "summary() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.summary"]], "summary() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.summary"]], "synthesize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.synthesize_page"]], "update() (doctr.utils.metrics.detectionmetric method)": [[9, "doctr.utils.metrics.DetectionMetric.update"]], "update() (doctr.utils.metrics.localizationconfusion method)": [[9, 
"doctr.utils.metrics.LocalizationConfusion.update"]], "update() (doctr.utils.metrics.ocrmetric method)": [[9, "doctr.utils.metrics.OCRMetric.update"]], "update() (doctr.utils.metrics.textmatch method)": [[9, "doctr.utils.metrics.TextMatch.update"]], "visualize_page() (in module doctr.utils.visualization)": [[9, "doctr.utils.visualization.visualize_page"]]}}) \ No newline at end of file diff --git a/latest/using_doctr/using_models.html b/latest/using_doctr/using_models.html index b2e6a5d739..b471cdc1f1 100644 --- a/latest/using_doctr/using_models.html +++ b/latest/using_doctr/using_models.html @@ -836,6 +836,17 @@
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()
To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying DocumentBuilder:
+resolve_lines: whether words should be automatically grouped into lines (default: True)
resolve_blocks: whether lines should be automatically grouped into blocks (default: True)
paragraph_break: relative length of the minimum space separating paragraphs (default: 0.035)
For example, to disable the automatic grouping of lines into blocks:
+from doctr.models import ocr_predictor
+model = ocr_predictor(pretrained=True, resolve_blocks=False)
+
To get only the text content of the Document, you can use the render method:
+text_output = result.render()
+
For reference, here is the output for the Document above:
+No. RECEIPT DATE
+
You can also export them as a nested dict, more appropriate for JSON format:
json_output = result.export()