diff --git a/docs/source/modules/datasets.rst b/docs/source/modules/datasets.rst index d5f07b5651..d06498e766 100644 --- a/docs/source/modules/datasets.rst +++ b/docs/source/modules/datasets.rst @@ -180,6 +180,9 @@ of vocabs. * - bangla - 70 - অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ০১২৩৪৫৬৭৮৯ + * - gujarati + - 97 + - અઆઇઈઉઊઋએઐઓઔખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ૦૧૨૩૪૫૬૭૮૯૰ઽ◌ંઃ॥ૐ઼ઁ૱!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ * - multilingual - 195 - english & french & german & italian & spanish & portuguese & czech & polish & dutch & norwegian & danish & finnish & swedish & § diff --git a/doctr/datasets/vocabs.py b/doctr/datasets/vocabs.py index cab91377db..3bd2d9032b 100644 --- a/doctr/datasets/vocabs.py +++ b/doctr/datasets/vocabs.py @@ -22,6 +22,10 @@ "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह", "hindi_digits": "०१२३४५६७८९", "hindi_punctuation": "।,?!:्ॐ॰॥", + "gujarati_vowels": "અઆઇઈઉઊઋએઐઓઔ", + "gujarati_consonants": "ખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ", + "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯", + "gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱", "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ", "bangla_digits": "০১২৩৪৫৬৭৮৯", "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ", @@ -58,6 +62,13 @@ ) VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪" VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"] +VOCABS["gujarati"] = ( + VOCABS["gujarati_vowels"] + + VOCABS["gujarati_consonants"] + + VOCABS["gujarati_digits"] + + VOCABS["gujarati_punctuation"] + + VOCABS["punctuation"] +) VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"] VOCABS["ukrainian"] = ( VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"