-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAadharExtractor.py
154 lines (123 loc) · 3.38 KB
/
AadharExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import json
from unicodedata import name
import OCR
import re
from pip import main
def isEnglish(str):
    """Return True when every character of the given text is ASCII.

    "English" here is approximated as "pure ASCII": any character outside
    the ASCII range makes the result False.  An empty string yields True.

    NOTE: the parameter shadows the builtin ``str``; the name is kept so
    that callers passing it by keyword keep working.
    """
    asciiOnly = str.isascii()
    return asciiOnly
def matchesDateFormat(str):
    """Return True when the text contains exactly two '/' characters.

    This is a loose check: only the number of slashes is inspected, not
    their positions nor the characters around them.

    NOTE: the parameter shadows the builtin ``str``; the name is kept so
    that callers passing it by keyword keep working.
    """
    return str.count('/') == 2
def matchesAadharFormat(str):
    """Return True when the text holds exactly 12 digit characters.

    Spaces are stripped first; every remaining character for which
    ``isdigit()`` is true is counted.  Non-digit characters are ignored
    rather than rejected, so surrounding text does not prevent a match.

    NOTE: the parameter shadows the builtin ``str``; the name is kept so
    that callers passing it by keyword keep working.
    """
    compact = str.replace(" ", "")
    digitCount = sum(1 for symbol in compact if symbol.isdigit())
    return digitCount == 12
def _extractDateWindow(text):
    """Return the 10 characters surrounding the first '/' in ``text``.

    The window starts two characters before the first slash and ends seven
    characters after it (the footprint of ``dd/mm/yyyy``).  Returns None
    when there is no slash or when the text is too short on either side,
    instead of wrapping around to the end of the string or raising
    IndexError.
    """
    slashAt = text.find('/')
    if slashAt < 2 or slashAt + 8 > len(text):
        return None
    return text[slashAt - 2:slashAt + 8]


def getAadharDict(OcrList):
    """Pick name, date of birth, Aadhaar number and gender out of OCR lines.

    Parameters:
        OcrList: list of strings, one per recognised text line.  The list is
            modified in place: every element that is consumed as a field
            (or dropped as non-ASCII) is removed from it.

    Returns:
        dict with the keys "name", "dob", "aadharNumber" and "gender".
        A field that could not be found holds the string "NA".  "gender"
        is "F" or "M" when found.

    The detectors run in a fixed order (dob, Aadhaar number, gender,
    non-ASCII clean-up, name) because each one removes what it consumed and
    the name is taken by position from whatever is left.
    """
    # dob detector: every element that looks like a date and yields a full
    # 10-character window is consumed; when there are several, the last one
    # is reported.  The survivors are collected into a new list so nothing is
    # removed from the list while it is being iterated (which used to make
    # the loop skip the element following each removed date).
    dob = "NA"
    survivors = []
    for ele in OcrList:
        window = _extractDateWindow(ele) if matchesDateFormat(ele) else None
        if window is None:
            survivors.append(ele)
        else:
            dob = window
    OcrList[:] = survivors

    # aadhar number detector: the first matching element wins; only its
    # digit characters are kept.
    aadharNumber = "NA"
    for position, ele in enumerate(OcrList):
        if matchesAadharFormat(ele):
            aadharNumber = "".join(char for char in ele if char.isdigit())
            del OcrList[position]  # safe: the loop ends right after
            break

    # gender detector: F for female and M for male.  "female" (and the
    # misspelling "fmale") must be tested first because "male" is a
    # substring of "female".
    gender = "NA"
    for position, ele in enumerate(OcrList):
        lowerCaseEle = ele.lower()
        if "female" in lowerCaseEle or "fmale" in lowerCaseEle:
            gender = "F"
        elif "male" in lowerCaseEle:
            gender = "M"
        else:
            continue
        del OcrList[position]  # safe: the loop ends right after
        break

    # removing marathi text in case some of it makes it through the
    # pre-processor (anything that is not pure ASCII is dropped)
    OcrList[:] = [ele for ele in OcrList if isEnglish(ele)]

    # The name is taken to be the second remaining element.
    # NOTE(review): presumably index 0 is a card header line - confirm
    # against real OCR output.
    name = OcrList.pop(1) if len(OcrList) >= 2 else "NA"

    return {
        "name": name,
        "dob": dob,
        "aadharNumber": aadharNumber,
        "gender": gender,
    }
# if __name__ == "__main__":
# print (OCR.getOCRList("/Users/aditya_gitte/Projects/SIH/Antons-ML-Model/SampleImages/Aadhar/pranav.jpeg "))