-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfindText.py
58 lines (54 loc) · 2.15 KB
/
findText.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from PIL import Image
import re
import cv2
import numpy as np
import pytesseract
# Nominal frame size in pixels (width x height).
width, height = 800, 600
# Characters removed by cleanText. Backslash is deliberately absent: the
# original pattern only ever used it as an escape, never as a member.
_STRIP_TABLE = str.maketrans('', '', ',/^$*"※~&』‘|()[]`\'…》')


def cleanText(origin_text):
    """Return *origin_text* with a fixed set of punctuation marks removed.

    The stripped characters are: , / ^ $ * " ※ ~ & 』 ‘ | ( ) [ ] ` ' … 》
    Every other character (including whitespace, newlines, digits and
    backslashes) is kept unchanged.

    Args:
        origin_text: The string to clean.

    Returns:
        A new string without the characters listed above.
    """
    # A translation table avoids the regex character class and its
    # escaping pitfalls, and strips everything in one pass.
    return origin_text.translate(_STRIP_TABLE)
def _parse_char_boxes(box_text, img_width, img_height, offset):
    """Convert Tesseract box lines into padded, clamped character boxes.

    Each usable line has the form ``glyph left bottom right top ...``.
    The y values are flipped with ``img_height`` so that the resulting
    ``[x1, y1, x2, y2]`` box is in top-left-origin pixel coordinates.
    """
    boxes = []
    for line in box_text.split("\n"):
        fields = line.split(" ")
        # Blank or truncated lines (e.g. from a trailing newline) carry no box.
        if len(fields) < 5:
            continue
        left, bottom, right, top = (int(value) for value in fields[1:5])
        # Pad by `offset`, then clamp every edge independently to the image.
        x1 = max(left - offset, 0)
        y1 = max(img_height - top - offset, 0)
        x2 = min(right + offset, img_width)
        y2 = min(img_height - bottom + offset, img_height)
        boxes.append([fields[0], [x1, y1, x2, y2]])
    return boxes


def _merge_adjacent_boxes(boxes, offset, label=None):
    """Merge consecutive boxes that sit on the same row, in place.

    Two neighbours are merged when their top edges differ by at most
    ``offset`` and the gap between the first box's right edge and the second
    box's left edge is at most ``2 * offset``. The merged entry carries
    ``label`` if given, otherwise the concatenation of both texts.
    """
    index = 0
    while index < len(boxes) - 1:
        cur_text, cur_box = boxes[index]
        next_text, next_box = boxes[index + 1]
        same_row = abs(cur_box[1] - next_box[1]) <= offset
        close_enough = abs(cur_box[2] - next_box[0]) <= 2 * offset
        if same_row and close_enough:
            merged_text = cur_text + next_text if label is None else label
            boxes[index] = [
                merged_text,
                [cur_box[0], cur_box[1], next_box[2], next_box[3]],
            ]
            # Stay on the same index so the merged box can absorb the
            # following neighbour as well.
            del boxes[index + 1]
        else:
            index += 1
    return boxes


def findText(img, mode="default", offset=10):
    """Run OCR on a BGR image and return the detected text boxes.

    The image is converted to grayscale and passed to Tesseract
    (``--psm 11 --oem 1``). Every reported character box is converted to
    top-left-origin pixel coordinates using the image's own height, padded
    by ``offset`` and clamped to the image bounds.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        mode: Case-insensitive grouping mode. ``"default"`` merges
            neighbouring characters and concatenates their text;
            ``"findpos"`` merges the same way but labels every merged box
            ``"ocrtext"``; any other value returns the per-character boxes
            unmerged.
        offset: Padding added around every box, in pixels. Also used as the
            tolerance when deciding whether two boxes are neighbours.

    Returns:
        ``None`` when the first line of the cleaned OCR output is empty,
        otherwise a list of ``[text, [x1, y1, x2, y2]]`` entries.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Use the real image size rather than a fixed 800x600 so the y-flip and
    # clamping are correct for any input.
    img_height, img_width = gray.shape[:2]

    box_text = cleanText(
        pytesseract.image_to_boxes(gray, config="--psm 11 --oem 1")
    )
    if box_text.split("\n")[0] == '':
        return None

    boxes = _parse_char_boxes(box_text, img_width, img_height, offset)

    mode_key = mode.lower()
    if mode_key == "default":
        _merge_adjacent_boxes(boxes, offset)
    elif mode_key == "findpos":
        _merge_adjacent_boxes(boxes, offset, label="ocrtext")
    return boxes
if __name__ == '__main__':
    # Demo run: findText takes a decoded image rather than a path, so the
    # sample file is loaded with OpenCV first.
    sample = cv2.imread('onlyTextimg.jpg')
    if sample is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise SystemExit("could not read 'onlyTextimg.jpg'")
    # Default mode merges neighbouring characters into longer text boxes.
    print(findText(sample))