-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDistanceEstimation.py
127 lines (100 loc) · 4.12 KB
/
DistanceEstimation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import cv2 as cv
import numpy as np
from gtts import gTTS
import IPython.display as ipd
import subprocess
import os
import time
# --- Calibration constants (all distances/widths in inches) ---
KNOWN_DISTANCE = 45   # distance at which the reference images were taken
PERSON_WIDTH = 16     # assumed real-world shoulder width of a person
MOBILE_WIDTH = 3.0    # assumed real-world width of a mobile phone

# --- Detector thresholds ---
CONFIDENCE_THRESHOLD = 0.4
NMS_THRESHOLD = 0.3

# --- Drawing palette and font ---
COLORS = [(255,0,0),(255,0,255),(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
GREEN =(0,255,0)
BLACK =(0,0,0)
FONTS = cv.FONT_HERSHEY_COMPLEX

# Load the COCO class labels, one name per line.
with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f.readlines()]

# Build the YOLOv4-tiny detection model on the CUDA backend.
# NOTE(review): this assumes OpenCV was built with CUDA support — confirm,
# otherwise the backend silently falls back or fails at inference time.
yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
model = cv.dnn_DetectionModel(yoloNet)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
# object detector funciton /method
def object_detector(image):
    """Run the detector on *image*, draw boxes/labels in place, and collect
    data for the distance step.

    Returns a tuple ``(data_list, text)`` where each ``data_list`` entry is
    ``[class name, box width in pixels, (x, y) text anchor]`` for persons
    (class id 0) and cell phones (class id 67), and ``text`` holds the class
    names of detected cell phones for the speech announcement.
    """
    class_ids, confidences, bboxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    data_list = []
    text = []
    for class_id, confidence, bbox in zip(class_ids, confidences, bboxes):
        # Color is keyed on class id so the same class is always drawn alike.
        box_color = COLORS[int(class_id) % len(COLORS)]
        caption = "%s : %f" % (class_names[class_id], confidence)
        # Draw the detection box and its label just above it.
        cv.rectangle(image, bbox, box_color, 2)
        cv.putText(image, caption, (bbox[0], bbox[1]-14), FONTS, 0.5, box_color, 2)
        # Keep only the classes the distance step understands.
        # Entry layout: class name, pixel width, anchor for the distance text.
        if class_id == 0:        # person
            data_list.append([class_names[class_id], bbox[2], (bbox[0], bbox[1]-2)])
        elif class_id == 67:     # cell phone — also queued for speech output
            data_list.append([class_names[class_id], bbox[2], (bbox[0], bbox[1]-2)])
            text.append(class_names[class_id])
        # To track more classes, add further elif branches here.
    return data_list, text
def focal_length_finder(measured_distance, real_width, width_in_rf):
    """Return the camera focal length (in pixels) implied by a reference shot.

    measured_distance -- real distance from camera to object in the reference image
    real_width        -- true width of the object (same unit as measured_distance)
    width_in_rf       -- width of the object in the reference image, in pixels
    """
    return (width_in_rf * measured_distance) / real_width
def distance_finder(focal_length, real_object_width, width_in_frame):
    """Estimate object distance via the pinhole model: d = W * f / w.

    focal_length      -- focal length in pixels (from focal_length_finder)
    real_object_width -- true object width; sets the unit of the result
    width_in_frame    -- observed object width in the current frame, in pixels
    """
    return (real_object_width * focal_length) / width_in_frame
# --- One-time calibration from the reference images ---
# Each reference image was captured at KNOWN_DISTANCE; the detected pixel
# width there fixes the focal length for that object class.
ref_person = cv.imread('ReferenceImages/image14.png')
ref_mobile = cv.imread('ReferenceImages/image4.png')

# NOTE(review): index [1] assumes the phone is the second detection in its
# reference image, and [0] assumes the person is the first — verify against
# the actual reference images.
mobile_data, _ = object_detector(ref_mobile)
mobile_width_in_rf = mobile_data[1][1]
person_data, _ = object_detector(ref_person)
person_width_in_rf = person_data[0][1]

print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")

# Per-class focal lengths used by the live loop below.
focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
# --- Live loop: detect, estimate distance, overlay, and announce ---
cap = cv.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        # Fix: the grab result was previously ignored; a failed read would
        # hand None to the detector/imshow and crash.
        break
    data, text = object_detector(frame)
    for d in data:
        # d = [class name, pixel width, (x, y) text anchor]
        if d[0] == 'person':
            distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
            x, y = d[2]
        elif d[0] == 'cell phone':
            distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
            x, y = d[2]
        # Filled backdrop so the green distance text stays readable.
        cv.rectangle(frame, (x, y-3), (x+150, y+23), BLACK, -1)
        cv.putText(frame, f'Dis: {round(distance,2)} inch', (x+5, y+13), FONTS, 0.48, GREEN, 2)
    cv.imshow('frame', frame)
    if text:
        # `distance` is the last value computed above; a non-empty `text`
        # guarantees at least one cell-phone entry set it this frame.
        description = f"{text} is at {round(distance,2)} inches far"
        tts = gTTS(description, lang='en')
        tts.save('tts.mp3')
        # Fix: dropped the no-op ipd.Audio(...) call — its return value was
        # discarded, and it only renders audio inside a notebook anyway.
        # Play via ffplay; list argv + shell=False avoids shell injection.
        subprocess.call(["ffplay", "-nodisp", "-autoexit", "tts.mp3"], shell=False)
        time.sleep(0.5)
    key = cv.waitKey(1)
    if key == ord('q'):
        break
cap.release()
cv.destroyAllWindows()
# Fix: only remove the temp file if it was ever written — previously this
# raised FileNotFoundError when no phone was announced during the session.
if os.path.exists("tts.mp3"):
    os.remove("tts.mp3")