gesturemodelfunctions.py
"""Hand-gesture classification helpers: MediaPipe Hands landmark detection
feeding a TFLite keypoint classifier."""
import csv
import copy
import itertools

import numpy as np
import tensorflow as tf
import cv2 as cv
import mediapipe as mp

import config

def _calc_bounding_rect(image, landmarks):
    """Return the landmarks' bounding box as corner coordinates [x1, y1, x2, y2] in pixels."""
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_array = np.empty((0, 2), int)
    for landmark in landmarks.landmark:
        # Scale the normalized landmark to pixels, clamped to the image bounds
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_array = np.append(landmark_array,
                                   [np.array((landmark_x, landmark_y))],
                                   axis=0)

    x, y, w, h = cv.boundingRect(landmark_array)
    return [x, y, x + w, y + h]
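# Illustrative example (values are assumptions, not from the original code):
# integer landmark pixels spanning (40, 60) to (120, 200) give
# brect = [40, 60, 121, 201], since cv.boundingRect brackets integer points
# inclusively (w = 120 - 40 + 1, h = 200 - 60 + 1).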

def _calc_landmark_list(image, landmarks):
    """Convert normalized MediaPipe landmarks to clamped [x, y] pixel coordinates."""
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []
    for landmark in landmarks.landmark:
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        # landmark.z is available but unused by the classifier
        landmark_point.append([landmark_x, landmark_y])

    return landmark_point

def _pre_process_landmark(landmark_list):
    """Normalize a landmark list for the classifier: shift so the wrist
    (landmark 0) is the origin, flatten, and scale into [-1, 1]."""
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to coordinates relative to the first landmark (the wrist)
    base_x, base_y = temp_landmark_list[0][0], temp_landmark_list[0][1]
    for index, landmark_point in enumerate(temp_landmark_list):
        temp_landmark_list[index][0] = landmark_point[0] - base_x
        temp_landmark_list[index][1] = landmark_point[1] - base_y

    # Flatten to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))

    # Scale by the largest absolute coordinate; `or 1` guards against
    # division by zero when every point coincides with the wrist
    max_value = max(map(abs, temp_landmark_list)) or 1
    return [value / max_value for value in temp_landmark_list]
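# Worked example (illustrative input, not from the original code): for
# landmark_list = [[100, 200], [110, 190], [90, 210]], the wrist-relative
# coordinates are [[0, 0], [10, -10], [-10, 10]], flattened to
# [0, 0, 10, -10, -10, 10]; max(|.|) = 10, so _pre_process_landmark
# returns [0.0, 0.0, 1.0, -1.0, -1.0, 1.0].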

# Load the TFLite keypoint classifier once at import time
interpreter = tf.lite.Interpreter(model_path='keypoint_classifier.tflite',
                                  num_threads=1)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Read the gesture labels (first column of each row)
with open('keypoint_classifier_label.csv', encoding='utf-8-sig') as f:
    keypoint_classifier_labels = [row[0] for row in csv.reader(f)]
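# The label file holds one gesture name per row, indexed by classifier
# output; for example (hypothetical contents, depending on how the
# classifier was trained):
#   Open
#   Close
#   Pointer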

# Configure MediaPipe Hands for single-image inference on one hand.
# min_tracking_confidence has no effect when static_image_mode=True.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

def gesturemodelmatch(image):
    """Detect one hand in a BGR image and return its gesture label.

    Returns the classifier label when a large-enough hand is found and the
    classification confidence clears the configured threshold; otherwise "".
    """
    image = cv.flip(image, 1)  # mirror the image to match training data
    debug_image = copy.deepcopy(image)

    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    image.flags.writeable = False  # let MediaPipe process by reference
    results = hands.process(image)

    if results.multi_hand_landmarks is None:
        return ""

    for hand_landmarks in results.multi_hand_landmarks:
        # Bounding box calculation: make sure the hand is big enough
        brect = _calc_bounding_rect(debug_image, hand_landmarks)
        width = brect[2] - brect[0]
        height = brect[3] - brect[1]
        if width * height <= config.config['gesture']['handsize']:
            return ""

        # Landmark extraction, then conversion to relative / normalized form
        landmark_list = _calc_landmark_list(debug_image, hand_landmarks)
        pre_processed_landmark_list = _pre_process_landmark(landmark_list)

        # Hand-sign classification with the TFLite interpreter
        interpreter.set_tensor(
            input_details[0]['index'],
            np.array([pre_processed_landmark_list], dtype=np.float32))
        interpreter.invoke()
        result = np.squeeze(interpreter.get_tensor(output_details[0]['index']))

        hand_sign_id = int(np.argmax(result))
        if result[hand_sign_id] > config.config['gesture']['confidence']:
            return keypoint_classifier_labels[hand_sign_id]
        return ""

    return ""