-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathsemaphore.py
432 lines (350 loc) · 14.5 KB
/
semaphore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import argparse
from keyboard import keyboard # local fork
import mediapipe as mp
import cv2
from scipy.spatial import distance as dist
from math import atan, atan2, pi, degrees
from datetime import datetime
# MediaPipe's default drawing styles, passed to draw_landmarks() when
# landmark drawing is requested.
DEFAULT_LANDMARKS_STYLE = mp.solutions.drawing_styles.get_default_pose_landmarks_style()
DEFAULT_HAND_CONNECTIONS_STYLE = mp.solutions.drawing_styles.get_default_hand_connections_style()
# Optionally record the video feed to a timestamped AVI in the current directory.
# The name is computed once at import time; '.' and ':' are stripped from the
# timestamp before the extension is appended.
RECORDING_FILENAME = str(datetime.now()).replace('.','').replace(':','') + '.avi'
# Frame rate handed to the video writer when recording.
FPS = 10
VISIBILITY_THRESHOLD = .8 # amount of certainty that a body landmark is visible
STRAIGHT_LIMB_MARGIN = 20 # degrees from 180
EXTENDED_LIMB_MARGIN = .8 # lower limb length as fraction of upper limb
LEG_LIFT_MIN = -30 # degrees below horizontal
ARM_CROSSED_RATIO = 2 # max distance from wrist to opposite elbow, relative to mouth width
MOUTH_COVER_THRESHOLD = .03 # hands over mouth max distance error out of 1
SQUAT_THRESHOLD = .1 # max hip-to-knee vertical distance
# Minimum change in hip height between consecutive frames that is counted as
# rising or falling when detecting a jump.
JUMP_THRESHOLD = .0001
LEG_ARROW_ANGLE = 18 # degrees from vertical standing; should be divisor of 90
FINGER_MOUTH_RATIO = 1.5 # open hand relative to mouth width
# Flag-semaphore lookup table keyed by (left arm angle, right arm angle), with
# angles snapped to 45-degree steps.
# R side: 90 top to 0 right to -90 bottom
# L side: 90 top to 180 left to 225; straight down (270) is reported as -90
# Each entry maps 'a' to the key sent normally and 'n' to the key sent in
# numerals mode; an entry without 'n' sends nothing in numerals mode.
SEMAPHORES = {
    (-90, -45): {'a': "a", 'n': "1"},
    (-90, 0): {'a': "b", 'n': "2"},
    (-90, 45): {'a': "c", 'n': "3"},
    (-90, 90): {'a': "d", 'n': "4"},
    (135, -90): {'a': "e", 'n': "5"},
    (180, -90): {'a': "f", 'n': "6"},
    (225, -90): {'a': "g", 'n': "7"},
    (-45, 0): {'a': "h", 'n': "8"},
    (-45, 45): {'a': "i", 'n': "9"},
    (180, 90): {'a': "j", 'n': "capslock"},
    (90, -45): {'a': "k", 'n': "0"},
    (135, -45): {'a': "l", 'n': "\\"},
    (180, -45): {'a': "m", 'n': "["},
    (225, -45): {'a': "n", 'n': "]"},
    (0, 45): {'a': "o", 'n': ","},
    (90, 0): {'a': "p", 'n': ";"},
    (135, 0): {'a': "q", 'n': "="},
    (180, 0): {'a': "r", 'n': "-"},
    (225, 0): {'a': "s", 'n': "."},
    (90, 45): {'a': "t", 'n': "`"},
    (135, 45): {'a': "u", 'n': "/"},
    (225, 90): {'a': "v", 'n': '"'},
    (135, 180): {'a': "w"},
    (135, 225): {'a': "x", 'n': ""}, # clear last signal
    (180, 45): {'a': "y"},
    (180, 225): {'a': "z"},
    (90, 90): {'a': "space", 'n': "enter"},
    (135, 90): {'a': "tab"}, # custom "numerals" replacement
    (225, 45): {'a': "escape"}, # custom "cancel" replacement
}
# Arrow-key lookup keyed by (left leg angle, right leg angle), with angles
# snapped to LEG_ARROW_ANGLE-degree steps. One leg stays straight down (-90)
# while the other swings out by one or two steps. The value is the arrow's
# direction name; the caller appends ' arrow' to form the key name.
leg_arrow_angles = {
    (-90, -90 + LEG_ARROW_ANGLE): "right",
    (-90, -90 + 2*LEG_ARROW_ANGLE): "up",
    (270 - LEG_ARROW_ANGLE, -90): "left",
    (270 - 2*LEG_ARROW_ANGLE, -90): "down",
}
FRAME_HISTORY = 8 # pose history is compared against FRAME_HISTORY recent frames
HALF_HISTORY = FRAME_HISTORY // 2
QUARTER_HISTORY = FRAME_HISTORY // 4

# Template for one frame of per-frame movement state; always copy before use.
empty_frame = {
    'hipL_y': 0,
    'hipR_y': 0,
    'hips_dy': 0,
    'dxL_thrust_hipL': 0,
    'dxL_thrust_hipR': 0,
    'dxR_thrust_hipL': 0,
    'dxR_thrust_hipR': 0,
    'signed': False,
}

# Rolling window of the most recent frames, oldest first. Each slot must be its
# own dict: `N * [d.copy()]` would repeat a single dict N times, so writing to
# one frame would silently change every frame in the window.
last_frames = [empty_frame.copy() for _ in range(FRAME_HISTORY)]

frame_midpoint = (0,0)  # pixel position where key text is drawn; set in main()

current_semaphore = ''  # key name matched for the current frame, '' when none
last_keys = []  # most recently emitted key combination
def get_angle(a, b, c):
    """Return the angle at ``b`` swept from ray b->a to ray b->c, in degrees.

    Each point is a mapping with 'x' and 'y' entries. A negative sweep is
    shifted up by 360 so the result is never negative.
    """
    heading_to_c = atan2(c['y'] - b['y'], c['x'] - b['x'])
    heading_to_a = atan2(a['y'] - b['y'], a['x'] - b['x'])
    sweep = degrees(heading_to_c - heading_to_a)
    if sweep < 0:
        return sweep + 360
    return sweep
def is_missing(part):
    """Return True if any joint in ``part`` is below VISIBILITY_THRESHOLD.

    ``part`` is an iterable of joint mappings that each carry a 'visibility'
    score.
    """
    for joint in part:
        if joint['visibility'] < VISIBILITY_THRESHOLD:
            return True
    return False
def is_limb_pointing(upper, mid, lower):
    """Tell whether a three-joint limb is held straight and fully extended.

    The limb counts as pointing when all three joints are visible, the angle at
    ``mid`` is within STRAIGHT_LIMB_MARGIN degrees of 180, and the mid->lower
    segment is longer than EXTENDED_LIMB_MARGIN times the upper->mid segment.
    """
    if is_missing([upper, mid, lower]):
        return False

    bend = get_angle(upper, mid, lower)
    if not abs(180 - bend) < STRAIGHT_LIMB_MARGIN:
        return False

    mid_xy = [mid['x'], mid['y']]
    upper_length = dist.euclidean([upper['x'], upper['y']], mid_xy)
    lower_length = dist.euclidean([lower['x'], lower['y']], mid_xy)
    # A foreshortened lower segment means the limb is bent towards or away
    # from the camera rather than pointing sideways.
    return lower_length > EXTENDED_LIMB_MARGIN * upper_length
def get_limb_direction(arm, closest_degrees=45):
    """Return the direction a limb points, snapped to a multiple of ``closest_degrees``.

    ``arm`` is a sequence of three joint mappings (each with 'x' and 'y'); only
    the first and last are used. The direction is that of the vector from
    ``arm[0]`` to ``arm[2]``, measured counter-clockwise from the +x axis.

    Rightward-pointing limbs yield values from -90 to 90 and leftward-pointing
    limbs from 90 up towards 270; a snapped 270 is reported as -90 so that
    straight down has a single value. A zero-length vector yields 0.

    Returns:
        The snapped angle in degrees, as an int.
    """
    dy = arm[2]['y'] - arm[0]['y']  # first joint -> last joint
    dx = arm[2]['x'] - arm[0]['x']

    # atan2 handles dx == 0 (a perfectly vertical limb), where dy/dx would
    # raise ZeroDivisionError.
    angle = degrees(atan2(dy, dx))
    if dx < 0 and angle < 0:
        # Lower-left quadrant: report 180..270 rather than -180..-90.
        angle += 360

    # collapse to nearest closest_degrees; 45 for semaphore
    mod_close = angle % closest_degrees
    angle -= mod_close
    if mod_close > closest_degrees / 2:
        angle += closest_degrees

    angle = int(angle)
    if angle == 270:
        angle = -90
    return angle
def is_arm_crossed(elbow, wrist, max_dist):
    """Return whether ``wrist`` lies strictly closer than ``max_dist`` to ``elbow``.

    Both joints are mappings with 'x' and 'y'; distance is Euclidean.
    """
    elbow_xy = [elbow['x'], elbow['y']]
    wrist_xy = [wrist['x'], wrist['y']]
    gap = dist.euclidean(elbow_xy, wrist_xy)
    return gap < max_dist
def is_arms_crossed(elbowL, wristL, elbowR, wristR, mouth_width):
    """Return whether each wrist is resting near the opposite elbow.

    The allowed wrist-to-elbow distance is ``mouth_width`` scaled by
    ARM_CROSSED_RATIO.
    """
    reach = mouth_width * ARM_CROSSED_RATIO
    right_wrist_on_left_elbow = is_arm_crossed(elbowL, wristR, reach)
    if not right_wrist_on_left_elbow:
        return right_wrist_on_left_elbow
    return is_arm_crossed(elbowR, wristL, reach)
def is_leg_lifted(leg):
    """Return whether the thigh is raised above LEG_LIFT_MIN degrees.

    ``leg`` is a sequence of joint mappings starting with hip then knee. The
    angle is that of the hip->knee vector as given by atan2, so it is compared
    as a signed value; False is returned when any joint in ``leg`` is not
    visible.
    """
    if is_missing(leg):
        return False
    hip, knee = leg[0], leg[1]
    thigh_angle = degrees(atan2(knee['y'] - hip['y'], knee['x'] - hip['x']))
    return thigh_angle > LEG_LIFT_MIN
def is_jumping(hipL, hipR):
    """Record this frame's hip movement and report whether a jump just finished.

    The newest entry of ``last_frames`` is updated with both hip heights and a
    trend flag (1 rising, -1 falling, 0 otherwise) relative to the previous
    entry; both hips must move by more than JUMP_THRESHOLD in the same
    direction. A jump is reported when every frame in the first half of the
    history is rising and every frame in the second half is falling.

    Returns False without recording anything if either hip is not visible.
    """
    if is_missing([hipL, hipR]):
        return False

    newest, previous = last_frames[-1], last_frames[-2]
    newest['hipL_y'] = hipL['y']
    newest['hipR_y'] = hipR['y']

    both_rose = (hipL['y'] > previous['hipL_y'] + JUMP_THRESHOLD
                 and hipR['y'] > previous['hipR_y'] + JUMP_THRESHOLD)
    both_fell = (hipL['y'] < previous['hipL_y'] - JUMP_THRESHOLD
                 and hipR['y'] < previous['hipR_y'] - JUMP_THRESHOLD)
    if both_rose:
        newest['hips_dy'] = 1  # rising
    elif both_fell:
        newest['hips_dy'] = -1  # falling
    else:
        newest['hips_dy'] = 0  # not significant dy

    # consistently rising first half, lowering second half
    older_half = last_frames[:HALF_HISTORY]
    newer_half = last_frames[HALF_HISTORY:]
    went_up = all(frame['hips_dy'] == 1 for frame in older_half)
    came_down = all(frame['hips_dy'] == -1 for frame in newer_half)
    return went_up and came_down
def is_mouth_covered(mouth, palms):
    """Return whether both hands are held over the mouth.

    ``mouth`` and ``palms`` are pairs of joint mappings. Each mouth point must
    be within MOUTH_COVER_THRESHOLD of the palm at the same index on both the
    x and y axes. Returns False when either palm is not visible.
    """
    if is_missing(palms):
        return False
    for lip_corner, palm in ((mouth[0], palms[0]), (mouth[1], palms[1])):
        if not abs(lip_corner['x'] - palm['x']) < MOUTH_COVER_THRESHOLD:
            return False
        if not abs(lip_corner['y'] - palm['y']) < MOUTH_COVER_THRESHOLD:
            return False
    return True
def is_squatting(hipL, kneeL, hipR, kneeR):
    """Return whether both hips have dropped to roughly knee height.

    On each side the vertical hip-to-knee distance must be below
    SQUAT_THRESHOLD. Returns False when any of the four joints is not visible.
    """
    if is_missing([hipL, kneeL, hipR, kneeR]):
        return False
    left_drop = abs(hipL['y'] - kneeL['y'])
    if not left_drop < SQUAT_THRESHOLD:
        return False
    right_drop = abs(hipR['y'] - kneeR['y'])
    return right_drop < SQUAT_THRESHOLD
def is_finger_out(finger, palmL, palmR, min_finger_reach):
    """Return whether a fingertip is extended away from the hand.

    The hand a fingertip belongs to is not known here, so its distance is
    measured to both palms and the nearer one is used. The finger counts as
    out when that distance exceeds ``min_finger_reach``.
    """
    tip = [finger['x'], finger['y']]
    to_left_palm = dist.euclidean(tip, [palmL['x'], palmL['y']])
    to_right_palm = dist.euclidean(tip, [palmR['x'], palmR['y']])
    nearest_palm = min(to_left_palm, to_right_palm)
    return nearest_palm > min_finger_reach
def is_hand_open(thumb, forefinger, pinky, palmL, palmR, min_finger_reach):
    """Return whether the thumb, forefinger and pinky tips are all extended.

    Each fingertip is checked against both palms with the same
    ``min_finger_reach``.
    """
    def reaches_out(tip):
        return is_finger_out(tip, palmL, palmR, min_finger_reach)

    thumb_out, forefinger_out, pinky_out = map(reaches_out, (thumb, forefinger, pinky))
    return thumb_out and forefinger_out and pinky_out
def type_semaphore(armL_angle, armR_angle, image, shift_on, numerals, command_on, control_on,
                   display_only, allow_repeat):
    """Look up the semaphore for a pair of arm angles and hand it off for typing.

    The matched key name ('n' column when ``numerals`` is set, otherwise 'a')
    is stored in the module-level ``current_semaphore`` and then passed through
    type_and_remember() with the modifier flags.

    Returns:
        False when the angle pair is not a known semaphore; otherwise whatever
        ``current_semaphore`` holds after type_and_remember() has run.
    """
    global current_semaphore

    arm_match = SEMAPHORES.get((armL_angle, armR_angle), '')
    if not arm_match:
        return False

    column = 'n' if numerals else 'a'
    current_semaphore = arm_match.get(column, '')
    type_and_remember(image, shift_on, command_on, control_on, display_only, allow_repeat)
    # NOTE(review): type_and_remember() resets current_semaphore to '' whenever
    # it actually emits the key, so this is falsy after a successful emit and
    # truthy only when a repeat was suppressed. Confirm that is the intended
    # signal for the caller's cool-off.
    return current_semaphore
def type_and_remember(image=None, shift_on=False, command_on=False, control_on=False,
                      display_only=True, allow_repeat=False):
    """Emit the pending semaphore with its modifiers, unless it is a repeat.

    Does nothing when ``current_semaphore`` is empty. Otherwise the key list is
    built as the active modifiers (shift, command, control, in that order)
    followed by the semaphore. It is emitted via output() when ``allow_repeat``
    is set or the list differs from ``last_keys``; on emit, ``last_keys`` is
    updated and ``current_semaphore`` is cleared.
    """
    global current_semaphore, last_keys

    if not current_semaphore:
        return

    modifiers = (('shift', shift_on), ('command', command_on), ('control', control_on))
    keys = [name for name, held in modifiers if held]
    keys.append(current_semaphore)

    if not allow_repeat and keys == last_keys:
        return

    last_keys = keys.copy()
    current_semaphore = ''
    output(keys, image, display_only)
def get_key_text(keys):
    """Build the short on-screen label for a key combination.

    The label is the last entry of ``keys`` prefixed by 'S+', 'CMD+' and 'CTL+'
    (always in that order) for whichever of 'shift', 'command' and 'control'
    appear anywhere in ``keys``. An empty ``keys`` gives ''.
    """
    if not keys:
        return ''
    abbreviations = (('shift', 'S+'), ('command', 'CMD+'), ('control', 'CTL+'))
    prefix = ''.join(label for name, label in abbreviations if name in keys)
    return prefix + keys[-1]
def output(keys, image, display_only=True):
    """Print a key combination, then either type it or draw it on the frame.

    ``keys`` is joined with '+'; nothing happens if the result is empty. With
    ``display_only`` the label from get_key_text() is drawn on ``image`` at
    ``frame_midpoint``; otherwise the combination is sent through the keyboard
    module.
    """
    keystring = '+'.join(keys)
    if not keystring:
        return

    print("keys:", keystring)
    if display_only:
        label = get_key_text(keys)
        cv2.putText(image, label, frame_midpoint,
                    cv2.FONT_HERSHEY_SIMPLEX, 10, (0,0,255), 10)
    else:
        keyboard.press_and_release(keystring)
def render_and_maybe_exit(image, recording):
    """Show the frame, append it to the recording if any, and poll for Escape.

    Returns:
        True when the key read during the 5 ms wait has code 27 (Escape).
    """
    cv2.imshow('Semaphore', image)
    if recording:
        recording.write(image)
    pressed = cv2.waitKey(5) & 0xFF  # keep only the low byte of the key code
    return pressed == 27
def main():
    """Run the semaphore keyboard: capture video, detect poses, emit keys.

    Command-line options select the input device or file and toggle selfie
    flip, landmark drawing, recording, real typing (instead of display only)
    and immediate repeats. Each option other than --input is enabled by
    passing it any value.

    Per frame, the pose and hand models are run and the following gestures are
    checked, in order: hands over mouth (backspace), crossed arms with leg
    angles (arrow keys), arm flags (semaphore keys, with leg lifts, open hands
    and squatting as modifiers), and a jump (repeat last keys). The loop ends
    when the input runs out or Escape is pressed.

    The capture device, the optional video writer and any windows are released
    in a ``finally`` block so they are cleaned up even if processing raises.
    """
    global last_frames, frame_midpoint

    parser = argparse.ArgumentParser()
    parser.add_argument('--input', '-i', help='Input video device or file (number or path), defaults to 0', default='0')
    parser.add_argument('--flip', '-f', help='Set to any value to flip resulting output (selfie view)')
    parser.add_argument('--landmarks', '-l', help='Set to any value to draw body landmarks')
    parser.add_argument('--record', '-r', help='Set to any value to save a timestamped AVI in current directory')
    parser.add_argument('--type', '-t', help='Set to any value to type output rather than only display')
    parser.add_argument('--repeat', '-p', help='Set to any value to allow instant semaphore repetitions')
    args = parser.parse_args()

    # An all-digit --input is a capture device index; anything else is passed
    # through unchanged (e.g. a file path).
    INPUT = int(args.input) if (args.input and args.input.isdigit()) else args.input
    FLIP = args.flip is not None
    DRAW_LANDMARKS = args.landmarks is not None
    RECORDING = args.record is not None
    DISPLAY_ONLY = args.type is None
    ALLOW_REPEAT = args.repeat is not None

    cap = cv2.VideoCapture(INPUT)
    recording = None
    try:
        frame_size = (int(cap.get(3)), int(cap.get(4)))
        frame_midpoint = (int(frame_size[0]/2), int(frame_size[1]/2))

        if RECORDING:
            recording = cv2.VideoWriter(RECORDING_FILENAME,
                                        cv2.VideoWriter_fourcc(*'MJPG'), FPS, frame_size)

        with mp.solutions.pose.Pose() as pose_model:
            with mp.solutions.hands.Hands(max_num_hands=2) as hands_model:
                while cap.isOpened():
                    success, image = cap.read()
                    if not success:
                        break

                    # The models are fed RGB; drawing and display use BGR.
                    image.flags.writeable = False
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    pose_results = pose_model.process(image)
                    hand_results = hands_model.process(image)
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                    # draw pose
                    if DRAW_LANDMARKS:
                        mp.solutions.drawing_utils.draw_landmarks(
                            image,
                            pose_results.pose_landmarks,
                            mp.solutions.pose.POSE_CONNECTIONS,
                            DEFAULT_LANDMARKS_STYLE)

                    # One list of flipped-axis points per detected hand.
                    hands = []
                    if hand_results.multi_hand_landmarks:
                        for hand_landmarks in hand_results.multi_hand_landmarks:
                            # draw hands
                            if DRAW_LANDMARKS:
                                mp.solutions.drawing_utils.draw_landmarks(
                                    image,
                                    hand_landmarks,
                                    mp.solutions.hands.HAND_CONNECTIONS,
                                    DEFAULT_LANDMARKS_STYLE,
                                    DEFAULT_HAND_CONNECTIONS_STYLE)
                            hands.append([
                                {'x': 1 - point.x, 'y': 1 - point.y}
                                for point in hand_landmarks.landmark
                            ])

                    if FLIP:
                        image = cv2.flip(image, 1) # selfie view

                    if pose_results.pose_landmarks:
                        # prepare to store most recent frame of movement updates over time
                        last_frames = last_frames[1:] + [empty_frame.copy()]

                        # short cool off period of last_frames for each sign
                        if any(frame['signed'] for frame in last_frames):
                            if render_and_maybe_exit(image, recording):
                                break
                            else:
                                continue

                        # (0,0) bottom left to (1,1) top right
                        body = [
                            {
                                'x': 1 - point.x,
                                'y': 1 - point.y,
                                'visibility': point.visibility
                            }
                            for point in pose_results.pose_landmarks.landmark
                        ]

                        # cover mouth: backspace
                        mouth = (body[9], body[10])
                        palms = (body[19], body[20])
                        if is_mouth_covered(mouth, palms):
                            output(['backspace'], image, DISPLAY_ONLY)

                        # command: left leg lift
                        legL = (body[23], body[25], body[27]) # L hip, knee, ankle
                        command_on = is_leg_lifted(legL)

                        # control: right leg lift
                        legR = (body[24], body[26], body[28]) # R hip, knee, ankle
                        control_on = is_leg_lifted(legR)

                        shoulderL, elbowL, wristL = body[11], body[13], body[15]
                        armL = (shoulderL, elbowL, wristL)

                        shoulderR, elbowR, wristR = body[12], body[14], body[16]
                        armR = (shoulderR, elbowR, wristR)

                        # Mouth width serves as the scale for distance checks.
                        mouth_width = abs(mouth[1]['x']-mouth[0]['x'])

                        # arrow keys: arms crossed + leg angles
                        if is_arms_crossed(elbowL, wristL, elbowR, wristR, mouth_width):
                            if is_limb_pointing(*legL) and is_limb_pointing(*legR):
                                legL_angle = get_limb_direction(legL, LEG_ARROW_ANGLE)
                                legR_angle = get_limb_direction(legR, LEG_ARROW_ANGLE)
                                leg_arrow = leg_arrow_angles.get((legL_angle, legR_angle), '')
                                if leg_arrow:
                                    output([leg_arrow + ' arrow'], image, DISPLAY_ONLY)

                        # shift: every detected hand open (needs at least one hand)
                        shift_on = len(hands) > 0
                        min_finger_reach = FINGER_MOUTH_RATIO * mouth_width
                        palmL, palmR = body[17], body[18]
                        for hand in hands:
                            thumb, forefinger, pinky = hand[4], hand[8], hand[20]
                            hand_open = is_hand_open(thumb, forefinger, pinky, palmL, palmR, min_finger_reach)
                            shift_on = shift_on and hand_open

                        # numbers: squat
                        kneeL, kneeR = body[25], body[26]
                        hipL, hipR = body[23], body[24]
                        numerals = is_squatting(hipL, kneeL, hipR, kneeR)

                        # alphanumeric: arm flags
                        if is_limb_pointing(*armL) and is_limb_pointing(*armR):
                            armL_angle = get_limb_direction(armL)
                            armR_angle = get_limb_direction(armR)
                            if type_semaphore(armL_angle, armR_angle, image,
                                              shift_on, numerals, command_on, control_on,
                                              DISPLAY_ONLY, ALLOW_REPEAT):
                                last_frames[-1]['signed'] = True

                        # repeat last: jump (hips rise + fall)
                        # TODO: if ankles are always in view, could be more accurate than hips
                        if is_jumping(hipL, hipR):
                            output(last_keys, image, DISPLAY_ONLY)

                    if render_and_maybe_exit(image, recording):
                        break
    finally:
        # Always finalise the recording and free the camera and windows, even
        # when a frame fails to process.
        if recording is not None:
            recording.release()
        cap.release()
        cv2.destroyAllWindows()
# Only start the capture loop when executed as a script, not on import.
if __name__ == '__main__':
    main()