Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

7 strip out unused models #8

Merged
merged 8 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions eye_tracking/constants.py

This file was deleted.

3 changes: 0 additions & 3 deletions eye_tracking/gaze/__main__.py

This file was deleted.

14 changes: 4 additions & 10 deletions eye_tracking/gaze/common/face_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,20 +84,14 @@ def compute_3d_pose(self, face: Face) -> None:
rot = face.head_pose_rot.as_matrix() # Has units of radians
face.model3d = self.LANDMARKS @ rot.T + face.head_position # This is the 3D model of the face in world coordinates

def compute_face_eye_centers(self, face: Face, mode: str) -> None:
def compute_face_eye_centers(self, face: Face) -> None:
"""Compute the centers of the face and eyes.

In the case of MPIIFaceGaze, the face center is defined as the
The face center is defined as the
average coordinates of the six points at the corners of both
eyes and the mouth. In the case of ETH-XGaze, it's defined as
the average coordinates of the six points at the corners of both
eyes and the nose. The eye centers are defined as the average
coordinates of the corners of each eye.
eyes and the mouth.
"""
if mode == "ETH-XGaze":
face.center = face.model3d[np.concatenate([self.REYE_INDICES, self.LEYE_INDICES, self.NOSE_INDICES])].mean(axis=0)
else:
face.center = face.model3d[np.concatenate([self.REYE_INDICES, self.LEYE_INDICES, self.MOUTH_INDICES])].mean(axis=0)
face.center = face.model3d[np.concatenate([self.REYE_INDICES, self.LEYE_INDICES, self.MOUTH_INDICES])].mean(axis=0)

# Face centre is given in 3D world coordinates, in metres, relative to the camera
face.reye.center = face.model3d[self.REYE_INDICES].mean(axis=0)
Expand Down
62 changes: 27 additions & 35 deletions eye_tracking/gaze/gaze_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,18 +271,17 @@ def _draw_face_template_model(self, face: Face) -> None:
def _display_normalized_image(self, face: Face) -> None:
if not self.config.demo.display_on_screen:
return

if not self.show_normalized_image:
return
if self.config.mode == "MPIIGaze":
reye = face.reye.normalized_image
leye = face.leye.normalized_image
normalized = np.hstack([reye, leye])
elif self.config.mode in ["MPIIFaceGaze", "ETH-XGaze"]:
normalized = face.normalized_image
else:
raise ValueError

reye = face.reye.normalized_image
leye = face.leye.normalized_image
normalized = np.hstack([reye, leye])

if self.config.demo.use_camera:
normalized = utils.flip_image(normalized)

cv2.imshow("normalized", normalized)

def _draw_gaze_vector(self, face: Face) -> None:
Expand All @@ -291,37 +290,30 @@ def _draw_gaze_vector(self, face: Face) -> None:

length = self.config.demo.gaze_visualization_length

if self.config.mode == "MPIIGaze":
for key in [FacePartsName.REYE, FacePartsName.LEYE]:
eye = getattr(face, key.name.lower())
end_point = eye.center + length * eye.gaze_vector # eye.gaze_vector.z is always -1. We scale by length
self.visualizer.draw_3d_line(eye.center, end_point)
for key in [FacePartsName.REYE, FacePartsName.LEYE]:
eye = getattr(face, key.name.lower())
end_point = eye.center + length * eye.gaze_vector # eye.gaze_vector.z is always -1. We scale by length
self.visualizer.draw_3d_line(eye.center, end_point)

pitch, yaw = np.rad2deg(eye.vector_to_angle(eye.gaze_vector))
logger.info(f"[{key.name.lower()}] pitch: {pitch:.2f}, yaw: {yaw:.2f}")
pitch, yaw = np.rad2deg(eye.vector_to_angle(eye.gaze_vector))
logger.info(f"[{key.name.lower()}] pitch: {pitch:.2f}, yaw: {yaw:.2f}")

average_eye_distance = (face.reye.distance + face.leye.distance) / 2
average_eye_center = (face.reye.center + face.leye.center) / 2
average_gaze_vector = (face.reye.gaze_vector + face.leye.gaze_vector) / 2
average_eye_distance = (face.reye.distance + face.leye.distance) / 2
average_eye_center = (face.reye.center + face.leye.center) / 2
average_gaze_vector = (face.reye.gaze_vector + face.leye.gaze_vector) / 2

end_point = average_eye_center + length * average_gaze_vector
self.visualizer.draw_3d_line(average_eye_center, end_point)
end_point = average_eye_center + length * average_gaze_vector
self.visualizer.draw_3d_line(average_eye_center, end_point)

# Draw the point on the screen the user is looking at
point_on_screen = average_eye_center + (average_eye_distance * 0.9) * average_gaze_vector
point_on_screen[1] *= 0.15 # Scale y-coordinate
# Draw the point on the screen the user is looking at
point_on_screen = average_eye_center + (average_eye_distance * 0.9) * average_gaze_vector
point_on_screen[1] *= 0.15 # Scale y-coordinate

# Update buffer and calculate smoothed point
self.point_buffer.append(point_on_screen)
if len(self.point_buffer) > self.point_on_screen_smoothing_factor:
self.point_buffer.pop(0) # Remove oldest point
# Update buffer and calculate smoothed point
self.point_buffer.append(point_on_screen)
if len(self.point_buffer) > self.point_on_screen_smoothing_factor:
self.point_buffer.pop(0) # Remove oldest point

smoothed_point = np.mean(self.point_buffer, axis=0)
smoothed_point = np.mean(self.point_buffer, axis=0)

self.visualizer.draw_3d_points(np.array([smoothed_point]), color=(0, 255, 0), size=10, clamp_to_screen=True)
elif self.config.mode in ["MPIIFaceGaze", "ETH-XGaze"]:
self.visualizer.draw_3d_line(face.center, face.center + length * face.gaze_vector)
pitch, yaw = np.rad2deg(face.vector_to_angle(face.gaze_vector))
logger.info(f"[face] pitch: {pitch:.2f}, yaw: {yaw:.2f}")
else:
raise ValueError
self.visualizer.draw_3d_points(np.array([smoothed_point]), color=(0, 255, 0), size=10, clamp_to_screen=True)
56 changes: 14 additions & 42 deletions eye_tracking/gaze/gaze_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, config: DictConfig):
self.camera, self._normalized_camera, self._config.gaze_estimator.normalized_camera_distance
)
self._gaze_estimation_model = self._load_model()
self._transform = create_transform(config)
self._transform = create_transform()

def _load_model(self) -> torch.nn.Module:
model = create_model(self._config)
Expand All @@ -55,36 +55,32 @@ def detect_faces_raw(self, image: np.ndarray) -> List[np.ndarray]:
def estimate_gaze(self, image: np.ndarray, face: Face) -> None:
self._face_model3d.estimate_head_pose(face, self.camera)
self._face_model3d.compute_3d_pose(face)
self._face_model3d.compute_face_eye_centers(face, self._config.mode)

if self._config.mode == "MPIIGaze":
for key in self.EYE_KEYS:
eye = getattr(face, key.name.lower())
self._head_pose_normalizer.normalize(image, eye)
self._run_mpiigaze_model(face)
elif self._config.mode == "MPIIFaceGaze":
self._head_pose_normalizer.normalize(image, face)
self._run_mpiifacegaze_model(face)
elif self._config.mode == "ETH-XGaze":
self._head_pose_normalizer.normalize(image, face)
self._run_ethxgaze_model(face)
else:
raise ValueError
self._face_model3d.compute_face_eye_centers(face)

for key in self.EYE_KEYS:
eye = getattr(face, key.name.lower())
self._head_pose_normalizer.normalize(image, eye)

self._run_mpiigaze_model(face)

@torch.no_grad()
def _run_mpiigaze_model(self, face: Face) -> None:
images = []
head_poses = []

for key in self.EYE_KEYS:
eye = getattr(face, key.name.lower())
image = eye.normalized_image
normalized_head_pose = eye.normalized_head_rot2d

if key == FacePartsName.REYE:
image = utils.flip_image(image).copy()
normalized_head_pose *= np.array([1, -1])

image = self._transform(image)
images.append(image)
head_poses.append(normalized_head_pose)

images = torch.stack(images)
head_poses = np.array(head_poses).astype(np.float32)
head_poses = torch.from_numpy(head_poses)
Expand All @@ -98,33 +94,9 @@ def _run_mpiigaze_model(self, face: Face) -> None:
for i, key in enumerate(self.EYE_KEYS):
eye = getattr(face, key.name.lower())
eye.normalized_gaze_angles = predictions[i]

if key == FacePartsName.REYE:
eye.normalized_gaze_angles *= np.array([1, -1])

eye.angle_to_vector()
eye.denormalize_gaze_vector()

@torch.no_grad()
def _run_mpiifacegaze_model(self, face: Face) -> None:
image = self._transform(face.normalized_image).unsqueeze(0)

device = torch.device(self._config.device)
image = image.to(device)
prediction = self._gaze_estimation_model(image)
prediction = prediction.cpu().numpy()

face.normalized_gaze_angles = prediction[0]
face.angle_to_vector()
face.denormalize_gaze_vector()

@torch.no_grad()
def _run_ethxgaze_model(self, face: Face) -> None:
image = self._transform(face.normalized_image).unsqueeze(0)

device = torch.device(self._config.device)
image = image.to(device)
prediction = self._gaze_estimation_model(image)
prediction = prediction.cpu().numpy()

face.normalized_gaze_angles = prediction[0]
face.angle_to_vector()
face.denormalize_gaze_vector()
90 changes: 18 additions & 72 deletions eye_tracking/gaze/head_pose_estimation/face_landmark_estimator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import List

import face_alignment
import face_alignment.detection.sfd
import mediapipe
import numpy as np
from omegaconf import DictConfig
Expand All @@ -12,81 +10,21 @@
class LandmarkEstimator:
def __init__(self, config: DictConfig):
self.mode = config.face_detector.mode
if self.mode == "dlib":
raise NotImplementedError("Dlib is not supported for landmark estimation")
elif self.mode == "face_alignment_dlib":
raise NotImplementedError("Dlib is not supported for landmark estimation")
elif self.mode == "face_alignment_sfd":
self.detector = face_alignment.detection.sfd.sfd_detector.SFDDetector(device=config.device)
self.predictor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device=config.device)
elif self.mode == "mediapipe":
self.detector = mediapipe.solutions.face_mesh.FaceMesh(
max_num_faces=config.face_detector.mediapipe_max_num_faces,
static_image_mode=config.face_detector.mediapipe_static_image_mode,
refine_landmarks=True, # Adds eye pupil landmarks (468-477)
)
else:
raise ValueError
self.detector = mediapipe.solutions.face_mesh.FaceMesh(
max_num_faces=config.face_detector.mediapipe_max_num_faces,
static_image_mode=config.face_detector.mediapipe_static_image_mode,
refine_landmarks=True, # Adds eye pupil landmarks (468-477)
)

def detect_faces(self, image: np.ndarray) -> List[Face]:
if self.mode == "dlib":
return self._detect_faces_dlib(image)
elif self.mode == "face_alignment_dlib":
return self._detect_faces_face_alignment_dlib(image)
elif self.mode == "face_alignment_sfd":
return self._detect_faces_face_alignment_sfd(image)
elif self.mode == "mediapipe":
return self._detect_faces_mediapipe(image)
else:
raise ValueError

def detect_faces_raw(self, image: np.ndarray) -> List[np.ndarray]:
if self.mode == "mediapipe":
return self._detect_faces_mediapipe_raw(image)
else:
raise ValueError

def _detect_faces_dlib(self, image: np.ndarray) -> List[Face]:
bboxes = self.detector(self._get_bgr_frame(image), 0)
detected = []
for bbox in bboxes:
predictions = self.predictor(self._get_bgr_frame(image), bbox)
landmarks = np.array([(pt.x, pt.y) for pt in predictions.parts()], dtype=np.float32)
bbox = np.array([[bbox.left(), bbox.top()], [bbox.right(), bbox.bottom()]], dtype=np.float32)
detected.append(Face(bbox, landmarks))
return detected

def _detect_faces_face_alignment_dlib(self, image: np.ndarray) -> List[Face]:
bboxes = self.detector(self._get_bgr_frame(image), 0)
bboxes = [[bbox.left(), bbox.top(), bbox.right(), bbox.bottom()] for bbox in bboxes]
predictions = self.predictor.get_landmarks(self._get_bgr_frame(image), detected_faces=bboxes)
if predictions is None:
predictions = []
detected = []
for bbox, landmarks in zip(bboxes, predictions):
bbox = np.array(bbox, dtype=np.float32).reshape(2, 2)
detected.append(Face(bbox, landmarks))
return detected

def _detect_faces_face_alignment_sfd(self, image: np.ndarray) -> List[Face]:
bboxes = self.detector.detect_from_image(self._get_bgr_frame(image).copy())
bboxes = [bbox[:4] for bbox in bboxes]
predictions = self.predictor.get_landmarks(self._get_bgr_frame(image), detected_faces=bboxes)
if predictions is None:
predictions = []
detected = []
for bbox, landmarks in zip(bboxes, predictions):
bbox = np.array(bbox, dtype=np.float32).reshape(2, 2)
detected.append(Face(bbox, landmarks))
return detected

def _detect_faces_mediapipe(self, image: np.ndarray) -> List[Face]:
"""
Calculated landmarks scaled to the image size with a bounding box
:param image: RGB image
:return: List of faces
"""

h, w = image.shape[:2]
faces_landmarks = self._detect_faces_mediapipe_raw(image)
faces_landmarks = self._detect_faces_raw(image)
detected = []
if faces_landmarks:
for face in faces_landmarks:
Expand All @@ -96,9 +34,17 @@ def _detect_faces_mediapipe(self, image: np.ndarray) -> List[Face]:
detected.append(Face(bbox, pts))
return detected

def _detect_faces_mediapipe_raw(self, image: np.ndarray) -> List[np.ndarray]:
def detect_faces_raw(self, image: np.ndarray) -> List[np.ndarray]:
if self.mode == "mediapipe":
return self._detect_faces_raw(image)
else:
raise ValueError

def _detect_faces_raw(self, image: np.ndarray) -> List[np.ndarray]:
"""
Returns landmarks as they come from the mediapipe model
Returns landmarks as they come from the mediapipe model (not scaled to the image size)
:param image: RGB image
:return: List of faces landmarks
"""
predictions = self.detector.process(self._get_bgr_frame(image))
faces_landmarks = []
Expand Down
Loading