Commit 796754a: faceID part 2
oguzhanbsolak committed Dec 7, 2023 (1 parent: ccbf10b)

Showing 7 changed files with 147 additions and 140 deletions.
26 changes: 12 additions & 14 deletions ai8x_blocks.py
@@ -147,30 +147,28 @@ def __init__(self, in_channels, out_channels, expansion_factor, stride=1, bias=F
         if stride == 1:
             if depthwise_bias:
                 self.conv2 = ai8x.FusedConv2dBN(hidden_channels, out_channels, 3,
-                                    padding=1, stride=stride,
-                                    bias=depthwise_bias, **kwargs)
+                                                padding=1, stride=stride,
+                                                bias=depthwise_bias, **kwargs)

             else:
                 self.conv2 = ai8x.Conv2d(hidden_channels, out_channels, 3,
-                             padding=1, stride=stride,
-                             bias=depthwise_bias, **kwargs)
+                                         padding=1, stride=stride,
+                                         bias=depthwise_bias, **kwargs)

         else:
             if depthwise_bias:
                 self.conv2 = ai8x.FusedMaxPoolConv2dBN(hidden_channels,
-                                                       out_channels,
-                                                       3, padding=1, pool_size=stride,
-                                                       pool_stride=stride,
-                                                       bias=depthwise_bias,
-                                                       **kwargs)
+                                                       out_channels, 3,
+                                                       padding=1, pool_size=stride,
+                                                       pool_stride=stride,
+                                                       bias=depthwise_bias, **kwargs)

             else:
                 self.conv2 = ai8x.FusedMaxPoolConv2d(hidden_channels,
-                                                     out_channels,
-                                                     3, padding=1, pool_size=stride,
-                                                     pool_stride=stride,
-                                                     bias=depthwise_bias,
-                                                     **kwargs)
+                                                     out_channels, 3,
+                                                     padding=1, pool_size=stride,
+                                                     pool_stride=stride,
+                                                     bias=depthwise_bias, **kwargs)

         if (stride == 1) and (in_channels == out_channels):
             self.resid = ai8x.Add()
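The four branches above differ only in pooling and batch norm; the reformatting does not change behavior. As a rough plain-PyTorch sketch of what gets selected (the ai8x.* layers additionally handle MAX78000-specific quantization and operator fusing, which plain nn.* does not capture):

    import torch
    from torch import nn

    def depthwise_stage(hidden_channels, out_channels, stride=1, depthwise_bias=False):
        """Sketch of the conv2 selection above, using plain torch.nn layers."""
        layers = []
        if stride > 1:
            # FusedMaxPoolConv2d*: a max pool with size == stride does the downsampling.
            layers.append(nn.MaxPool2d(kernel_size=stride, stride=stride))
        # The 3x3 convolution itself always runs with stride 1 here.
        layers.append(nn.Conv2d(hidden_channels, out_channels, 3, padding=1,
                                bias=depthwise_bias))
        if depthwise_bias:
            # The *BN variants are chosen only when depthwise_bias is set.
            layers.append(nn.BatchNorm2d(out_channels))
        return nn.Sequential(*layers)

    x = torch.rand(1, 32, 56, 56)
    print(depthwise_stage(32, 64, stride=2, depthwise_bias=True)(x).shape)  # (1, 64, 28, 28)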
85 changes: 42 additions & 43 deletions datasets/vggface2.py
@@ -11,27 +11,27 @@
 https://ieeexplore.ieee.org/abstract/document/8373813
 """

-import os
 import glob
 import errno
+import os
 import pickle

 import numpy as np
-import cv2
-from tqdm import tqdm
-from PIL import Image

 import torch
-import torchvision.transforms.functional as FT
 from torch.utils.data import Dataset
 from torchvision import transforms
+import torchvision.transforms.functional as FT

-from skimage import transform as trans
-from hawk_eyes.face import RetinaFace
+import cv2
 import kornia.geometry.transform as GT
+from hawk_eyes.face import RetinaFace
+from PIL import Image
+from skimage import transform as trans

+from tqdm import tqdm

-from utils import augmentation_utils
 import ai8x
+from utils import augmentation_utils


 class VGGFace2(Dataset):
@@ -63,7 +63,7 @@ def __init__(self, root_dir, d_type, mode, transform=None,
                              [73.5318, 51.5014],
                              [56.0252, 71.7366],
                              [41.5493, 92.3655],
-                             [70.7299, 92.2041] ], dtype=np.float32 )
+                             [70.7299, 92.2041]], dtype=np.float32)

         self.__makedir_exist_ok(self.dataset_path)
         self.__makedir_exist_ok(os.path.join(self.dataset_path, "processed"))
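For orientation: self.src is the widely used five-point template of an aligned 112x112 face (eyes, nose tip, mouth corners); its first row sits above this hunk, and the left-eye value used below is the usual template value, assumed here. A minimal sketch of how the template is consumed, assuming self.tform is a skimage SimilarityTransform as the `trans` import suggests; the detected landmarks are synthetic stand-ins, not real detector output:

    import numpy as np
    from skimage import transform as trans

    src = np.array([[38.2946, 51.6963],   # left eye (assumed standard value)
                    [73.5318, 51.5014],   # right eye
                    [56.0252, 71.7366],   # nose tip
                    [41.5493, 92.3655],   # left mouth corner
                    [70.7299, 92.2041]],  # right mouth corner
                   dtype=np.float32)

    detected = src * 2.0 + np.array([30.0, 12.0], dtype=np.float32)  # bigger, offset face

    tform = trans.SimilarityTransform()
    tform.estimate(detected, src)   # least-squares fit, same call order as in __getitem__
    print(tform.scale)              # ~0.5: shrinks the detection back to template size
    A = tform.params[0:2, :]        # the 2x3 matrix later fed to warp_affine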
@@ -96,12 +96,11 @@ def __init__(self, root_dir, d_type, mode, transform=None,
             print(f'Unknown data type: {self.d_type}')
             return

-
     def __extract_gt(self):
         """
        Extracts the ground truth from the dataset
        """
-        retina = RetinaFace(model_name='retina_l', conf = 0.5)
+        retina = RetinaFace(model_name='retina_l', conf=0.5)
         img_paths = list(glob.glob(os.path.join(self.d_path + '/**/', '*.jpg'), recursive=True))
         nf_number = 0
         n_words = 0
@@ -122,9 +121,8 @@ def __extract_gt(self):

             lndmrks = lndmrks[0]

-
             dir_name = os.path.dirname(jpg)
-            lbl = os.path.relpath(dir_name, self.d_path)
+            lbl = os.path.relpath(dir_name, self.d_path)

             if lbl not in pickle_dict["word2index"]:
                 pickle_dict["word2index"][lbl] = n_words
@@ -143,7 +141,6 @@ def __extract_gt(self):
     def __len__(self):
         return len(self.pickle_dict["img_list"]) - 1

-
     def __getitem__(self, index):
         if index >= len(self):
             raise IndexError
@@ -157,7 +154,7 @@ def __getitem__(self, index):
         if self.mode == 'identification_dr':
             return self.__getitem_identification_dr(index)

-        #Will never reached
+        # Will never reached
         return None

     def __getitem_detection(self, index):
@@ -196,26 +193,26 @@ def __getitem_identification(self, index):
         img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index]))
         img_A = img.copy()

-        #Apply transformation to the image that will be aligned
+        # Apply transformation to the image that will be aligned
         if self.teacher_transform is not None:
             img_A = self.teacher_transform(img_A)

-        #Apply transformation to the image that will be cropped
+        # Apply transformation to the image that will be cropped
         if self.transform is not None:
             img = self.transform(img)

-        #Use landmarks to estimate affine transformation
+        # Use landmarks to estimate affine transformation
         landmark = self.pickle_dict["landmarks"][index]
         self.tform.estimate(landmark, self.src)
-        A = self.tform.params[0:2,:]
+        A = self.tform.params[0:2, :]
         A = torch.as_tensor(A, dtype=torch.float32)
         A = A.unsqueeze(0)

-        #Apply affine transformation to obtain aligned image
+        # Apply affine transformation to obtain aligned image
         img_A = GT.warp_affine(img_A.unsqueeze(0), A, (self.img_size[0],self.img_size[1]))
         img_A = img_A.squeeze(0)

-        #Convert bounding box to square
+        # Convert bounding box to square
         height = box[3] - box[1]
         width = box[2] - box[0]
         max_dim = max(height, width)
@@ -224,11 +221,11 @@ def __getitem_identification(self, index):
         box[2] = np.clip(box[2] + (max_dim - width) / 2, 0, img.shape[2])
         box[3] = np.clip(box[3] + (max_dim - height) / 2, 0, img.shape[1])

-        #Crop image with the square bounding box
-        img_C = FT.crop(img= img, top=int(box[1]), left=int(box[0]),
+        # Crop image with the square bounding box
+        img_C = FT.crop(img=img, top=int(box[1]), left=int(box[0]),
                         height=int(box[3]-box[1]), width=int(box[2]-box[0]))

-        #Check if the cropped image is square, if not, pad it
+        # Check if the cropped image is square, if not, pad it
         _, h, w = img_C.shape
         if w != h:
             max_dim = max(w, h)
@@ -241,10 +238,10 @@ def __getitem_identification(self, index):
             padding = (int(l_pad), int(t_pad), int(r_pad), int(b_pad))
             img_C = FT.pad(img_C, padding, 0, 'constant')

-        #Resize cropped image to the desired size
+        # Resize cropped image to the desired size
         img_C = FT.resize(img_C, (self.img_size[0], self.img_size[1]))

-        #Concatenate images
+        # Concatenate images
         concat_img = torch.cat((img_C, img_A), 0)

         return concat_img, lbl_index
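__getitem_identification thus returns two views of the same face stacked channel-wise: img_A, warped onto the landmark template, and img_C, cropped square around the detection box. A self-contained sketch of that pipeline with a dummy image and synthetic landmarks/box (all values illustrative only):

    import numpy as np
    import torch
    import torchvision.transforms.functional as FT
    import kornia.geometry.transform as GT
    from skimage import transform as trans

    IMG_SIZE = (112, 112)
    img = torch.rand(3, 200, 160)                 # dummy CHW image in [0, 1]

    # Aligned view: landmarks -> 2x3 similarity matrix -> batched warp.
    src = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                    [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
    landmark = src * 1.3 + 10.0                   # stand-in detector output
    tform = trans.SimilarityTransform()
    tform.estimate(landmark, src)
    A = torch.as_tensor(tform.params[0:2, :], dtype=torch.float32).unsqueeze(0)
    img_A = GT.warp_affine(img.unsqueeze(0), A, IMG_SIZE).squeeze(0)

    # Cropped view: grow the box to a square, clip, crop, re-pad, resize.
    box = [30.0, 50.0, 90.0, 180.0]               # illustrative x1, y1, x2, y2
    height, width = box[3] - box[1], box[2] - box[0]
    max_dim = max(height, width)
    box[0] = np.clip(box[0] - (max_dim - width) / 2, 0, img.shape[2])
    box[1] = np.clip(box[1] - (max_dim - height) / 2, 0, img.shape[1])
    box[2] = np.clip(box[2] + (max_dim - width) / 2, 0, img.shape[2])
    box[3] = np.clip(box[3] + (max_dim - height) / 2, 0, img.shape[1])
    img_C = FT.crop(img, top=int(box[1]), left=int(box[0]),
                    height=int(box[3] - box[1]), width=int(box[2] - box[0]))
    _, h, w = img_C.shape
    if w != h:                                    # clipping can break squareness
        max_dim = max(w, h)
        l_pad, t_pad = (max_dim - w) // 2, (max_dim - h) // 2
        img_C = FT.pad(img_C, [l_pad, t_pad, max_dim - w - l_pad, max_dim - h - t_pad])
    img_C = FT.resize(img_C, list(IMG_SIZE))

    concat_img = torch.cat((img_C, img_A), 0)     # both views, stacked channel-wise
    print(concat_img.shape)                       # torch.Size([6, 112, 112])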
@@ -259,24 +256,23 @@ def __getitem_identification_dr(self, index):
         lbl_index = torch.tensor(lbl_index, dtype=torch.long)
         img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index]))

-        #Apply transformation to the image that will be aligned
+        # Apply transformation to the image that will be aligned
         if self.transform is not None:
             img = self.transform(img)

-        #Use landmarks to estimate affine transformation
+        # Use landmarks to estimate affine transformation
         landmark = self.pickle_dict["landmarks"][index]
         self.tform.estimate(landmark, self.src)
-        A = self.tform.params[0:2,:]
+        A = self.tform.params[0:2, :]
         A = torch.as_tensor(A, dtype=torch.float32)
         A = A.unsqueeze(0)

-        #Apply affine transformation to obtain aligned image
-        img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0],self.img_size[1]))
+        # Apply affine transformation to obtain aligned image
+        img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0], self.img_size[1]))
         img = img.squeeze(0)

         return img, lbl_index

-
     @staticmethod
     def __makedir_exist_ok(dirpath):
         """Make directory if not already exists
@@ -308,6 +304,7 @@ def collate_fn(batch):
     images = torch.stack(images, dim=0)
     return images, boxes_and_labels
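collate_fn exists because detection batches are ragged: every image tensor has the same shape, but the number of face boxes varies per sample, so only the images are stacked. An illustrative stand-in consistent with the visible tail of the function above (the full body is outside this hunk, so treat the list comprehensions as an assumption):

    import torch

    def collate_fn(batch):
        # batch: list of (image, (boxes, labels)) pairs
        images = torch.stack([item[0] for item in batch], dim=0)
        boxes_and_labels = [item[1] for item in batch]   # left un-stacked
        return images, boxes_and_labels

    batch = [
        (torch.rand(3, 224, 168), (torch.rand(2, 4), torch.zeros(2, dtype=torch.long))),
        (torch.rand(3, 224, 168), (torch.rand(5, 4), torch.zeros(5, dtype=torch.long))),
    ]
    images, boxes_and_labels = collate_fn(batch)
    print(images.shape)                                # torch.Size([2, 3, 224, 168])
    print([b.shape[0] for b, _ in boxes_and_labels])   # [2, 5]: ragged, as expected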

+
 def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)):

     """ Returns FaceID Dataset
@@ -317,11 +314,11 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
     train_transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.RandomHorizontalFlip(p=0.5),
-        transforms.ColorJitter(brightness=(0.6,1.4),
-                               saturation=(0.6,1.4),contrast=(0.6,1.4),hue=(-0.4,0.4)),
+        transforms.ColorJitter(brightness=(0.6, 1.4), saturation=(0.6, 1.4),
+                               contrast=(0.6, 1.4), hue=(-0.4, 0.4)),
         transforms.RandomErasing(p=0.1),
         ai8x.normalize(args=args)
-        ])
+    ])
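ai8x.normalize(args=args) is the only non-torchvision step in this Compose; in the ai8x-training repository it is a callable that maps ToTensor's [0, 1] output to the range the MAX78000 expects, float [-1, 1) or an 8-bit integer range depending on args.act_mode_8bit. A sketch of that assumption (the field name is taken from ai8x-training and should be treated as such):

    import torch
    from types import SimpleNamespace

    import ai8x  # from the ai8x-training repository, as imported at the top of this file

    args = SimpleNamespace(act_mode_8bit=False)   # float mode; field name assumed
    norm = ai8x.normalize(args=args)

    x = torch.rand(3, 112, 112)                   # what ToTensor() would produce
    y = norm(x)
    print(y.min().item(), y.max().item())         # roughly -1.0 .. 1.0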

     teacher_transform = transforms.Compose([
         transforms.ToTensor(),
@@ -330,8 +327,8 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
     if load_train:

         train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='identification',
-                                transform=train_transform, teacher_transform=teacher_transform,
-                                img_size=img_size)
+                                 transform=train_transform, teacher_transform=teacher_transform,
+                                 img_size=img_size)

         print(f'Train dataset length: {len(train_dataset)}\n')
     else:
@@ -342,15 +339,16 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
             ai8x.normalize(args=args)])

         test_dataset = VGGFace2(root_dir=data_dir, d_type='test', mode='identification',
-                                transform=test_transform, teacher_transform=teacher_transform,
-                                img_size=img_size)
+                                 transform=test_transform, teacher_transform=teacher_transform,
+                                 img_size=img_size)

         print(f'Test dataset length: {len(test_dataset)}\n')
     else:
         test_dataset = None

     return train_dataset, test_dataset
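A hypothetical direct call, for orientation. In ai8x-training these loaders are normally resolved through the `datasets` registry at the bottom of this file, `data` is the usual (data_dir, args) pair implied by the data_dir and args uses above, and the path below is a placeholder:

    from types import SimpleNamespace

    from datasets.vggface2 import VGGFace2_FaceID_get_datasets

    args = SimpleNamespace(act_mode_8bit=False)   # see the normalize sketch above
    train_ds, test_ds = VGGFace2_FaceID_get_datasets(
        data=('/path/to/VGGFace2', args), load_train=True, load_test=True)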

+
 def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)):

     """ Returns FaceID Dataset for dimensionality reduction
@@ -361,7 +359,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s
         transforms.ToTensor(),
         transforms.RandomHorizontalFlip(p=0.5),
         ai8x.normalize(args=args)
-        ])
+    ])

     if load_train:

@@ -385,6 +383,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s

     return train_dataset, test_dataset

+
 def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_size=(224, 168)):

     """ Returns FaceDetection Dataset
@@ -393,8 +392,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz

     if load_train:
         train_transform = transforms.Compose([
-            ai8x.normalize(args=args)
-            ])
+            ai8x.normalize(args=args)])

         train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='detection',
                                  transform=train_transform, img_size=img_size)
@@ -415,6 +413,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz

     return train_dataset, test_dataset

+
 datasets = [
     {
         'name': 'VGGFace2_FaceID',