Commit 796754a: faceID part 2
oguzhanbsolak committed Dec 7, 2023 (1 parent: ccbf10b)

Showing 7 changed files with 147 additions and 140 deletions.
26 changes: 12 additions & 14 deletions ai8x_blocks.py
@@ -147,30 +147,28 @@ def __init__(self, in_channels, out_channels, expansion_factor, stride=1, bias=F
         if stride == 1:
             if depthwise_bias:
                 self.conv2 = ai8x.FusedConv2dBN(hidden_channels, out_channels, 3,
-                                    padding=1, stride=stride,
-                                    bias=depthwise_bias, **kwargs)
+                                                padding=1, stride=stride,
+                                                bias=depthwise_bias, **kwargs)

             else:
                 self.conv2 = ai8x.Conv2d(hidden_channels, out_channels, 3,
-                             padding=1, stride=stride,
-                             bias=depthwise_bias, **kwargs)
+                                         padding=1, stride=stride,
+                                         bias=depthwise_bias, **kwargs)

         else:
             if depthwise_bias:
                 self.conv2 = ai8x.FusedMaxPoolConv2dBN(hidden_channels,
-                                                       out_channels,
-                                                       3, padding=1, pool_size=stride,
-                                                       pool_stride=stride,
-                                                       bias=depthwise_bias,
-                                                       **kwargs)
+                                                       out_channels, 3,
+                                                       padding=1, pool_size=stride,
+                                                       pool_stride=stride,
+                                                       bias=depthwise_bias, **kwargs)

             else:
                 self.conv2 = ai8x.FusedMaxPoolConv2d(hidden_channels,
-                                                     out_channels,
-                                                     3, padding=1, pool_size=stride,
-                                                     pool_stride=stride,
-                                                     bias=depthwise_bias,
-                                                     **kwargs)
+                                                     out_channels, 3,
+                                                     padding=1, pool_size=stride,
+                                                     pool_stride=stride,
+                                                     bias=depthwise_bias, **kwargs)

         if (stride == 1) and (in_channels == out_channels):
             self.resid = ai8x.Add()
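The four branches above differ only in pooling and batch norm; the reformatting does not change behavior. As a rough plain-PyTorch sketch of what gets selected (the ai8x.* layers additionally handle MAX78000-specific quantization and operator fusing, which plain nn.* does not capture):

    import torch
    from torch import nn

    def depthwise_stage(hidden_channels, out_channels, stride=1, depthwise_bias=False):
        """Sketch of the conv2 selection above, using plain torch.nn layers."""
        layers = []
        if stride > 1:
            # FusedMaxPoolConv2d*: a max pool with size == stride does the downsampling.
            layers.append(nn.MaxPool2d(kernel_size=stride, stride=stride))
        # The 3x3 convolution itself always runs with stride 1 here.
        layers.append(nn.Conv2d(hidden_channels, out_channels, 3, padding=1,
                                bias=depthwise_bias))
        if depthwise_bias:
            # The *BN variants are chosen only when depthwise_bias is set.
            layers.append(nn.BatchNorm2d(out_channels))
        return nn.Sequential(*layers)

    x = torch.rand(1, 32, 56, 56)
    print(depthwise_stage(32, 64, stride=2, depthwise_bias=True)(x).shape)  # (1, 64, 28, 28)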
85 changes: 42 additions & 43 deletions datasets/vggface2.py
@@ -11,27 +11,27 @@
 https://ieeexplore.ieee.org/abstract/document/8373813
 """

-import os
 import glob
 import errno
+import os
 import pickle

 import numpy as np
-import cv2
-from tqdm import tqdm
-from PIL import Image

 import torch
-import torchvision.transforms.functional as FT
 from torch.utils.data import Dataset
 from torchvision import transforms
+import torchvision.transforms.functional as FT

-from skimage import transform as trans
-from hawk_eyes.face import RetinaFace
+import cv2
 import kornia.geometry.transform as GT
+from hawk_eyes.face import RetinaFace
+from PIL import Image
+from skimage import transform as trans

+from tqdm import tqdm

-from utils import augmentation_utils
 import ai8x
+from utils import augmentation_utils


 class VGGFace2(Dataset):
@@ -63,7 +63,7 @@ def __init__(self, root_dir, d_type, mode, transform=None,
                              [73.5318, 51.5014],
                              [56.0252, 71.7366],
                              [41.5493, 92.3655],
-                             [70.7299, 92.2041] ], dtype=np.float32 )
+                             [70.7299, 92.2041]], dtype=np.float32)

         self.__makedir_exist_ok(self.dataset_path)
         self.__makedir_exist_ok(os.path.join(self.dataset_path, "processed"))
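For orientation: self.src is the widely used five-point template of an aligned 112x112 face (eyes, nose tip, mouth corners); its first row sits above this hunk, and the left-eye value used below is the usual template value, assumed here. A minimal sketch of how the template is consumed, assuming self.tform is a skimage SimilarityTransform as the `trans` import suggests; the detected landmarks are synthetic stand-ins, not real detector output:

    import numpy as np
    from skimage import transform as trans

    src = np.array([[38.2946, 51.6963],   # left eye (assumed standard value)
                    [73.5318, 51.5014],   # right eye
                    [56.0252, 71.7366],   # nose tip
                    [41.5493, 92.3655],   # left mouth corner
                    [70.7299, 92.2041]],  # right mouth corner
                   dtype=np.float32)

    detected = src * 2.0 + np.array([30.0, 12.0], dtype=np.float32)  # bigger, offset face

    tform = trans.SimilarityTransform()
    tform.estimate(detected, src)   # least-squares fit, same call order as in __getitem__
    print(tform.scale)              # ~0.5: shrinks the detection back to template size
    A = tform.params[0:2, :]        # the 2x3 matrix later fed to warp_affine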
@@ -96,12 +96,11 @@ def __init__(self, root_dir, d_type, mode, transform=None,
             print(f'Unknown data type: {self.d_type}')
             return

-
     def __extract_gt(self):
         """
        Extracts the ground truth from the dataset
        """
-        retina = RetinaFace(model_name='retina_l', conf = 0.5)
+        retina = RetinaFace(model_name='retina_l', conf=0.5)
         img_paths = list(glob.glob(os.path.join(self.d_path + '/**/', '*.jpg'), recursive=True))
         nf_number = 0
         n_words = 0
@@ -122,9 +121,8 @@ def __extract_gt(self):

             lndmrks = lndmrks[0]

-
             dir_name = os.path.dirname(jpg)
-            lbl = os.path.relpath(dir_name, self.d_path)
+            lbl = os.path.relpath(dir_name, self.d_path)

             if lbl not in pickle_dict["word2index"]:
                 pickle_dict["word2index"][lbl] = n_words
@@ -143,7 +141,6 @@ def __extract_gt(self):
     def __len__(self):
         return len(self.pickle_dict["img_list"]) - 1

-
     def __getitem__(self, index):
         if index >= len(self):
             raise IndexError
@@ -157,7 +154,7 @@ def __getitem__(self, index):
         if self.mode == 'identification_dr':
             return self.__getitem_identification_dr(index)

-        #Will never reached
+        # Will never reached
         return None

     def __getitem_detection(self, index):
@@ -196,26 +193,26 @@ def __getitem_identification(self, index):
         img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index]))
         img_A = img.copy()

-        #Apply transformation to the image that will be aligned
+        # Apply transformation to the image that will be aligned
         if self.teacher_transform is not None:
             img_A = self.teacher_transform(img_A)

-        #Apply transformation to the image that will be cropped
+        # Apply transformation to the image that will be cropped
         if self.transform is not None:
             img = self.transform(img)

-        #Use landmarks to estimate affine transformation
+        # Use landmarks to estimate affine transformation
         landmark = self.pickle_dict["landmarks"][index]
         self.tform.estimate(landmark, self.src)
-        A = self.tform.params[0:2,:]
+        A = self.tform.params[0:2, :]
         A = torch.as_tensor(A, dtype=torch.float32)
         A = A.unsqueeze(0)

-        #Apply affine transformation to obtain aligned image
+        # Apply affine transformation to obtain aligned image
         img_A = GT.warp_affine(img_A.unsqueeze(0), A, (self.img_size[0],self.img_size[1]))
         img_A = img_A.squeeze(0)

-        #Convert bounding box to square
+        # Convert bounding box to square
         height = box[3] - box[1]
         width = box[2] - box[0]
         max_dim = max(height, width)
@@ -224,11 +221,11 @@ def __getitem_identification(self, index):
         box[2] = np.clip(box[2] + (max_dim - width) / 2, 0, img.shape[2])
         box[3] = np.clip(box[3] + (max_dim - height) / 2, 0, img.shape[1])

-        #Crop image with the square bounding box
-        img_C = FT.crop(img= img, top=int(box[1]), left=int(box[0]),
+        # Crop image with the square bounding box
+        img_C = FT.crop(img=img, top=int(box[1]), left=int(box[0]),
                         height=int(box[3]-box[1]), width=int(box[2]-box[0]))

-        #Check if the cropped image is square, if not, pad it
+        # Check if the cropped image is square, if not, pad it
         _, h, w = img_C.shape
         if w != h:
             max_dim = max(w, h)
@@ -241,10 +238,10 @@ def __getitem_identification(self, index):
             padding = (int(l_pad), int(t_pad), int(r_pad), int(b_pad))
             img_C = FT.pad(img_C, padding, 0, 'constant')

-        #Resize cropped image to the desired size
+        # Resize cropped image to the desired size
         img_C = FT.resize(img_C, (self.img_size[0], self.img_size[1]))

-        #Concatenate images
+        # Concatenate images
         concat_img = torch.cat((img_C, img_A), 0)

         return concat_img, lbl_index
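__getitem_identification thus returns two views of the same face stacked channel-wise: img_A, warped onto the landmark template, and img_C, cropped square around the detection box. A self-contained sketch of that pipeline with a dummy image and synthetic landmarks/box (all values illustrative only):

    import numpy as np
    import torch
    import torchvision.transforms.functional as FT
    import kornia.geometry.transform as GT
    from skimage import transform as trans

    IMG_SIZE = (112, 112)
    img = torch.rand(3, 200, 160)                 # dummy CHW image in [0, 1]

    # Aligned view: landmarks -> 2x3 similarity matrix -> batched warp.
    src = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                    [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
    landmark = src * 1.3 + 10.0                   # stand-in detector output
    tform = trans.SimilarityTransform()
    tform.estimate(landmark, src)
    A = torch.as_tensor(tform.params[0:2, :], dtype=torch.float32).unsqueeze(0)
    img_A = GT.warp_affine(img.unsqueeze(0), A, IMG_SIZE).squeeze(0)

    # Cropped view: grow the box to a square, clip, crop, re-pad, resize.
    box = [30.0, 50.0, 90.0, 180.0]               # illustrative x1, y1, x2, y2
    height, width = box[3] - box[1], box[2] - box[0]
    max_dim = max(height, width)
    box[0] = np.clip(box[0] - (max_dim - width) / 2, 0, img.shape[2])
    box[1] = np.clip(box[1] - (max_dim - height) / 2, 0, img.shape[1])
    box[2] = np.clip(box[2] + (max_dim - width) / 2, 0, img.shape[2])
    box[3] = np.clip(box[3] + (max_dim - height) / 2, 0, img.shape[1])
    img_C = FT.crop(img, top=int(box[1]), left=int(box[0]),
                    height=int(box[3] - box[1]), width=int(box[2] - box[0]))
    _, h, w = img_C.shape
    if w != h:                                    # clipping can break squareness
        max_dim = max(w, h)
        l_pad, t_pad = (max_dim - w) // 2, (max_dim - h) // 2
        img_C = FT.pad(img_C, [l_pad, t_pad, max_dim - w - l_pad, max_dim - h - t_pad])
    img_C = FT.resize(img_C, list(IMG_SIZE))

    concat_img = torch.cat((img_C, img_A), 0)     # both views, stacked channel-wise
    print(concat_img.shape)                       # torch.Size([6, 112, 112])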
@@ -259,24 +256,23 @@ def __getitem_identification_dr(self, index):
         lbl_index = torch.tensor(lbl_index, dtype=torch.long)
         img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index]))

-        #Apply transformation to the image that will be aligned
+        # Apply transformation to the image that will be aligned
         if self.transform is not None:
             img = self.transform(img)

-        #Use landmarks to estimate affine transformation
+        # Use landmarks to estimate affine transformation
         landmark = self.pickle_dict["landmarks"][index]
         self.tform.estimate(landmark, self.src)
-        A = self.tform.params[0:2,:]
+        A = self.tform.params[0:2, :]
         A = torch.as_tensor(A, dtype=torch.float32)
         A = A.unsqueeze(0)

-        #Apply affine transformation to obtain aligned image
-        img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0],self.img_size[1]))
+        # Apply affine transformation to obtain aligned image
+        img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0], self.img_size[1]))
         img = img.squeeze(0)

         return img, lbl_index

-
     @staticmethod
     def __makedir_exist_ok(dirpath):
         """Make directory if not already exists
@@ -308,6 +304,7 @@ def collate_fn(batch):
     images = torch.stack(images, dim=0)
     return images, boxes_and_labels
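collate_fn exists because detection batches are ragged: every image tensor has the same shape, but the number of face boxes varies per sample, so only the images are stacked. An illustrative stand-in consistent with the visible tail of the function above (the full body is outside this hunk, so treat the list comprehensions as an assumption):

    import torch

    def collate_fn(batch):
        # batch: list of (image, (boxes, labels)) pairs
        images = torch.stack([item[0] for item in batch], dim=0)
        boxes_and_labels = [item[1] for item in batch]   # left un-stacked
        return images, boxes_and_labels

    batch = [
        (torch.rand(3, 224, 168), (torch.rand(2, 4), torch.zeros(2, dtype=torch.long))),
        (torch.rand(3, 224, 168), (torch.rand(5, 4), torch.zeros(5, dtype=torch.long))),
    ]
    images, boxes_and_labels = collate_fn(batch)
    print(images.shape)                                # torch.Size([2, 3, 224, 168])
    print([b.shape[0] for b, _ in boxes_and_labels])   # [2, 5]: ragged, as expected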

+
 def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)):

     """ Returns FaceID Dataset
@@ -317,11 +314,11 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
     train_transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.RandomHorizontalFlip(p=0.5),
-        transforms.ColorJitter(brightness=(0.6,1.4),
-                               saturation=(0.6,1.4),contrast=(0.6,1.4),hue=(-0.4,0.4)),
+        transforms.ColorJitter(brightness=(0.6, 1.4), saturation=(0.6, 1.4),
+                               contrast=(0.6, 1.4), hue=(-0.4, 0.4)),
         transforms.RandomErasing(p=0.1),
         ai8x.normalize(args=args)
-        ])
+    ])
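ai8x.normalize(args=args) is the only non-torchvision step in this Compose; in the ai8x-training repository it is a callable that maps ToTensor's [0, 1] output to the range the MAX78000 expects, float [-1, 1) or an 8-bit integer range depending on args.act_mode_8bit. A sketch of that assumption (the field name is taken from ai8x-training and should be treated as such):

    import torch
    from types import SimpleNamespace

    import ai8x  # from the ai8x-training repository, as imported at the top of this file

    args = SimpleNamespace(act_mode_8bit=False)   # float mode; field name assumed
    norm = ai8x.normalize(args=args)

    x = torch.rand(3, 112, 112)                   # what ToTensor() would produce
    y = norm(x)
    print(y.min().item(), y.max().item())         # roughly -1.0 .. 1.0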

     teacher_transform = transforms.Compose([
         transforms.ToTensor(),
@@ -330,8 +327,8 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
     if load_train:

         train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='identification',
-                                transform=train_transform, teacher_transform=teacher_transform,
-                                img_size=img_size)
+                                 transform=train_transform, teacher_transform=teacher_transform,
+                                 img_size=img_size)

         print(f'Train dataset length: {len(train_dataset)}\n')
     else:
@@ -342,15 +339,16 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size
             ai8x.normalize(args=args)])

         test_dataset = VGGFace2(root_dir=data_dir, d_type='test', mode='identification',
-                                transform=test_transform, teacher_transform=teacher_transform,
-                                img_size=img_size)
+                                 transform=test_transform, teacher_transform=teacher_transform,
+                                 img_size=img_size)

         print(f'Test dataset length: {len(test_dataset)}\n')
     else:
         test_dataset = None

     return train_dataset, test_dataset
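A hypothetical direct call, for orientation. In ai8x-training these loaders are normally resolved through the `datasets` registry at the bottom of this file, `data` is the usual (data_dir, args) pair implied by the data_dir and args uses above, and the path below is a placeholder:

    from types import SimpleNamespace

    from datasets.vggface2 import VGGFace2_FaceID_get_datasets

    args = SimpleNamespace(act_mode_8bit=False)   # see the normalize sketch above
    train_ds, test_ds = VGGFace2_FaceID_get_datasets(
        data=('/path/to/VGGFace2', args), load_train=True, load_test=True)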

+
 def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)):

     """ Returns FaceID Dataset for dimensionality reduction
@@ -361,7 +359,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s
         transforms.ToTensor(),
         transforms.RandomHorizontalFlip(p=0.5),
         ai8x.normalize(args=args)
-        ])
+    ])

     if load_train:

@@ -385,6 +383,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s

     return train_dataset, test_dataset

+
 def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_size=(224, 168)):

     """ Returns FaceDetection Dataset
@@ -393,8 +392,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz

     if load_train:
         train_transform = transforms.Compose([
-            ai8x.normalize(args=args)
-            ])
+            ai8x.normalize(args=args)])

         train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='detection',
                                  transform=train_transform, img_size=img_size)
@@ -415,6 +413,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz

     return train_dataset, test_dataset

+
 datasets = [
     {
         'name': 'VGGFace2_FaceID',