diff --git a/ai8x_blocks.py b/ai8x_blocks.py index 53c6b7980..fc2b314c7 100644 --- a/ai8x_blocks.py +++ b/ai8x_blocks.py @@ -147,30 +147,28 @@ def __init__(self, in_channels, out_channels, expansion_factor, stride=1, bias=F if stride == 1: if depthwise_bias: self.conv2 = ai8x.FusedConv2dBN(hidden_channels, out_channels, 3, - padding=1, stride=stride, - bias=depthwise_bias, **kwargs) + padding=1, stride=stride, + bias=depthwise_bias, **kwargs) else: self.conv2 = ai8x.Conv2d(hidden_channels, out_channels, 3, - padding=1, stride=stride, - bias=depthwise_bias, **kwargs) + padding=1, stride=stride, + bias=depthwise_bias, **kwargs) else: if depthwise_bias: self.conv2 = ai8x.FusedMaxPoolConv2dBN(hidden_channels, - out_channels, - 3, padding=1, pool_size=stride, - pool_stride=stride, - bias=depthwise_bias, - **kwargs) + out_channels, 3, + padding=1, pool_size=stride, + pool_stride=stride, + bias=depthwise_bias, **kwargs) else: self.conv2 = ai8x.FusedMaxPoolConv2d(hidden_channels, - out_channels, - 3, padding=1, pool_size=stride, - pool_stride=stride, - bias=depthwise_bias, - **kwargs) + out_channels, 3, + padding=1, pool_size=stride, + pool_stride=stride, + bias=depthwise_bias, **kwargs) if (stride == 1) and (in_channels == out_channels): self.resid = ai8x.Add() diff --git a/datasets/vggface2.py b/datasets/vggface2.py index cbbf87898..f7aba0a00 100644 --- a/datasets/vggface2.py +++ b/datasets/vggface2.py @@ -11,27 +11,27 @@ https://ieeexplore.ieee.org/abstract/document/8373813 """ -import os import glob import errno +import os import pickle import numpy as np -import cv2 -from tqdm import tqdm -from PIL import Image - import torch +import torchvision.transforms.functional as FT from torch.utils.data import Dataset from torchvision import transforms -import torchvision.transforms.functional as FT -from skimage import transform as trans -from hawk_eyes.face import RetinaFace +import cv2 import kornia.geometry.transform as GT +from hawk_eyes.face import RetinaFace +from PIL import Image +from skimage import transform as trans + +from tqdm import tqdm -from utils import augmentation_utils import ai8x +from utils import augmentation_utils class VGGFace2(Dataset): @@ -63,7 +63,7 @@ def __init__(self, root_dir, d_type, mode, transform=None, [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], - [70.7299, 92.2041] ], dtype=np.float32 ) + [70.7299, 92.2041]], dtype=np.float32) self.__makedir_exist_ok(self.dataset_path) self.__makedir_exist_ok(os.path.join(self.dataset_path, "processed")) @@ -96,12 +96,11 @@ def __init__(self, root_dir, d_type, mode, transform=None, print(f'Unknown data type: {self.d_type}') return - def __extract_gt(self): """ Extracts the ground truth from the dataset """ - retina = RetinaFace(model_name='retina_l', conf = 0.5) + retina = RetinaFace(model_name='retina_l', conf=0.5) img_paths = list(glob.glob(os.path.join(self.d_path + '/**/', '*.jpg'), recursive=True)) nf_number = 0 n_words = 0 @@ -122,9 +121,8 @@ def __extract_gt(self): lndmrks = lndmrks[0] - dir_name = os.path.dirname(jpg) - lbl = os.path.relpath(dir_name, self.d_path) + lbl = os.path.relpath(dir_name, self.d_path) if lbl not in pickle_dict["word2index"]: pickle_dict["word2index"][lbl] = n_words @@ -143,7 +141,6 @@ def __extract_gt(self): def __len__(self): return len(self.pickle_dict["img_list"]) - 1 - def __getitem__(self, index): if index >= len(self): raise IndexError @@ -157,7 +154,7 @@ def __getitem__(self, index): if self.mode == 'identification_dr': return self.__getitem_identification_dr(index) - #Will 
never reached + # Will never reached return None def __getitem_detection(self, index): @@ -196,26 +193,26 @@ def __getitem_identification(self, index): img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index])) img_A = img.copy() - #Apply transformation to the image that will be aligned + # Apply transformation to the image that will be aligned if self.teacher_transform is not None: img_A = self.teacher_transform(img_A) - #Apply transformation to the image that will be cropped + # Apply transformation to the image that will be cropped if self.transform is not None: img = self.transform(img) - #Use landmarks to estimate affine transformation + # Use landmarks to estimate affine transformation landmark = self.pickle_dict["landmarks"][index] self.tform.estimate(landmark, self.src) - A = self.tform.params[0:2,:] + A = self.tform.params[0:2, :] A = torch.as_tensor(A, dtype=torch.float32) A = A.unsqueeze(0) - #Apply affine transformation to obtain aligned image + # Apply affine transformation to obtain aligned image img_A = GT.warp_affine(img_A.unsqueeze(0), A, (self.img_size[0],self.img_size[1])) img_A = img_A.squeeze(0) - #Convert bounding box to square + # Convert bounding box to square height = box[3] - box[1] width = box[2] - box[0] max_dim = max(height, width) @@ -224,11 +221,11 @@ def __getitem_identification(self, index): box[2] = np.clip(box[2] + (max_dim - width) / 2, 0, img.shape[2]) box[3] = np.clip(box[3] + (max_dim - height) / 2, 0, img.shape[1]) - #Crop image with the square bounding box - img_C = FT.crop(img= img, top=int(box[1]), left=int(box[0]), + # Crop image with the square bounding box + img_C = FT.crop(img=img, top=int(box[1]), left=int(box[0]), height=int(box[3]-box[1]), width=int(box[2]-box[0])) - #Check if the cropped image is square, if not, pad it + # Check if the cropped image is square, if not, pad it _, h, w = img_C.shape if w != h: max_dim = max(w, h) @@ -241,10 +238,10 @@ def __getitem_identification(self, index): padding = (int(l_pad), int(t_pad), int(r_pad), int(b_pad)) img_C = FT.pad(img_C, padding, 0, 'constant') - #Resize cropped image to the desired size + # Resize cropped image to the desired size img_C = FT.resize(img_C, (self.img_size[0], self.img_size[1])) - #Concatenate images + # Concatenate images concat_img = torch.cat((img_C, img_A), 0) return concat_img, lbl_index @@ -259,24 +256,23 @@ def __getitem_identification_dr(self, index): lbl_index = torch.tensor(lbl_index, dtype=torch.long) img = Image.open(os.path.join(self.dataset_path, self.pickle_dict["img_list"][index])) - #Apply transformation to the image that will be aligned + # Apply transformation to the image that will be aligned if self.transform is not None: img = self.transform(img) - #Use landmarks to estimate affine transformation + # Use landmarks to estimate affine transformation landmark = self.pickle_dict["landmarks"][index] self.tform.estimate(landmark, self.src) - A = self.tform.params[0:2,:] + A = self.tform.params[0:2, :] A = torch.as_tensor(A, dtype=torch.float32) A = A.unsqueeze(0) - #Apply affine transformation to obtain aligned image - img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0],self.img_size[1])) + # Apply affine transformation to obtain aligned image + img = GT.warp_affine(img.unsqueeze(0), A, (self.img_size[0], self.img_size[1])) img = img.squeeze(0) return img, lbl_index - @staticmethod def __makedir_exist_ok(dirpath): """Make directory if not already exists @@ -308,6 +304,7 @@ def collate_fn(batch): images = torch.stack(images, 
dim=0) return images, boxes_and_labels + def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)): """ Returns FaceID Dataset @@ -317,11 +314,11 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size train_transform = transforms.Compose([ transforms.ToTensor(), transforms.RandomHorizontalFlip(p=0.5), - transforms.ColorJitter(brightness=(0.6,1.4), - saturation=(0.6,1.4),contrast=(0.6,1.4),hue=(-0.4,0.4)), + transforms.ColorJitter(brightness=(0.6, 1.4), saturation=(0.6, 1.4), + contrast=(0.6, 1.4), hue=(-0.4, 0.4)), transforms.RandomErasing(p=0.1), ai8x.normalize(args=args) - ]) +]) teacher_transform = transforms.Compose([ transforms.ToTensor(), @@ -330,8 +327,8 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size if load_train: train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='identification', - transform=train_transform, teacher_transform=teacher_transform, - img_size=img_size) + transform=train_transform, teacher_transform=teacher_transform, + img_size=img_size) print(f'Train dataset length: {len(train_dataset)}\n') else: @@ -342,8 +339,8 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size ai8x.normalize(args=args)]) test_dataset = VGGFace2(root_dir=data_dir, d_type='test', mode='identification', - transform=test_transform, teacher_transform=teacher_transform, - img_size=img_size) + transform=test_transform, teacher_transform=teacher_transform, + img_size=img_size) print(f'Test dataset length: {len(test_dataset)}\n') else: @@ -351,6 +348,7 @@ def VGGFace2_FaceID_get_datasets(data, load_train=True, load_test=True, img_size return train_dataset, test_dataset + def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_size=(112, 112)): """ Returns FaceID Dataset for dimensionality reduction @@ -361,7 +359,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s transforms.ToTensor(), transforms.RandomHorizontalFlip(p=0.5), ai8x.normalize(args=args) - ]) +]) if load_train: @@ -385,6 +383,7 @@ def VGGFace2_FaceID_dr_get_datasets(data, load_train=True, load_test=True, img_s return train_dataset, test_dataset + def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_size=(224, 168)): """ Returns FaceDetection Dataset @@ -393,8 +392,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz if load_train: train_transform = transforms.Compose([ - ai8x.normalize(args=args) - ]) + ai8x.normalize(args=args)]) train_dataset = VGGFace2(root_dir=data_dir, d_type='train', mode='detection', transform=train_transform, img_size=img_size) @@ -415,6 +413,7 @@ def VGGFace2_Facedet_get_datasets(data, load_train=True, load_test=True, img_siz return train_dataset, test_dataset + datasets = [ { 'name': 'VGGFace2_FaceID', diff --git a/models/ai85net-faceid_112.py b/models/ai85net-faceid_112.py index 7b8fcfb1f..2875b0608 100644 --- a/models/ai85net-faceid_112.py +++ b/models/ai85net-faceid_112.py @@ -29,8 +29,8 @@ def __init__( # pylint: disable=too-many-arguments bottleneck_settings, last_layer_width, emb_dimensionality, - num_classes=None, # pylint: disable=unused-argument - avg_pool_size=(7,7), + num_classes=None, # pylint: disable=unused-argument + avg_pool_size=(7, 7), num_channels=3, dimensions=(112, 112), # pylint: disable=unused-argument bias=False, @@ -41,8 +41,8 @@ def __init__( # pylint: disable=too-many-arguments super().__init__() # bias = False due to streaming 
self.pre_stage = ai8x.FusedConv2dReLU(num_channels, bottleneck_settings[0][1], 3, - padding=1, stride=pre_layer_stride, - bias=False, **kwargs) + padding=1, stride=pre_layer_stride, + bias=False, **kwargs) # bias = False due to streaming self.pre_stage_2 = ai8x.FusedMaxPoolConv2dReLU(bottleneck_settings[0][1], bottleneck_settings[0][1], 3, padding=1, @@ -57,11 +57,10 @@ def __init__( # pylint: disable=too-many-arguments padding=0, stride=1, bias=False, **kwargs) self.pre_avg = ai8x.Conv2d(last_layer_width, last_layer_width, 3, padding=1, stride=1, - bias = False, **kwargs) + bias=False, **kwargs) self.avg_pool = ai8x.AvgPool2d(avg_pool_size, stride=1) self.linear = ai8x.Linear(last_layer_width, emb_dimensionality, bias=bias, **kwargs) - def _create_bottleneck_stage(self, setting, bias, depthwise_bias, reduced_depthwise_bias, **kwargs): """Function to create bottlencek stage. Setting format is: @@ -71,21 +70,21 @@ def _create_bottleneck_stage(self, setting, bias, depthwise_bias, if setting[0] > 0: stage.append(ai8x_blocks.ConvResidualBottleneck(in_channels=setting[1], - out_channels=setting[2], - stride=setting[3], - expansion_factor=setting[4], - bias=bias, depthwise_bias=depthwise_bias, - **kwargs)) + out_channels=setting[2], + stride=setting[3], + expansion_factor=setting[4], bias=bias, + depthwise_bias=depthwise_bias, + **kwargs)) for i in range(1, setting[0]): if reduced_depthwise_bias: stage.append(ai8x_blocks.ConvResidualBottleneck(in_channels=setting[2], - out_channels=setting[2], - stride=1, - expansion_factor=setting[4], - bias=bias, - depthwise_bias=(i%2==0) and - depthwise_bias, **kwargs)) + out_channels=setting[2], + stride=1, + expansion_factor=setting[4], + bias=bias, + depthwise_bias=(i % 2==0) and + depthwise_bias, **kwargs)) else: stage.append(ai8x_blocks.ConvResidualBottleneck(in_channels=setting[2], out_channels=setting[2], @@ -100,7 +99,7 @@ def _create_bottleneck_stage(self, setting, bias, depthwise_bias, def forward(self, x): # pylint: disable=arguments-differ """Forward prop""" if x.shape[1] == 6: - x = x[:,0:3,:,:] + x = x[:, 0:3, :, :] x = self.pre_stage(x) x = self.pre_stage_2(x) for stage in self.feature_stage: @@ -113,6 +112,7 @@ def forward(self, x): # pylint: disable=arguments-differ x = F.normalize(x, p=2, dim=1) return x + def ai85faceidnet_112(pretrained=False, **kwargs): """ Constructs a FaceIDNet_112 model. 
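# -----------------------------------------------------------------------------------------------
# Editor's illustrative sketch, not part of this patch: how one bottleneck_settings row is
# expanded by _create_bottleneck_stage() above. Reading the indices used in the code, a row is
# [num_repeat, in_channels, out_channels, stride, expansion_factor]; the first block changes the
# channel count and stride, the remaining num_repeat-1 blocks keep the shape, and with
# reduced_depthwise_bias=True only every second repeated block keeps its depthwise bias.
# The settings row below is a made-up example, and the ai8x.set_device() flags are assumed to
# match what train.py normally configures before model creation.
import ai8x
import ai8x_blocks

ai8x.set_device(85, False, False)  # assumed initialization, mirroring train.py

setting = [5, 32, 64, 2, 4]  # hypothetical row: 5 blocks, 32->64 channels, stride 2, expansion 4
blocks = [ai8x_blocks.ConvResidualBottleneck(in_channels=setting[1], out_channels=setting[2],
                                             stride=setting[3], expansion_factor=setting[4],
                                             bias=False, depthwise_bias=True)]
for i in range(1, setting[0]):
    blocks.append(ai8x_blocks.ConvResidualBottleneck(in_channels=setting[2],
                                                     out_channels=setting[2], stride=1,
                                                     expansion_factor=setting[4], bias=False,
                                                     depthwise_bias=(i % 2 == 0)))  # reduced bias
# -----------------------------------------------------------------------------------------------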
@@ -130,7 +130,8 @@ def ai85faceidnet_112(pretrained=False, **kwargs): return AI85FaceIDNet_112(pre_layer_stride=1, bottleneck_settings=bottleneck_settings, last_layer_width=128, emb_dimensionality=64, avg_pool_size=(7,7), - depthwise_bias=True, reduced_depthwise_bias = True, **kwargs) + depthwise_bias=True, reduced_depthwise_bias=True, **kwargs) + models = [ { diff --git a/models/ai87net-mobilefacenet_112.py b/models/ai87net-mobilefacenet_112.py index fec0b1473..b522283eb 100644 --- a/models/ai87net-mobilefacenet_112.py +++ b/models/ai87net-mobilefacenet_112.py @@ -19,6 +19,7 @@ import ai8x import ai8x_blocks + class AI87MobileFaceNet(nn.Module): """ MobileFaceNet for MAX78002 @@ -29,8 +30,8 @@ def __init__( # pylint: disable=too-many-arguments bottleneck_settings, last_layer_width, emb_dimensionality, - num_classes=None, # pylint: disable=unused-argument - avg_pool_size=(7,7), + num_classes=None, # pylint: disable=unused-argument + avg_pool_size=(7, 7), num_channels=3, dimensions=(112, 112), # pylint: disable=unused-argument bias=False, @@ -42,12 +43,12 @@ def __init__( # pylint: disable=too-many-arguments # bias = False due to streaming self.pre_stage = ai8x.FusedConv2dReLU(num_channels, bottleneck_settings[0][1], 3, - padding=1, stride=pre_layer_stride, - bias=False, **kwargs) + padding=1, stride=pre_layer_stride, + bias=False, **kwargs) self.dwise = ai8x.FusedMaxPoolDepthwiseConv2dReLU(64, 64, 3, padding=1, stride=1, - pool_size=2, pool_stride=2, - bias=depthwise_bias, **kwargs) + pool_size=2, pool_stride=2, + bias=depthwise_bias, **kwargs) self.feature_stage = nn.ModuleList([]) for setting in bottleneck_settings: self._create_bottleneck_stage(setting, bias, depthwise_bias, @@ -82,7 +83,7 @@ def _create_bottleneck_stage(self, setting, bias, depthwise_bias, stride=1, expansion_factor=setting[4], bias=bias, - depthwise_bias=(i%2==0) and + depthwise_bias=(i % 2==0) and depthwise_bias, **kwargs)) else: stage.append(ai8x_blocks.ResidualBottleneck(in_channels=setting[2], @@ -98,7 +99,7 @@ def _create_bottleneck_stage(self, setting, bias, depthwise_bias, def forward(self, x): # pylint: disable=arguments-differ """Forward prop""" if x.shape[1] == 6: - x = x[:,0:3,:,:] + x = x[:, 0:3, :, :] x = self.pre_stage(x) x = self.dwise(x) for stage in self.feature_stage: @@ -109,6 +110,7 @@ def forward(self, x): # pylint: disable=arguments-differ x = x.squeeze() return x + def ai87netmobilefacenet_112(pretrained=False, **kwargs): """ Constructs a MobileFaceNet model. 
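# -----------------------------------------------------------------------------------------------
# Editor's illustrative sketch, not part of this patch: why forward() slices x[:, 0:3, :, :].
# VGGFace2.__getitem_identification() returns torch.cat((img_C, img_A), 0): the first three
# channels are the square-cropped face consumed by the student networks (FaceIDNet_112,
# MobileFaceNet), the last three are the landmark-aligned face consumed by the IR-SE teacher
# (model_irse_DRL.py slices x[:, 3:, :, :]). Shapes below are illustrative only.
import torch

concat_img = torch.rand(8, 6, 112, 112)   # a batch of cropped+aligned image pairs
student_in = concat_img[:, 0:3, :, :]     # cropped face -> MobileFaceNet / FaceIDNet student
teacher_in = concat_img[:, 3:, :, :]      # aligned face -> IR-SE teacher backbone
assert student_in.shape == teacher_in.shape == (8, 3, 112, 112)
# -----------------------------------------------------------------------------------------------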
@@ -125,8 +127,10 @@ def ai87netmobilefacenet_112(pretrained=False, **kwargs): ] return AI87MobileFaceNet(pre_layer_stride=1, bottleneck_settings=bottleneck_settings, - last_layer_width=128, emb_dimensionality=64, avg_pool_size=(7,7), - depthwise_bias=True, reduced_depthwise_bias = True, **kwargs) + last_layer_width=128, emb_dimensionality=64, avg_pool_size=(7, 7), + depthwise_bias=True, reduced_depthwise_bias=True, **kwargs) + + models = [ { 'name': 'ai87netmobilefacenet_112', diff --git a/models/model_irse_DRL.py b/models/model_irse_DRL.py index d29889045..a25ba52e1 100644 --- a/models/model_irse_DRL.py +++ b/models/model_irse_DRL.py @@ -1,26 +1,26 @@ ################################################################################################### # -#MIT License - -#Copyright (c) 2019 Jian Zhao - -#Permission is hereby granted, free of charge, to any person obtaining a copy -#of this software and associated documentation files (the "Software"), to deal -#in the Software without restriction, including without limitation the rights -#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -#copies of the Software, and to permit persons to whom the Software is -#furnished to do so, subject to the following conditions: - -#The above copyright notice and this permission notice shall be included in all -#copies or substantial portions of the Software. - -#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -#SOFTWARE. +# MIT License + +# Copyright (c) 2019 Jian Zhao + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
# ################################################################################################### # @@ -54,10 +54,10 @@ def __init__( ): super().__init__() self.conv1 = nn.Conv1d(512, 512, 1, padding=0, bias=bias, **kwargs) - self.BN1 = nn.BatchNorm1d(512) + self.BN1 = nn.BatchNorm1d(512) self.PRelu1 = nn.PReLU(512) self.conv2 = nn.Conv1d(512, dimensionality, 1, padding=0, bias=bias, **kwargs) - self.BN2 = nn.BatchNorm1d(dimensionality) + self.BN2 = nn.BatchNorm1d(dimensionality) def forward(self, x): # pylint: disable=arguments-differ """Forward prop""" @@ -86,16 +86,17 @@ def forward(self, x): if x.shape[1] == 6: if not self.Teacher_mode: self.Teacher_mode=True - x = x[:,3: ,:,:] + x = x[:, 3:, :, :] x_flip = FT.hflip(x) x = torch.cat((x, x_flip), 0) x = self.resnet(x) x = self.DRL(x) if self.Teacher_mode: - x = x[:x.shape[0]//2] + x[x.shape[0]//2:] #Flip fusion + x = x[:x.shape[0]//2] + x[x.shape[0]//2:] # Flip fusion x = F.normalize(x, p=2, dim=1) return x + class Flatten(nn.Module): """Flattens the input""" def forward(self, x): @@ -245,21 +246,21 @@ def __init__(self, input_size, num_layers, mode='ir'): elif mode == 'ir_se': unit_module = bottleneck_IR_SE self.input_layer = nn.Sequential(nn.Conv2d(3, 64, (3, 3), 1, 1, bias=False), - nn.BatchNorm2d(64), - nn.PReLU(64)) + nn.BatchNorm2d(64), + nn.PReLU(64)) if input_size[0] == 112: # Dropout is set to 0, due to the train.py structure self.output_layer = nn.Sequential(nn.BatchNorm2d(512), - nn.Dropout(p=0), - Flatten(), - nn.Linear(512 * 7 * 7, 512), - nn.BatchNorm1d(512)) + nn.Dropout(p=0), + Flatten(), + nn.Linear(512 * 7 * 7, 512), + nn.BatchNorm1d(512)) else: self.output_layer = nn.Sequential(nn.BatchNorm2d(512), - nn.Dropout(p=0), - Flatten(), - nn.Linear(512 * 14 * 14, 512), - nn.BatchNorm1d(512)) + nn.Dropout(p=0), + Flatten(), + nn.Linear(512 * 14 * 14, 512), + nn.BatchNorm1d(512)) modules = [] for block in blocks: @@ -298,7 +299,7 @@ def _initialize_weights(self): m.bias.data.zero_() -def ir_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir-50 model. """ model = Backbone(input_size, 50, 'ir') @@ -312,7 +313,7 @@ def ir_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, return ensemble -def ir_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir-101 model. """ model = Backbone(input_size, 100, 'ir') @@ -326,7 +327,7 @@ def ir_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, return ensemble -def ir_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir-152 model. """ model = Backbone(input_size, 152, 'ir') @@ -341,7 +342,7 @@ def ir_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, return ensemble -def ir_se_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_se_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir_se-50 model. 
""" model = Backbone(input_size, 50, 'ir_se') @@ -355,7 +356,7 @@ def ir_se_50(input_size=(112, 112), dimensionality=64, backbone_checkpoint = Non return ensemble -def ir_se_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_se_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir_se-101 model. """ model = Backbone(input_size, 100, 'ir_se') @@ -369,7 +370,7 @@ def ir_se_101(input_size=(112, 112), dimensionality=64, backbone_checkpoint = No return ensemble -def ir_se_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint = None, **kwargs): +def ir_se_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint=None, **kwargs): """Constructs a ir_se-152 model. """ model = Backbone(input_size, 152, 'ir_se') @@ -382,6 +383,7 @@ def ir_se_152(input_size=(112, 112), dimensionality=64, backbone_checkpoint = No return ensemble + models = [ { 'name': 'ir_50', diff --git a/parsecmd.py b/parsecmd.py index 293e300f0..6f0690dbb 100644 --- a/parsecmd.py +++ b/parsecmd.py @@ -71,10 +71,10 @@ def get_parser(model_names, dataset_names): help='Embedding dimensionality for dimensionality' 'reduction (default: None)') parser.add_argument('--scaf-margin', default=28.6, - type=float, help='Margin hyperparameter' - 'for Sub-center ArcFace Loss') + type=float, help='Margin hyperparameter' + 'for Sub-center ArcFace Loss') parser.add_argument('--scaf-scale', default=64, - type=int, help='Scale hyperparameter for Sub-center ArcFace Loss') + type=int, help='Scale hyperparameter for Sub-center ArcFace Loss') parser.add_argument('--backbone-checkpoint', type=str, default=None, metavar='PATH', help='path to checkpoint from which to load' 'backbone weights (default: None)') @@ -108,8 +108,8 @@ def get_parser(model_names, dataset_names): type=float, metavar='LR', help='initial learning rate') optimizer_args.add_argument('--scaf-lr', default=1e-4, type=float, metavar='SCAF_LR', - help='initial learning rate for Sub-center' - 'ArcFace Loss optimizer') + help='initial learning rate for Sub-center' + 'ArcFace Loss optimizer') optimizer_args.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') optimizer_args.add_argument('--weight-decay', '--wd', default=1e-4, type=float, diff --git a/train.py b/train.py index 6653b4154..6c18295bc 100644 --- a/train.py +++ b/train.py @@ -397,8 +397,9 @@ def main(): elif args.dr: criterion = pml_losses.SubCenterArcFaceLoss(num_classes=args.num_classes, - embedding_size=args.dr, margin= - args.scaf_margin, scale=args.scaf_scale) + embedding_size=args.dr, + margin=args.scaf_margin, + scale=args.scaf_scale) if args.resumed_checkpoint_path: checkpoint = torch.load(args.resumed_checkpoint_path, map_location=lambda storage, loc: storage) @@ -414,7 +415,6 @@ def main(): accuracy_calculator = AccuracyCalculator(knn_func=custom_knn, include=("precision_at_1",), k=1) - else: if not args.regression: if 'weight' in selected_source: @@ -587,7 +587,7 @@ def main(): # Train for one epoch with collectors_context(activations_collectors["train"]) as collectors: train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, - loggers=all_loggers, args=args, loss_optimizer = loss_optimizer) + loggers=all_loggers, args=args, loss_optimizer=loss_optimizer) # distiller.log_weights_sparsity(model, epoch, loggers=all_loggers) distiller.log_activation_statistics(epoch, "train", loggers=all_tbloggers, @@ -617,7 +617,7 @@ def main(): with 
collectors_context(activations_collectors["valid"]) as collectors: if not args.dr: top1, top5, vloss, mAP = validate(val_loader, model, criterion, [pylogger], - args, epoch, tflogger) + args, epoch, tflogger) else: top1, top5, vloss, mAP = scaf_test(val_loader, model, accuracy_calculator) distiller.log_activation_statistics(epoch, "valid", loggers=all_tbloggers, @@ -703,7 +703,7 @@ def create_model(supported_models, dimensions, args, mode='default'): raise RuntimeError("Model " + args.kd_teacher + " not found\n") if args.dr: - if not 'dr' in module or not module['dr']: + if 'dr' not in module or not module['dr']: raise ValueError("Dimensionality reduction is not supported for this model") # Set model parameters @@ -777,7 +777,7 @@ def create_nas_kd_policy(model, compression_scheduler, epoch, next_state_start_e def train(train_loader, model, criterion, optimizer, epoch, - compression_scheduler, loggers, args, loss_optimizer = None): + compression_scheduler, loggers, args, loss_optimizer=None): """Training loop for one epoch.""" losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()), (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())]) @@ -998,11 +998,13 @@ def update_bn_stats(train_loader, model, args): inputs, target = inputs.to(args.device), target.to(args.device) _ = model(inputs) + def get_all_embeddings(dataset, model): """Get all embeddings from the test set""" tester = testers.BaseTester() return tester.get_all_embeddings(dataset, model) + def scaf_test(val_loader, model, accuracy_calculator): """Perform test for SCAF""" test_embeddings, test_labels = get_all_embeddings(val_loader.dataset, model) @@ -1011,7 +1013,8 @@ def scaf_test(val_loader, model, accuracy_calculator): test_embeddings, test_embeddings, test_labels, test_labels, True ) msglogger.info('Test set accuracy (Precision@1) = %f', accuracies['precision_at_1']) - return accuracies["precision_at_1"], 0 , 0 , 0 + return accuracies["precision_at_1"], 0, 0, 0 + def validate(val_loader, model, criterion, loggers, args, epoch=-1, tflogger=None): """Model validation"""
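# -----------------------------------------------------------------------------------------------
# Editor's illustrative sketch, not part of this patch: the Sub-center ArcFace path that train.py
# wires up for --dr runs. The loss module carries its own trainable weights, so it gets a separate
# optimizer (threaded through as loss_optimizer), and evaluation follows scaf_test() with
# pytorch-metric-learning's AccuracyCalculator. Batch sizes, class counts, and the Adam learning
# rate below are placeholders standing in for args.dr, args.num_classes, and --scaf-lr.
import torch
from pytorch_metric_learning import losses as pml_losses
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

embedding_size, num_classes = 64, 1000
criterion = pml_losses.SubCenterArcFaceLoss(num_classes=num_classes,
                                            embedding_size=embedding_size,
                                            margin=28.6, scale=64)        # parsecmd.py defaults
loss_optimizer = torch.optim.Adam(criterion.parameters(), lr=1e-4)        # --scaf-lr

embeddings = torch.randn(32, embedding_size)      # stand-in for model(inputs) output
labels = torch.randint(0, num_classes, (32,))
loss = criterion(embeddings, labels)
loss.backward()                                   # updates only the loss's sub-center weights here
loss_optimizer.step()

# Precision@1 over the embedding set, matching the call made in scaf_test()
calc = AccuracyCalculator(include=("precision_at_1",), k=1)
acc = calc.get_accuracy(embeddings.detach(), embeddings.detach(), labels, labels, True)
# -----------------------------------------------------------------------------------------------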