Skip to content

Commit

Permalink
Add utilities and example for fine tuning.
Browse files Browse the repository at this point in the history
  • Loading branch information
timesler committed Aug 17, 2019
1 parent 03f5dfe commit 27918a7
Show file tree
Hide file tree
Showing 6 changed files with 455 additions and 9 deletions.
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .models.inception_resnet_v1 import InceptionResnetV1
from .models.mtcnn import MTCNN, PNet, RNet, ONet, prewhiten
from .models.utils.detect_face import extract_face
from .models.utils import training
320 changes: 320 additions & 0 deletions examples/train.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Face detection and recognition training pipeline\n",
"\n",
"The following example illustrates how to use the `facenet_pytorch` Python package to perform face detection and recognition on an image dataset using an Inception Resnet V1 pretrained on the VGGFace2 dataset.\n",
"\n",
"The following Pytorch methods are included:\n",
"* Datasets\n",
"* Dataloaders\n",
"* GPU/CPU processing"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from facenet_pytorch import MTCNN, InceptionResnetV1, prewhiten, training\n",
"import torch\n",
"from torch.utils.data import DataLoader, SubsetRandomSampler\n",
"from torch import optim\n",
"from torch.optim.lr_scheduler import MultiStepLR\n",
"from torchvision import datasets, transforms\n",
"import numpy as np\n",
"import pandas as pd\n",
"import multiprocessing as mp\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define run parameters"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"data_dir = '../../../data/vggface2/train'\n",
"batch_size = 16\n",
"epochs = 15"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Determine if an nvidia GPU is available"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running on device: cpu\n"
]
}
],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
"print('Running on device: {}'.format(device))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define MTCNN module\n",
"\n",
"Default params shown for illustration, but not needed. Note that, since MTCNN is a collection of neural nets and other code, the device must be passed in the following way to enable copying of objects when needed internally.\n",
"\n",
"See `help(MTCNN)` for more details."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"mtcnn = MTCNN(\n",
" image_size=160, margin=0, min_face_size=20,\n",
" thresholds=[0.6, 0.7, 0.7], factor=0.709, prewhiten=True,\n",
" device=device\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Perform MTCNN facial detection\n",
"\n",
"Iterate through the DataLoader object and obtain cropped faces."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Images processed: 6353 of 6353"
]
}
],
"source": [
"dataset = datasets.ImageFolder(data_dir)\n",
"dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}\n",
"loader = DataLoader(dataset, collate_fn=lambda x: x[0], num_workers=mp.cpu_count(), shuffle=False)\n",
"\n",
"for i, (x, y) in enumerate(loader):\n",
" print(f'\\rImages processed: {i + 1} of {len(loader)}', end='')\n",
" save_dir = os.path.join(data_dir + '_cropped', dataset.idx_to_class[y])\n",
" os.makedirs(save_dir, exist_ok=True)\n",
" filename = f'{len(os.listdir(save_dir)):05n}.png'\n",
" mtcnn(x, save_path=os.path.join(save_dir, filename))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define Inception Resnet V1 module\n",
"\n",
"Set classify=True for classifier.\n",
"\n",
"See `help(InceptionResnetV1)` for more details."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"resnet = InceptionResnetV1(\n",
" pretrained='vggface2',\n",
" classify=True,\n",
" num_classes=len(dataset.class_to_idx)\n",
").to(device)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define optimizer, scheduler, dataset, and dataloader"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"optimizer = optim.Adam(resnet.parameters(), lr=0.001)\n",
"scheduler = MultiStepLR(optimizer, [5, 10])\n",
"\n",
"trans = transforms.Compose([\n",
" np.float32,\n",
" transforms.ToTensor(),\n",
" prewhiten\n",
"])\n",
"dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)\n",
"img_inds = np.arange(len(dataset))\n",
"np.random.shuffle(img_inds)\n",
"train_inds = img_inds[:int(0.8 * len(img_inds))]\n",
"val_inds = img_inds[int(0.8 * len(img_inds)):]\n",
"\n",
"train_loader = DataLoader(\n",
" dataset,\n",
" num_workers=mp.cpu_count(),\n",
" batch_size=batch_size,\n",
" sampler=SubsetRandomSampler(train_inds)\n",
")\n",
"val_loader = DataLoader(\n",
" dataset,\n",
" num_workers=mp.cpu_count(),\n",
" batch_size=batch_size,\n",
" sampler=SubsetRandomSampler(val_inds)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Define loss and evaluation functions"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"loss_fn = torch.nn.CrossEntropyLoss()\n",
"metrics = {\n",
" 'fps': training.BatchTimer(),\n",
" 'acc': training.accuracy\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Train model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"Initial\n",
"----------\n",
"Eval | 80/80 | loss: 2.9421 | fps: 7.6358 | acc: 0.0602 \n",
"\n",
"\n",
"Epoch 1/15\n",
"----------\n",
"Train | 317/317 | loss: 1.9690 | fps: 2.4324 | acc: 0.5260 \n",
"Eval | 80/80 | loss: 1.4802 | fps: 8.2792 | acc: 0.5591 \n",
"\n",
"\n",
"Epoch 2/15\n",
"----------\n",
"Train | 317/317 | loss: 1.0367 | fps: 2.4487 | acc: 0.7467 \n",
"Eval | 80/80 | loss: 0.8572 | fps: 8.0474 | acc: 0.7799 \n",
"\n",
"\n",
"Epoch 3/15\n",
"----------\n",
"Train | 124/317 | loss: 0.6837 | fps: 2.4360 | acc: 0.8362 "
]
}
],
"source": [
"print(f'\\n\\nInitial')\n",
"print('-' * 10)\n",
"resnet.eval()\n",
"training.pass_epoch(\n",
" resnet, loss_fn, val_loader,\n",
" batch_metrics=metrics, show_running=True, device=device\n",
")\n",
"\n",
"for epoch in range(epochs):\n",
" print(f'\\n\\nEpoch {epoch + 1}/{epochs}')\n",
" print('-' * 10)\n",
"\n",
" resnet.train()\n",
" training.pass_epoch(\n",
" resnet, loss_fn, train_loader, optimizer, scheduler,\n",
" batch_metrics=metrics, show_running=True, device=device\n",
" )\n",
"\n",
" resnet.eval()\n",
" training.pass_epoch(\n",
" resnet, loss_fn, val_loader,\n",
" batch_metrics=metrics, show_running=True, device=device\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
16 changes: 10 additions & 6 deletions models/inception_resnet_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,9 @@ class InceptionResnetV1(nn.Module):
(default: {None})
classify {bool} -- Whether the model should output classification probabilities or feature
embeddings. (default: {False})
num_classes {int} -- Number of output classes. Ignored if 'pretrained' is set, in which
case the number of classes is set to that used for training. (default: {1001})
num_classes {int} -- Number of output classes. If 'pretrained' is set and num_classes is
not equal to that used for the pretrained model, the final linear layer will be
randomly initialized. (default: {1001})
"""
def __init__(self, pretrained=None, classify=False, num_classes=1001):
super().__init__()
Expand All @@ -202,10 +203,11 @@ def __init__(self, pretrained=None, classify=False, num_classes=1001):
self.classify = classify
self.num_classes = num_classes

tmp_classes = self.num_classes
if pretrained == 'vggface2':
self.num_classes = 8631
tmp_classes = 8631
elif pretrained == 'casia-webface':
self.num_classes = 10575
tmp_classes = 10575

# Define layers
self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
Expand Down Expand Up @@ -248,11 +250,14 @@ def __init__(self, pretrained=None, classify=False, num_classes=1001):
self.last_linear = nn.Linear(1792, 512, bias=False)
self.last_bn = nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True)

self.logits = nn.Linear(512, self.num_classes)
self.logits = nn.Linear(512, tmp_classes)
self.softmax = nn.Softmax(dim=1)

if pretrained is not None:
load_weights(self, pretrained)

if self.num_classes != tmp_classes:
self.logits = nn.Linear(512, self.num_classes)

def forward(self, x):
"""Calculate embeddings or probabilities given a batch of input image tensors.
Expand Down Expand Up @@ -282,7 +287,6 @@ def forward(self, x):
x = F.normalize(x, p=2, dim=1)
if self.classify:
x = self.logits(x)
x = self.softmax(x)
return x


Expand Down
Loading

0 comments on commit 27918a7

Please sign in to comment.