diff --git a/data/scripts/Face_dataset_scripts/FDDB/FDDB.png b/data/scripts/Face_dataset_scripts/FDDB/FDDB.png new file mode 100644 index 000000000..66a895d68 Binary files /dev/null and b/data/scripts/Face_dataset_scripts/FDDB/FDDB.png differ diff --git a/data/scripts/Face_dataset_scripts/FDDB/README.md b/data/scripts/Face_dataset_scripts/FDDB/README.md new file mode 100644 index 000000000..2ce761cb6 --- /dev/null +++ b/data/scripts/Face_dataset_scripts/FDDB/README.md @@ -0,0 +1,50 @@ +# FDDB_DataSet_4_faster_rcnn + +## Step1: get data from FDDB +``` +./get_data.sh +``` +this should download originalPics.tar.gz (~500MB) and FDDB-folds.tgz from FDDB, +verify their MD5 checksums, and, if they pass, unpack both archives into the originalPics directory. + +if a download link fails: download the files from the FDDB website +http://vis-www.cs.umass.edu/fddb/ + + +if the checksum always fails, fetch and unpack the files manually: + +``` +wget http://tamaraberg.com/faceDataset/originalPics.tar.gz +wget http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz + +mkdir originalPics; +tar -C originalPics -zxf originalPics.tar.gz +tar -C originalPics -zxf FDDB-folds.tgz +``` + + +## Step2: create a data set that can be used in py-faster-rcnn +``` +cd pyxml; +./runit.sh; +``` +this will create the FDDB_2010 directory +with the JPEGImages and Annotations directories inside it + +runit.sh contains: +``` +python anno2xml.py FDDB-fold-01-ellipseList.txt; +python anno2xml.py FDDB-fold-02-ellipseList.txt; +python anno2xml.py FDDB-fold-03-ellipseList.txt; +.... 
+``` +where the FDDB-fold-NN-ellipseList.txt files live in originalPics/FDDB-folds; +you can check that the lists match (01~10) + + +## Step3: use labelImg to check that the data set was created properly +you can use labelImg to see if the annotations look right (it is really awesome) +https://github.com/tzutalin/labelImg +![alt tag](https://raw.githubusercontent.com/penolove/FDDB_DataSet_4_faster_rcnn/master/FDDB.png) + + diff --git a/data/scripts/Face_dataset_scripts/FDDB/get_data.sh b/data/scripts/Face_dataset_scripts/FDDB/get_data.sh new file mode 100755 index 000000000..0df89670f --- /dev/null +++ b/data/scripts/Face_dataset_scripts/FDDB/get_data.sh @@ -0,0 +1,48 @@ +ori_CheckSum=cf414253ac596cd858daae0cc321d793 +folds_CheckSum=4cf9badc939a3398a0d6f3a3c8540f55 +if [ -d originalPics ]; +then + echo "[FDDB] originalPics dir alreday exist"; +else + # ---- download originalPics.tar.gz ---- + FILE=originalPics.tar.gz + #if file not exist + if [ ! -f $FILE ]; then + echo "[FDDB] Downloading originalPics.tar.gz ....." + wget http://tamaraberg.com/faceDataset/originalPics.tar.gz + fi + + checksum=`md5sum $FILE | awk '{ print $1 }'` + if [ ! "$checksum" = "$ori_CheckSum" ]; then + rm $FILE + echo $checksum + echo $ori_CheckSum + echo "[FDDB] file $FILE : checksum error , need to rerun the script"; + exit 1; + fi + + # --------------------------------------- + + # ---- download FDDB.tgz ---- + FILE=FDDB-folds.tgz + #if file not exist + if [ ! -f $FILE ]; then + echo "[FDDB] Downloading FDDB-folds.tgz ....." + wget http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz + fi + + checksum=`md5sum $FILE | awk '{ print $1 }'` + if [ ! "$checksum" = "$folds_CheckSum" ]; then + rm $FILE + echo $checksum + echo $folds_CheckSum + echo "[FDDB] file $FILE : checksum error , need to rerun the script"; + exit 1; + fi + + echo "[FDDB] Making originalPics , uncompress files ..." 
+ mkdir originalPics; + tar -C originalPics -zxf originalPics.tar.gz + tar -C originalPics -zxf FDDB-folds.tgz + +fi; diff --git a/data/scripts/Face_dataset_scripts/FDDB/makefile b/data/scripts/Face_dataset_scripts/FDDB/makefile new file mode 100644 index 000000000..e60dfb947 --- /dev/null +++ b/data/scripts/Face_dataset_scripts/FDDB/makefile @@ -0,0 +1,5 @@ +clean: + echo "time to clean produced dirs/files" + rm -rf originalPics/ + rm FDDB-folds.tgz + rm originalPics.tar.gz diff --git a/data/scripts/Face_dataset_scripts/FDDB/pyxml/anno2xml.py b/data/scripts/Face_dataset_scripts/FDDB/pyxml/anno2xml.py new file mode 100644 index 000000000..5f4fc7a73 --- /dev/null +++ b/data/scripts/Face_dataset_scripts/FDDB/pyxml/anno2xml.py @@ -0,0 +1,190 @@ + +from lxml import etree +import sys +import cv2 +import math +import glob +import os.path + + +# target_check exist +target_dir=os.path.join(os.getcwd(), 'FDDB_2010') +target_dir_Jpg=os.path.join(target_dir,'JPEGImages') +target_dir_Ana=os.path.join(target_dir,'Annotations') + +if not os.path.exists(target_dir): + os.makedirs(target_dir) + +if not os.path.exists(target_dir_Jpg): + os.makedirs(target_dir_Jpg) + +if not os.path.exists(target_dir_Ana): + os.makedirs(target_dir_Ana) + +target_dir_Jpg_set=os.path.join(target_dir_Jpg,'*.jpg') +cur_ind=0 +outfileID=len(glob.glob(target_dir_Jpg_set)) + + +def img2xml(path,objects,shape): + root = etree.Element("annotation") + folder = etree.SubElement(root, "folder") + filename = etree.SubElement(root, "filename") + source = etree.SubElement(root, "source") + databases = etree.SubElement(source, "database") + + folder.text = "VOC2007" + filename.text = str(path).zfill(6) + databases.text = "FDDB" + + size = etree.SubElement(root, "size") + width = etree.SubElement(size,"width") + height = etree.SubElement(size,"height") + depth = etree.SubElement(size,"depth") + depth.text = str(shape[2]) + width.text = str(shape[1]) + height.text = str(shape[0]) + + obj_count=0 + for obj in objects: 
+ #object + obj=[float(i) for i in obj.split()] + #the smallest circumscribed parallelogram + #[link] https://github.com/nouiz/lisa_emotiw/blob/master/emotiw/common/datasets/faces/FDDB.py + maj_rad = obj[0] + min_rad = obj[1] + angle = obj[2] + xcenter = obj[3] + ycenter = obj[4] + cosin = math.cos(math.radians(-angle)) + sin = math.sin(math.radians(-angle)) + + x1 = cosin * (-min_rad) - sin * (-maj_rad) + xcenter + y1 = sin * (-min_rad) + cosin * (-maj_rad) + ycenter + x2 = cosin * (min_rad) - sin * (-maj_rad) + xcenter + y2 = sin * (min_rad) + cosin * (-maj_rad) + ycenter + x3 = cosin * (min_rad) - sin * (maj_rad) + xcenter + y3 = sin * (min_rad) + cosin * (maj_rad) + ycenter + x4 = cosin * (-min_rad) - sin * (maj_rad) + xcenter + y4 = sin * (-min_rad) + cosin * (maj_rad) + ycenter + wid=[x1,x2,x3,x4] + hei=[y1,y2,y3,y4] + xmin_ = int(min(wid)) + xmax_ = int(max(wid)) + ymin_ = int(min(hei)) + ymax_ = int(max(hei)) + + # check if out of box + if(xmin_ >0 and ymin_>0 and xmax_0: + et = etree.ElementTree(root) + Ana_write2xml = os.path.join(target_dir_Ana, path+".xml") + et.write( Ana_write2xml, pretty_print=True) + return True + else: + return False + +def face_box_wh(path,objects,shape): + obj_count=0 + wh=list() + for obj in objects: + #object + obj=[float(i) for i in obj.split()] + #the smallest circumscribed parallelogram + #[link] https://github.com/nouiz/lisa_emotiw/blob/master/emotiw/common/datasets/faces/FDDB.py + maj_rad = obj[0] + min_rad = obj[1] + angle = obj[2] + xcenter = obj[3] + ycenter = obj[4] + cosin = math.cos(math.radians(-angle)) + sin = math.sin(math.radians(-angle)) + + x1 = cosin * (-min_rad) - sin * (-maj_rad) + xcenter + y1 = sin * (-min_rad) + cosin * (-maj_rad) + ycenter + x2 = cosin * (min_rad) - sin * (-maj_rad) + xcenter + y2 = sin * (min_rad) + cosin * (-maj_rad) + ycenter + x3 = cosin * (min_rad) - sin * (maj_rad) + xcenter + y3 = sin * (min_rad) + cosin * (maj_rad) + ycenter + x4 = cosin * (-min_rad) - sin * (maj_rad) + xcenter + 
y4 = sin * (-min_rad) + cosin * (maj_rad) + ycenter + wid=[x1,x2,x3,x4] + hei=[y1,y2,y3,y4] + xmin_ = int(min(wid)) + xmax_ = int(max(wid)) + ymin_ = int(min(hei)) + ymax_ = int(max(hei)) + + # check if out of box + if(xmin_ >0 and ymin_>0 and xmax_0: + return wh + else: + return list() + +# the annotation files path +FDDB_folds=os.path.join("..",'originalPics','FDDB-folds') +originalPics_folds=os.path.join("..",'originalPics') + +if __name__=="__main__": + # you need to modify the path_img below + # and the FDDB-fold-were assign by your own + if len(sys.argv) < 2: + ellipseList=os.path.join(FDDB_folds,'FDDB-fold-01-ellipseList.txt') + elif len(sys.argv)==2: + ellipseList=os.path.join(FDDB_folds,sys.argv[1]) + else: + print "usage : python example.py [ellipseList]" + sys.exit(0) + + current_file=open(ellipseList,'r') + image_with_target=[i.replace('\n','') for i in current_file.readlines()] + current_file.close() + + while (cur_ind0 and ymin_>0 and xmax_0: + et = etree.ElementTree(root) + xml_output_path = os.path.join(target_dir_Ana,path+".xml") + et.write(xml_output_path, pretty_print=True) + return True + else: + return False + + +# the annotation files path +originalPics_folds=os.path.join("..","WIDER_train","WIDER_train","images") + +if __name__=="__main__": + # you need to modify the path_img below + # and the FDDB-fold-were assign by your own + if len(sys.argv) < 2: + file_path='xywhXfile.txt' + elif len(sys.argv)==2: + file_path=sys.argv[1] + else: + print "usage : python example.py [ellipseList]" + current_file=open(file_path,'r') + image_with_target=[i.replace('\n','') for i in current_file.readlines()] + while (cur_ind xywhXfile.txt; + +echo "[WIDER] Since WIDER is a larger dataset, it may take for a while" +echo "[WIDER] Createing WIDER_2016 fold, Images , Annotation xmls..." 
+ +start=`date +%s` +python anno2xml.py +end=`date +%s` + +runtime=$((end-start)) + +rm xywhXfile.txt +echo "[WIDER] totally it takes :" $runtime "seconds" + +#creating training testing set +python randomSet.py WIDER_2016 0.9 diff --git a/data/scripts/Face_dataset_scripts/WIDER/zip_img.png b/data/scripts/Face_dataset_scripts/WIDER/zip_img.png new file mode 100644 index 000000000..d4ceb1a1a Binary files /dev/null and b/data/scripts/Face_dataset_scripts/WIDER/zip_img.png differ diff --git a/data/scripts/get_FaceData.sh b/data/scripts/get_FaceData.sh new file mode 100755 index 000000000..6d2bc6a09 --- /dev/null +++ b/data/scripts/get_FaceData.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# + +case "$1" in + "") echo "Not given dataste , using FDDB..." + dataset="FDDB"; + SetFolder="FDDB_2010"; + ;; + "FDDB") echo "using FDDB ..." + dataset="FDDB"; + SetFolder="FDDB_2010"; + ;; + "WIDER") echo "using WIDER ..." + dataset="WIDER" + SetFolder="WIDER_2016" + ;; + *) echo "dataset $1 unknown" + exit 1; + ;; +esac + + +# check folder exist or not +cwd=$PWD +DIRECTORY="$cwd/data/FacesDevkit2017" +DIRECTORY_FDDB="$cwd/data/FacesDevkit2017/$SetFolder" + +if [ -d "$cwd/data/scripts/Face_dataset_scripts/$dataset" ]; then + if [ ! -d "$DIRECTORY" ]; then + echo "the FaceDevkit2017 doesn't exist." + echo "Create one, and fetching FDDB dataset..." + mkdir $DIRECTORY; + fi +else + echo "folder data/scripts/Face_dataset_scripts/$dataset not found" + echo "make sure you execute this shell in FRCN_ROOT!" + echo "i.e. ~ooxx/py-faster-rcnn$ ./data/scripts/get_FaceData.sh [dataset]" + exit 1 +fi + +if [ ! 
-d "$DIRECTORY_FDDB" ]; then + # get the dataset + cd $cwd/data/scripts/Face_dataset_scripts/$dataset; + ./get_data.sh; + + # creating folder FDDB_2010/WIDER_2017 contatins: + # Annotations /JpgeImages / Imagesets + cd pyxml; + ./runit.sh; + + # move FDDB_2010/WIDER_2017 to FacesDevkit + mv $SetFolder $DIRECTORY_FDDB + mkdir -p $DIRECTORY/results/$SetFolder/Main +else + echo "the FacesDevkit2017/$SetFolder already exist." +fi + diff --git a/experiments/scripts/faster_rcnn_end2end.sh b/experiments/scripts/faster_rcnn_end2end.sh index 79770aa56..35697e381 100755 --- a/experiments/scripts/faster_rcnn_end2end.sh +++ b/experiments/scripts/faster_rcnn_end2end.sh @@ -23,6 +23,18 @@ EXTRA_ARGS=${array[@]:3:$len} EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} case $DATASET in + WIDER) + TRAIN_IMDB="WIDER_2016_trainval" + TEST_IMDB="WIDER_2016_test" + PT_DIR="faces_data" + ITERS=10000 + ;; + FDDB) + TRAIN_IMDB="FDDB_2010_trainval" + TEST_IMDB="FDDB_2010_test" + PT_DIR="faces_data" + ITERS=10000 + ;; pascal_voc) TRAIN_IMDB="voc_2007_trainval" TEST_IMDB="voc_2007_test" diff --git a/lib/datasets/Faces_data.py b/lib/datasets/Faces_data.py new file mode 100644 index 000000000..31dba89aa --- /dev/null +++ b/lib/datasets/Faces_data.py @@ -0,0 +1,344 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import os +from datasets.imdb import imdb +import datasets.ds_utils as ds_utils +import xml.etree.ElementTree as ET +import numpy as np +import scipy.sparse +import scipy.io as sio +import utils.cython_bbox +import cPickle +import subprocess +import uuid +from voc_eval import voc_eval +from fast_rcnn.config import cfg + +class Faces_data(imdb): + def __init__(self, image_set, year, DB_name, devkit_path=None): + imdb.__init__(self, DB_name+'_' + year + '_' + image_set) + self._year = year + 
self._DB_name = DB_name + self._image_set = image_set + self._devkit_path = self._get_default_path() if devkit_path is None \ + else devkit_path + self._data_path = os.path.join(self._devkit_path, DB_name+'_'+ self._year) + self._classes = ('__background__', # always index 0 + 'face') + self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) + self._image_ext = '.jpg' + self._image_index = self._load_image_set_index() + # Default to roidb handler + self._roidb_handler = self.selective_search_roidb + self._salt = str(uuid.uuid4()) + self._comp_id = 'comp4' + + # PASCAL specific config options + self.config = {'cleanup' : True, + 'use_salt' : True, + 'use_diff' : False, + 'matlab_eval' : False, + 'rpn_file' : None, + 'min_size' : 2} + + assert os.path.exists(self._devkit_path), \ + 'FacesDevkit2017 path does not exist: {}'.format(self._devkit_path) + assert os.path.exists(self._data_path), \ + 'Path does not exist: {}'.format(self._data_path) + + def image_path_at(self, i): + """ + Return the absolute path to image i in the image sequence. + """ + return self.image_path_from_index(self._image_index[i]) + + def image_path_from_index(self, index): + """ + Construct an image path from the image's "index" identifier. + """ + image_path = os.path.join(self._data_path, 'JPEGImages', + index + self._image_ext) + assert os.path.exists(image_path), \ + 'Path does not exist: {}'.format(image_path) + return image_path + + def _load_image_set_index(self): + """ + Load the indexes listed in this dataset's image set file. 
+ """ + # Example path to image set file: + # self._devkit_path + /FacesDevkit2017/FDDB_2010/ImageSets/Main/val.txt + # self._devkit_path + /FacesDevkit2017/WIDER_2016/ImageSets/Main/val.txt + image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', + self._image_set + '.txt') + assert os.path.exists(image_set_file), \ + 'Path does not exist: {}'.format(image_set_file) + with open(image_set_file) as f: + image_index = [x.strip() for x in f.readlines()] + return image_index + + def _get_default_path(self): + """ + Return the default path where FACE DataSet is expected to be installed. + """ + return os.path.join(cfg.DATA_DIR, 'FacesDevkit2017' ) + + def gt_roidb(self): + """ + Return the database of ground-truth regions of interest. + + This function loads/saves from/to a cache file to speed up future calls. + """ + cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fid: + roidb = cPickle.load(fid) + print '{} gt roidb loaded from {}'.format(self.name, cache_file) + return roidb + + gt_roidb = [self._load_pascal_annotation(index) + for index in self.image_index] + with open(cache_file, 'wb') as fid: + cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) + print 'wrote gt roidb to {}'.format(cache_file) + + return gt_roidb + + def selective_search_roidb(self): + """ + Return the database of selective search regions of interest. + Ground-truth ROIs are also included. + + This function loads/saves from/to a cache file to speed up future calls. 
+ """ + cache_file = os.path.join(self.cache_path, + self.name + '_selective_search_roidb.pkl') + + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fid: + roidb = cPickle.load(fid) + print '{} ss roidb loaded from {}'.format(self.name, cache_file) + return roidb + + if int(self._year) < 2017 or self._image_set != 'test': + gt_roidb = self.gt_roidb() + ss_roidb = self._load_selective_search_roidb(gt_roidb) + roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) + else: + roidb = self._load_selective_search_roidb(None) + with open(cache_file, 'wb') as fid: + cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) + print 'wrote ss roidb to {}'.format(cache_file) + + return roidb + + def rpn_roidb(self): + if int(self._year) <2017 or self._image_set != 'test': + gt_roidb = self.gt_roidb() + rpn_roidb = self._load_rpn_roidb(gt_roidb) + roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb) + else: + roidb = self._load_rpn_roidb(None) + + return roidb + + def _load_rpn_roidb(self, gt_roidb): + filename = self.config['rpn_file'] + print 'loading {}'.format(filename) + assert os.path.exists(filename), \ + 'rpn data not found at: {}'.format(filename) + with open(filename, 'rb') as f: + box_list = cPickle.load(f) + return self.create_roidb_from_box_list(box_list, gt_roidb) + + def _load_selective_search_roidb(self, gt_roidb): + filename = os.path.abspath(os.path.join(cfg.DATA_DIR, + 'selective_search_data', + self.name + '.mat')) + assert os.path.exists(filename), \ + 'Selective search data not found at: {}'.format(filename) + raw_data = sio.loadmat(filename)['boxes'].ravel() + + box_list = [] + for i in xrange(raw_data.shape[0]): + boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 + keep = ds_utils.unique_boxes(boxes) + boxes = boxes[keep, :] + keep = ds_utils.filter_small_boxes(boxes, self.config['min_size']) + boxes = boxes[keep, :] + box_list.append(boxes) + + return self.create_roidb_from_box_list(box_list, gt_roidb) + + def _load_pascal_annotation(self, index): + """ + Load image 
and bounding boxes info from XML file in the FACE Dataset + format. + """ + filename = os.path.join(self._data_path, 'Annotations', index + '.xml') + tree = ET.parse(filename) + objs = tree.findall('object') + if not self.config['use_diff']: + # Exclude the samples labeled as difficult + non_diff_objs = [ + obj for obj in objs if int(obj.find('difficult').text) == 0] + # if len(non_diff_objs) != len(objs): + # print 'Removed {} difficult objects'.format( + # len(objs) - len(non_diff_objs)) + objs = non_diff_objs + num_objs = len(objs) + + boxes = np.zeros((num_objs, 4), dtype=np.uint16) + gt_classes = np.zeros((num_objs), dtype=np.int32) + overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) + # "Seg" area for pascal is just the box area + seg_areas = np.zeros((num_objs), dtype=np.float32) + + # Load object bounding boxes into a data frame. + for ix, obj in enumerate(objs): + bbox = obj.find('bndbox') + # Make pixel indexes 0-based + x1 = float(bbox.find('xmin').text) - 1 + y1 = float(bbox.find('ymin').text) - 1 + x2 = float(bbox.find('xmax').text) - 1 + y2 = float(bbox.find('ymax').text) - 1 + cls = self._class_to_ind[obj.find('name').text.lower().strip()] + boxes[ix, :] = [x1, y1, x2, y2] + gt_classes[ix] = cls + overlaps[ix, cls] = 1.0 + seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) + + overlaps = scipy.sparse.csr_matrix(overlaps) + + return {'boxes' : boxes, + 'gt_classes': gt_classes, + 'gt_overlaps' : overlaps, + 'flipped' : False, + 'seg_areas' : seg_areas} + + def _get_comp_id(self): + comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt'] + else self._comp_id) + return comp_id + + def _get_voc_results_file_template(self): + # FacesDevkit2017/results/FDDB2010/Main/_det_test_aeroplane.txt + # FacesDevkit2017/results/WIDER2016/Main/_det_test_aeroplane.txt + filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt' + path = os.path.join( + self._devkit_path, + 'results', + self._DB_name +'_'+ self._year, + 'Main', 
+ filename) + return path + + def _write_voc_results_file(self, all_boxes): + for cls_ind, cls in enumerate(self.classes): + if cls == '__background__': + continue + print 'Writing {} '+self._DB_name+' results file'.format(cls) + filename = self._get_voc_results_file_template().format(cls) + with open(filename, 'wt') as f: + for im_ind, index in enumerate(self.image_index): + dets = all_boxes[cls_ind][im_ind] + if dets == []: + continue + # the VOCdevkit expects 1-based indices + for k in xrange(dets.shape[0]): + f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. + format(index, dets[k, -1], + dets[k, 0] + 1, dets[k, 1] + 1, + dets[k, 2] + 1, dets[k, 3] + 1)) + + def _do_python_eval(self, output_dir = 'output'): + annopath = os.path.join( + self._devkit_path, + self._DB_name +'_' + self._year, + 'Annotations', + '{:s}.xml') + imagesetfile = os.path.join( + self._devkit_path, + self._DB_name+ '_' + self._year, + 'ImageSets', + 'Main', + self._image_set + '.txt') + cachedir = os.path.join(self._devkit_path, 'annotations_cache',self._DB_name) + if not os.path.isdir(cachedir): + os.makedirs(cachedir) + aps = [] + # The PASCAL VOC metric changed in 2010 + use_07_metric = True if int(self._year) < 2017 else False + print self._DB_name+' metric? 
' + ('Yes' if use_07_metric else 'No') + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + for i, cls in enumerate(self._classes): + if cls == '__background__': + continue + filename = self._get_voc_results_file_template().format(cls) + rec, prec, ap = voc_eval( + filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, + use_07_metric=use_07_metric) + aps += [ap] + print('AP for {} = {:.4f}'.format(cls, ap)) + with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f: + cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) + print('Mean AP = {:.4f}'.format(np.mean(aps))) + print('~~~~~~~~') + print('Results:') + for ap in aps: + print('{:.3f}'.format(ap)) + print('{:.3f}'.format(np.mean(aps))) + print('~~~~~~~~') + print('') + print('--------------------------------------------------------------') + print('Results computed with the **unofficial** Python eval code.') + print('Results should be very close to the official MATLAB eval code.') + print('Recompute with `./tools/reval.py --matlab ...` for your paper.') + print('-- Thanks, The Management') + print('--------------------------------------------------------------') + + def _do_matlab_eval(self, output_dir='output'): + print '-----------------------------------------------------' + print 'Computing results with the official MATLAB eval code.' 
+ print '-----------------------------------------------------' + path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets', + 'VOCdevkit-matlab-wrapper') + cmd = 'cd {} && '.format(path) + cmd += '{:s} -nodisplay -nodesktop '.format(cfg.MATLAB) + cmd += '-r "dbstop if error; ' + cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \ + .format(self._devkit_path, self._get_comp_id(), + self._image_set, output_dir) + print('Running:\n{}'.format(cmd)) + status = subprocess.call(cmd, shell=True) + + def evaluate_detections(self, all_boxes, output_dir): + self._write_voc_results_file(all_boxes) + self._do_python_eval(output_dir) + if self.config['matlab_eval']: + self._do_matlab_eval(output_dir) + if self.config['cleanup']: + for cls in self._classes: + if cls == '__background__': + continue + filename = self._get_voc_results_file_template().format(cls) + os.remove(filename) + + def competition_mode(self, on): + if on: + self.config['use_salt'] = False + self.config['cleanup'] = False + else: + self.config['use_salt'] = True + self.config['cleanup'] = True + +if __name__ == '__main__': + d = Faces_data('trainval', '2010','FDDB') + #res = d.roidb + from IPython import embed; embed() diff --git a/lib/datasets/factory.py b/lib/datasets/factory.py index 8c3fdb898..4317f2615 100644 --- a/lib/datasets/factory.py +++ b/lib/datasets/factory.py @@ -11,9 +11,21 @@ from datasets.pascal_voc import pascal_voc from datasets.coco import coco +from datasets.Faces_data import Faces_data import numpy as np # Set up voc__ using selective search "fast" mode + +for year in ['2010']: + for split in ['trainval', 'test']: + name = 'FDDB_{}_{}'.format(year, split) + __sets[name] = (lambda split=split, year=year: Faces_data(split, year, 'FDDB')) + +for year in ['2016']: + for split in ['trainval', 'test']: + name = 'WIDER_{}_{}'.format(year, split) + __sets[name] = (lambda split=split, year=year: Faces_data(split, year, 'WIDER')) + for year in ['2007', '2012']: for split in ['train', 'val', 
'trainval', 'test']: name = 'voc_{}_{}'.format(year, split) diff --git a/models/faces_data/VGG16/faster_rcnn_end2end/solver.prototxt b/models/faces_data/VGG16/faster_rcnn_end2end/solver.prototxt new file mode 100644 index 000000000..d01678eb9 --- /dev/null +++ b/models/faces_data/VGG16/faster_rcnn_end2end/solver.prototxt @@ -0,0 +1,17 @@ +train_net: "models/faces_data/VGG16/faster_rcnn_end2end/train.prototxt" +base_lr: 0.001 +lr_policy: "step" +gamma: 0.5 +stepsize: 10000 +display: 20 +average_loss: 100 +# iter_size: 1 +momentum: 0.9 +weight_decay: 0.0002 +# We disable standard caffe solver snapshotting and implement our own snapshot +# function +snapshot: 0 +# We still use the snapshot prefix, though +snapshot_prefix: "vgg16_faster_rcnn" +iter_size: 2 +type: "Adam" diff --git a/models/faces_data/VGG16/faster_rcnn_end2end/test.prototxt b/models/faces_data/VGG16/faster_rcnn_end2end/test.prototxt new file mode 100644 index 000000000..15c9efa68 --- /dev/null +++ b/models/faces_data/VGG16/faster_rcnn_end2end/test.prototxt @@ -0,0 +1,608 @@ +name: "VGG_ILSVRC_16_layers" + +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 224 + dim: 224 +} + +input: "im_info" +input_shape { + dim: 1 + dim: 3 +} + +layer { + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 
2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + 
top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_3" + type: "ReLU" + 
bottom: "conv5_3" + top: "conv5_3" +} + +#========= RPN ============ + +layer { + name: "rpn_conv/3x3" + type: "Convolution" + bottom: "conv5_3" + top: "rpn/output" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 512 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} + +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rois' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: 
"'feat_stride': 16" + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5_3" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 7 + pooled_h: 7 + spatial_scale: 0.0625 # 1/16 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 2 #21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 8 #84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "cls_prob" + type: "Softmax" + bottom: "cls_score" + top: "cls_prob" +} diff --git a/models/faces_data/VGG16/faster_rcnn_end2end/train.prototxt b/models/faces_data/VGG16/faster_rcnn_end2end/train.prototxt new file mode 100644 index 000000000..0b3a43e3b --- /dev/null +++ 
b/models/faces_data/VGG16/faster_rcnn_end2end/train.prototxt @@ -0,0 +1,676 @@ +name: "VGG_ILSVRC_16_layers" +layer { + name: 'input-data' + type: 'Python' + top: 'data' + top: 'im_info' + top: 'gt_boxes' + python_param { + module: 'roi_data_layer.layer' + layer: 'RoIDataLayer' + param_str: "'num_classes': 2" #21 + } +} + +layer { + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + 
param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + 
type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} + +#========= RPN ============ + +layer { + name: "rpn_conv/3x3" + type: "Convolution" + bottom: "conv5_3" + top: "rpn/output" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 512 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} + +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + 
bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +layer { + name: 'rpn-data' + type: 'Python' + bottom: 'rpn_cls_score' + bottom: 'gt_boxes' + bottom: 'im_info' + bottom: 'data' + top: 'rpn_labels' + top: 'rpn_bbox_targets' + top: 'rpn_bbox_inside_weights' + top: 'rpn_bbox_outside_weights' + python_param { + module: 'rpn.anchor_target_layer' + layer: 'AnchorTargetLayer' + param_str: "'feat_stride': 16" + } +} + +layer { + name: "rpn_loss_cls" + type: "SoftmaxWithLoss" + bottom: "rpn_cls_score_reshape" + bottom: "rpn_labels" + propagate_down: 1 + propagate_down: 0 + top: "rpn_cls_loss" + loss_weight: 1 + loss_param { + ignore_label: -1 + normalize: true + } +} + +layer { + name: "rpn_loss_bbox" + type: "SmoothL1Loss" + bottom: "rpn_bbox_pred" + bottom: "rpn_bbox_targets" + bottom: 'rpn_bbox_inside_weights' + bottom: 'rpn_bbox_outside_weights' + top: "rpn_loss_bbox" + loss_weight: 1 + smooth_l1_loss_param { sigma: 3.0 } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} + +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} + +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rpn_rois' +# top: 'rpn_scores' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } +} + +#layer { +# name: 'debug-data' +# type: 'Python' +# bottom: 'data' +# bottom: 'rpn_rois' +# bottom: 'rpn_scores' +# python_param { +# module: 'rpn.debug_layer' +# layer: 'RPNDebugLayer' +# } +#} + +layer { + name: 'roi-data' + type: 'Python' + bottom: 'rpn_rois' + bottom: 'gt_boxes' + top: 'rois' + top: 'labels' + top: 
'bbox_targets' + top: 'bbox_inside_weights' + top: 'bbox_outside_weights' + python_param { + module: 'rpn.proposal_target_layer' + layer: 'ProposalTargetLayer' + param_str: "'num_classes': 2" #21 + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5_3" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 7 + pooled_h: 7 + spatial_scale: 0.0625 # 1/16 + } +} + + + +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 2 #21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 8 #84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "loss_cls" + type: "SoftmaxWithLoss" + bottom: "cls_score" + bottom: "labels" + propagate_down: 1 + propagate_down: 0 + top: "loss_cls" + loss_weight: 1 +} +layer { + name: "loss_bbox" + type: "SmoothL1Loss" + bottom: 
"bbox_pred" + bottom: "bbox_targets" + bottom: "bbox_inside_weights" + bottom: "bbox_outside_weights" + top: "loss_bbox" + loss_weight: 1 +}