rbgirshick · Asiapenolove · Feb 17, 2017 · Feb 18, 2017 · Feb 18, 2017 · Feb 18, 2017
diff --git a/data/scripts/Face_dataset_scripts/FDDB/FDDB.png b/data/scripts/Face_dataset_scripts/FDDB/FDDB.png
diff --git a/data/scripts/Face_dataset_scripts/FDDB/README.md b/data/scripts/Face_dataset_scripts/FDDB/README.md
@@ -0,0 +1,50 @@
+# FDDB_DataSet_4_faster_rcnn
+
+## Step 1: get the data from FDDB
+```
+./get_data.sh
+```
+This downloads originalPics.tar.gz (~500MB) and FDDB-folds.tgz from FDDB,
+verifies their MD5 checksums and, if both pass, extracts the two archives into the originalPics directory.
+
+If a download link fails, download the files from the FDDB website:
+http://vis-www.cs.umass.edu/fddb/
+
+
+If the checksum test keeps failing, download and extract the archives manually:
+
+```
+wget http://tamaraberg.com/faceDataset/originalPics.tar.gz 
+wget http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz
+
+mkdir originalPics;
+tar -C originalPics  -zxf originalPics.tar.gz
+tar -C originalPics  -zxf FDDB-folds.tgz
+```
+
+
+## Step 2: create a dataset that can be used with py-faster-rcnn
+```
+cd pyxml;
+./runit.sh;
+```
+This creates the FDDB_2010 directory inside pyxml,
+with the JPEGImages, Annotations and ImageSets/Main directories inside FDDB_2010.
+
+runit.sh runs the equivalent of:
+```
+python anno2xml.py FDDB-fold-01-ellipseList.txt;
+python anno2xml.py FDDB-fold-02-ellipseList.txt;
+python anno2xml.py FDDB-fold-03-ellipseList.txt;
+....
+```
+The FDDB-fold-NN-ellipseList.txt files (NN = 01~10) are located in originalPics/FDDB-folds;
+you can check that all ten lists are present there.
+
+
+## Step 3: use labelImg to check that the dataset was created properly
+You can use labelImg to check that the annotations were created properly (it is a really awesome tool):
+https://github.com/tzutalin/labelImg 
+![alt tag](https://raw.githubusercontent.com/penolove/FDDB_DataSet_4_faster_rcnn/master/FDDB.png)
+
+
diff --git a/data/scripts/Face_dataset_scripts/FDDB/get_data.sh b/data/scripts/Face_dataset_scripts/FDDB/get_data.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Download the FDDB images and fold annotations, verify their MD5 checksums
+# and unpack both archives into ./originalPics.
+# Nothing is downloaded or unpacked when ./originalPics already exists.
+ori_CheckSum=cf414253ac596cd858daae0cc321d793
+folds_CheckSum=4cf9badc939a3398a0d6f3a3c8540f55
+
+# fetch_and_verify FILE URL EXPECTED_MD5
+#   Downloads URL (saved by wget under its base name, which must be FILE)
+#   unless FILE is already present, then compares the md5sum of FILE with
+#   EXPECTED_MD5. On mismatch FILE is removed, both checksums are printed
+#   (actual first, expected second) and the script exits with status 1.
+fetch_and_verify() {
+    local file="$1" url="$2" expected="$3" checksum
+
+    if [ ! -f "$file" ]; then
+        echo "[FDDB] Downloading $file ....."
+        wget "$url"
+    fi
+
+    checksum=$(md5sum "$file" | awk '{ print $1 }')
+    if [ "$checksum" != "$expected" ]; then
+        # -f: the file may not exist at all when the download itself failed
+        rm -f "$file"
+        echo "$checksum"
+        echo "$expected"
+        echo "[FDDB] file $file : checksum error , need to rerun the script";
+        exit 1;
+    fi
+}
+
+if [ -d originalPics ];
+then
+    echo "[FDDB] originalPics dir alreday exist";
+else
+    fetch_and_verify originalPics.tar.gz http://tamaraberg.com/faceDataset/originalPics.tar.gz "$ori_CheckSum"
+    fetch_and_verify FDDB-folds.tgz http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz "$folds_CheckSum"
+
+    echo "[FDDB] Making originalPics , uncompress files ..."
+    mkdir originalPics;
+    # A half-filled originalPics would make the next run take the
+    # "already exist" branch above, so remove it when extraction fails.
+    if ! tar -C originalPics -zxf originalPics.tar.gz ||
+       ! tar -C originalPics -zxf FDDB-folds.tgz; then
+        rm -rf originalPics
+        echo "[FDDB] extraction failed , need to rerun the script";
+        exit 1;
+    fi
+
+fi;
diff --git a/data/scripts/Face_dataset_scripts/FDDB/makefile b/data/scripts/Face_dataset_scripts/FDDB/makefile
@@ -0,0 +1,5 @@
+# `clean` is an action, not a file to build; keep it working even if a file
+# named "clean" ever appears in this directory.
+.PHONY: clean
+clean:
+	echo "time to clean produced dirs/files"
+	rm -rf originalPics/
+	# -f: a missing archive must not abort the recipe before the next removal
+	rm -f FDDB-folds.tgz
+	rm -f originalPics.tar.gz
diff --git a/data/scripts/Face_dataset_scripts/FDDB/pyxml/anno2xml.py b/data/scripts/Face_dataset_scripts/FDDB/pyxml/anno2xml.py
@@ -0,0 +1,190 @@
+
+from lxml import etree
+import sys
+import cv2
+import math
+import glob
+import os.path
+
+
+# Output layout: <current working dir>/FDDB_2010/{JPEGImages,Annotations}
+target_dir=os.path.join(os.getcwd(), 'FDDB_2010')
+target_dir_Jpg=os.path.join(target_dir,'JPEGImages')
+target_dir_Ana=os.path.join(target_dir,'Annotations')
+
+# Create whichever part of the layout does not exist yet.
+for _needed_dir in (target_dir, target_dir_Jpg, target_dir_Ana):
+    if not os.path.exists(_needed_dir):
+        os.makedirs(_needed_dir)
+
+# Read position inside the annotation list; advanced by the main loop.
+cur_ind=0
+
+# Number used for the next output file: the count of *.jpg files already
+# present in JPEGImages, so successive runs keep numbering where the
+# previous one stopped.
+target_dir_Jpg_set=os.path.join(target_dir_Jpg,'*.jpg')
+outfileID=len(glob.glob(target_dir_Jpg_set))
+
+
+def img2xml(path,objects,shape):
+    """Write a VOC-style annotation XML for one image.
+
+    path    -- base name (string) of the annotation; the file is written to
+               <target_dir_Ana>/<path>.xml
+    objects -- FDDB ellipse lines, each
+               "major_radius minor_radius angle center_x center_y ..."
+    shape   -- image shape indexed as (height, width, depth)
+
+    Only boxes lying strictly inside the image are emitted. Returns True
+    when at least one box was kept and the file was written; returns False
+    (and writes nothing) otherwise.
+    """
+    annotation = etree.Element("annotation")
+    etree.SubElement(annotation, "folder").text = "VOC2007"
+    etree.SubElement(annotation, "filename").text = str(path).zfill(6)
+    source_node = etree.SubElement(annotation, "source")
+    etree.SubElement(source_node, "database").text = "FDDB"
+
+    # <size> children are emitted in the order width, height, depth
+    img_depth = shape[2]
+    img_width = shape[1]
+    img_height = shape[0]
+    size_node = etree.SubElement(annotation, "size")
+    for tag, value in (("width", img_width), ("height", img_height), ("depth", img_depth)):
+        etree.SubElement(size_node, tag).text = str(value)
+
+    kept = 0
+    for line in objects:
+        fields = [float(token) for token in line.split()]
+        maj_rad, min_rad, angle = fields[0], fields[1], fields[2]
+        xcenter, ycenter = fields[3], fields[4]
+
+        # Corners of the rectangle circumscribing the ellipse, rotated about
+        # the ellipse centre.
+        # [link] https://github.com/nouiz/lisa_emotiw/blob/master/emotiw/common/datasets/faces/FDDB.py
+        # NOTE(review): angle goes through math.radians, i.e. it is treated
+        # as degrees -- confirm against the FDDB annotation format.
+        theta = math.radians(-angle)
+        cos_t = math.cos(theta)
+        sin_t = math.sin(theta)
+        corner_xs = []
+        corner_ys = []
+        for sign_x, sign_y in ((-1, -1), (1, -1), (1, 1), (-1, 1)):
+            dx = sign_x * min_rad
+            dy = sign_y * maj_rad
+            corner_xs.append(cos_t * dx - sin_t * dy + xcenter)
+            corner_ys.append(sin_t * dx + cos_t * dy + ycenter)
+
+        left = int(min(corner_xs))
+        right = int(max(corner_xs))
+        top = int(min(corner_ys))
+        bottom = int(max(corner_ys))
+
+        # Skip boxes that touch or cross the image border.
+        if left <= 0 or top <= 0 or right >= shape[1] or bottom >= shape[0]:
+            continue
+
+        kept += 1
+        object_node = etree.SubElement(annotation, "object")
+        for tag, text in (("name", "face"), ("pose", "Unspecified"),
+                          ("truncated", "0"), ("difficult", "0")):
+            etree.SubElement(object_node, tag).text = text
+        bndbox_node = etree.SubElement(object_node, "bndbox")
+        for tag, coord in (("xmin", left), ("ymin", top),
+                           ("xmax", right), ("ymax", bottom)):
+            etree.SubElement(bndbox_node, tag).text = str(coord)
+
+    if kept == 0:
+        return False
+
+    xml_path = os.path.join(target_dir_Ana, path+".xml")
+    etree.ElementTree(annotation).write(xml_path, pretty_print=True)
+    return True
+
+def face_box_wh(path,objects,shape):
+    """Return [width, height] of every face box lying strictly inside the image.
+
+    path    -- not used by this function; kept so existing callers still work
+    objects -- FDDB ellipse lines, each
+               "major_radius minor_radius angle center_x center_y ..."
+    shape   -- image shape; shape[0] bounds the y range, shape[1] the x range
+
+    Returns a list of [width, height] pairs in integer pixels; the list is
+    empty when no box qualifies.
+    """
+    wh=list()
+    for obj in objects:
+        #object
+        obj=[float(i) for i in obj.split()]
+        #the smallest circumscribed parallelogram
+        #[link] https://github.com/nouiz/lisa_emotiw/blob/master/emotiw/common/datasets/faces/FDDB.py
+        # NOTE(review): angle goes through math.radians, i.e. it is treated
+        # as degrees -- confirm against the FDDB annotation format.
+        maj_rad = obj[0]
+        min_rad = obj[1]
+        angle = obj[2]
+        xcenter = obj[3]
+        ycenter = obj[4]
+        cosin = math.cos(math.radians(-angle))
+        sin = math.sin(math.radians(-angle))
+
+        x1 = cosin * (-min_rad) - sin * (-maj_rad) + xcenter
+        y1 = sin * (-min_rad) + cosin * (-maj_rad) + ycenter
+        x2 = cosin * (min_rad) - sin * (-maj_rad) + xcenter
+        y2 = sin * (min_rad) + cosin * (-maj_rad) + ycenter
+        x3 = cosin * (min_rad) - sin * (maj_rad) + xcenter
+        y3 = sin * (min_rad) + cosin * (maj_rad) + ycenter
+        x4 = cosin * (-min_rad) - sin * (maj_rad) + xcenter
+        y4 = sin * (-min_rad) + cosin * (maj_rad) + ycenter
+        wid=[x1,x2,x3,x4]
+        hei=[y1,y2,y3,y4]
+        xmin_ = int(min(wid))
+        xmax_ = int(max(wid))
+        ymin_ = int(min(hei))
+        ymax_ = int(max(hei))
+
+        # check if out of box
+        if(xmin_ >0 and ymin_>0 and xmax_<shape[1] and ymax_<shape[0]):
+            # The locals carry a trailing underscore; the previous
+            # xmax-xmin / ymax-ymin referenced undefined names (NameError).
+            wh.append([xmax_-xmin_,ymax_-ymin_])
+    return wh
+
+# the annotation files path
+FDDB_folds=os.path.join("..",'originalPics','FDDB-folds')
+originalPics_folds=os.path.join("..",'originalPics')
+
+if __name__=="__main__":
+    # Usage: python anno2xml.py [ellipseList]
+    # ellipseList is a file name inside FDDB_folds; fold 01 is used when it
+    # is omitted.
+    if len(sys.argv) < 2:
+        ellipseList=os.path.join(FDDB_folds,'FDDB-fold-01-ellipseList.txt')
+    elif len(sys.argv)==2:
+        ellipseList=os.path.join(FDDB_folds,sys.argv[1])
+    else:
+        print "usage : python anno2xml.py [ellipseList]"
+        # wrong invocation is an error: report it through the exit status
+        sys.exit(1)
+
+    # Blank lines carry no information and would break the
+    # name / count / ellipses bookkeeping below, so they are dropped.
+    with open(ellipseList,'r') as current_file:
+        image_with_target=[line.strip() for line in current_file if line.strip()]
+
+    # The list is read as a sequence of records:
+    #   <image path relative to originalPics, without ".jpg">
+    #   <number of faces N>
+    #   <N ellipse lines: major_radius minor_radius angle center_x center_y ...>
+    # e.g.
+    #   2002/08/07/big/img_1453
+    #   1
+    #   67.995400 38.216200 -1.559920 208.966471 109.764400  1
+    while (cur_ind<len(image_with_target)):
+        path_img = os.path.join(originalPics_folds,image_with_target[cur_ind]+'.jpg')
+        cur_ind+=1
+        len_obj=int(image_with_target[cur_ind])
+        cur_ind+=1
+        objects=image_with_target[cur_ind:cur_ind+len_obj]
+        cur_ind+=len_obj
+
+        # cv2.imread signals a missing/unreadable file by returning None
+        # instead of raising; skip the record rather than crash on img.shape.
+        img = cv2.imread(path_img)
+        if img is None:
+            sys.stderr.write("[FDDB] cannot read image %s , skipped\n" % path_img)
+            continue
+
+        path=str(outfileID).zfill(6)
+        # The image is copied (and the id consumed) only when img2xml kept
+        # at least one face and wrote the annotation.
+        if(img2xml(path,objects,img.shape)):
+            img_path2write = os.path.join(target_dir_Jpg,path+".jpg")
+            cv2.imwrite(img_path2write, img)
+            outfileID+=1
+
+
diff --git a/data/scripts/Face_dataset_scripts/FDDB/pyxml/randomSet.py b/data/scripts/Face_dataset_scripts/FDDB/pyxml/randomSet.py
@@ -0,0 +1,56 @@
+from random import shuffle
+import glob
+import sys
+import os.path
+
+
+if __name__=='__main__':
+    # Usage: python randomSet.py dataset_name [trainXtest ratio]
+    # The ratio is the fraction of annotations put into trainval.txt
+    # (default 0.9); the rest goes to test.txt.
+    # NOTE(review): dataset_name is parsed but not used below -- the dataset
+    # directory is hard-coded to FDDB_2010. Confirm whether it should be
+    # taken from the argument.
+    if len(sys.argv) ==2 :
+         dataset_name=sys.argv[1]
+         trainRatio=0.9
+    elif len(sys.argv)==3:
+         dataset_name=sys.argv[1]
+         trainRatio=float(sys.argv[2])
+    else:
+        print "usage : python randomSet.py dataset_name [trainXtest ratio]"
+        sys.exit(1)
+
+    # A ratio outside [0, 1] would turn the slices below into nonsense
+    # (a negative size slices from the end of the list).
+    if not 0.0 <= trainRatio <= 1.0:
+        print "usage : python randomSet.py dataset_name [trainXtest ratio]"
+        sys.exit(1)
+
+    target_dir = os.path.join(os.getcwd(), 'FDDB_2010')
+    target_dir_ImSets = os.path.join(target_dir,'ImageSets')
+    target_dir_ImSets_Main = os.path.join(target_dir_ImSets,'Main')
+    target_dir_Anno = os.path.join(target_dir,'Annotations')
+
+    if not os.path.exists(target_dir):
+        print("DataSet doesn't exist, you should check if anno2xml.py runs properly")
+        # failure must be visible to the calling script
+        sys.exit(1)
+
+    # makedirs also creates the intermediate ImageSets directory
+    if not os.path.exists(target_dir_ImSets_Main):
+        os.makedirs(target_dir_ImSets_Main)
+
+    # Image ids are the annotation file names without directory and
+    # extension; basename/splitext work with any path separator and only
+    # touch the real extension.
+    fileID=glob.glob(os.path.join(target_dir_Anno, "*.xml"))
+    xml_list=[os.path.splitext(os.path.basename(i))[0] for i in fileID]
+
+    shuffle(xml_list)
+    trainSize=int(len(xml_list)*trainRatio)
+
+    #train
+    print "[FDDB] Training set creating..."
+    with open(os.path.join(target_dir_ImSets_Main , 'trainval.txt'),'w') as f:
+        f.writelines(i+'\n' for i in xml_list[:trainSize])
+    print "[FDDB] Done!"
+
+    #test
+    print "[FDDB] Testing set creating..."
+    with open(os.path.join(target_dir_ImSets_Main , 'test.txt'), 'w') as f:
+        f.writelines(i+'\n' for i in xml_list[trainSize:])
+    print "[FDDB] Done!"
+
diff --git a/data/scripts/Face_dataset_scripts/FDDB/pyxml/runit.sh b/data/scripts/Face_dataset_scripts/FDDB/pyxml/runit.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Convert the ten FDDB fold lists with anno2xml.py, then split the resulting
+# annotations into trainval/test sets with randomSet.py.
+
+DIRECTORY="../originalPics/"
+
+# Nothing to convert when the data directory is missing.
+if ! [ -d "$DIRECTORY" ]; then
+    echo " Oops ,It seems data not downloaded properly !"
+    exit 1;
+fi
+
+# The list files are named FDDB-fold-01-ellipseList.txt ... FDDB-fold-10-ellipseList.txt,
+# so the fold number is zero-padded to two digits.
+for i in $(seq 10)
+do
+    echo "[FDDB] Processing $i-th List";
+    fold=$(printf '%02d' "$i")
+    python anno2xml.py "FDDB-fold-${fold}-ellipseList.txt";
+done
+
+python randomSet.py FDDB_2010 0.9