Skip to content

Commit

Permalink
Add annoset relative tool
Browse files Browse the repository at this point in the history
  • Loading branch information
eric612 committed Sep 17, 2018
1 parent ab6746c commit 996f1bb
Show file tree
Hide file tree
Showing 8 changed files with 985 additions and 0 deletions.
38 changes: 38 additions & 0 deletions data/coco/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
### Preparation
1. Download Images and Annotations from [MSCOCO](http://mscoco.org/dataset/#download). By default, we assume the data is stored in `$HOME/data/coco`

2. Get the coco code. We will call the directory that you cloned coco into `$COCO_ROOT`
```Shell
git clone https://github.com/weiliu89/coco.git
cd coco
git checkout dev
```

3. Build the coco code.
```Shell
cd PythonAPI
python setup.py build_ext --inplace
```

4. Split the annotations into one file per image and get the image size info.
```Shell
# Check scripts/batch_split_annotation.py and change settings accordingly.
python scripts/batch_split_annotation.py
# Create the minival2014_name_size.txt and test-dev2015_name_size.txt in $CAFFE_ROOT/data/coco
python scripts/batch_get_image_size.py
```

5. Create the LMDB file.
```Shell
cd $CAFFE_ROOT
# Create the minival.txt, testdev.txt, test.txt, train.txt in data/coco/
python data/coco/create_list.py
# You can modify the parameters in create_data.sh if needed.
# It will create lmdb files for minival, testdev, test, and train with encoded original image:
# - $HOME/data/coco/lmdb/coco_minival_lmdb
# - $HOME/data/coco/lmdb/coco_testdev_lmdb
# - $HOME/data/coco/lmdb/coco_test_lmdb
# - $HOME/data/coco/lmdb/coco_train_lmdb
# and make soft links at examples/coco/
./data/coco/create_data.sh
```
29 changes: 29 additions & 0 deletions data/coco/create_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Build LMDB databases for the COCO detection subsets.
#
# For each subset (minival, testdev, train, test) this invokes
# scripts/create_annoset.py with the list file produced by
# data/coco/create_list.py, writing an LMDB to
#   $data_root_dir/lmdb/coco_<subset>_lmdb
# and a soft link under examples/coco/.  Per-subset output is tee'd
# to data/coco/<subset>.log.

# Resolve the repository root relative to this script's location.
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
root_dir="$cur_dir/../.."

# Abort if the repo root cannot be entered; running create_annoset.py
# from the wrong directory would silently produce bad paths.
cd "$root_dir" || exit 1

redo=false
data_root_dir="$HOME/data/coco"
dataset_name="coco"
mapfile="$root_dir/data/$dataset_name/labelmap_coco.prototxt"
anno_type="detection"
label_type="json"
db="lmdb"
# 0 means "do not constrain / do not resize".
min_dim=0
max_dim=0
width=0
height=0

extra_cmd="--encode-type=jpg --encoded"
if $redo
then
  extra_cmd="$extra_cmd --redo"
fi

for subset in minival testdev train test
do
  python "$root_dir/scripts/create_annoset.py" \
      --anno-type="$anno_type" --label-type="$label_type" \
      --label-map-file="$mapfile" --min-dim="$min_dim" --max-dim="$max_dim" \
      --resize-width="$width" --resize-height="$height" --check-label \
      $extra_cmd \
      "$data_root_dir" \
      "$root_dir/data/$dataset_name/$subset.txt" \
      "$data_root_dir/$db/${dataset_name}_${subset}_$db" \
      "examples/$dataset_name" 2>&1 | tee "$root_dir/data/$dataset_name/$subset.log"
done
121 changes: 121 additions & 0 deletions data/coco/create_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import argparse
import os
from random import shuffle
import shutil
import subprocess
import sys

HOMEDIR = os.path.expanduser("~")
CURDIR = os.path.dirname(os.path.realpath(__file__))

# If true, re-create all list files.
redo = True
# The root directory which holds all information of the dataset.
data_dir = "{}/data/coco".format(HOMEDIR)
# The directory name which holds the image sets.
imgset_dir = "ImageSets"
# The direcotry which contains the images.
img_dir = "images"
img_ext = "jpg"
# The directory which contains the annotations.
anno_dir = "Annotations"
anno_ext = "json"

train_list_file = "{}/train.txt".format(CURDIR)
minival_list_file = "{}/minival.txt".format(CURDIR)
testdev_list_file = "{}/testdev.txt".format(CURDIR)
test_list_file = "{}/test.txt".format(CURDIR)

# Create training set.
# We follow Ross Girschick's split.
if redo or not os.path.exists(train_list_file):
datasets = ["train2014", "valminusminival2014"]
img_files = []
anno_files = []
for dataset in datasets:
imgset_file = "{}/{}/{}.txt".format(data_dir, imgset_dir, dataset)
with open(imgset_file, "r") as f:
for line in f.readlines():
name = line.strip("\n")
subset = name.split("_")[1]
img_file = "{}/{}/{}.{}".format(img_dir, subset, name, img_ext)
assert os.path.exists("{}/{}".format(data_dir, img_file)), \
"{}/{} does not exist".format(data_dir, img_file)
anno_file = "{}/{}/{}.{}".format(anno_dir, subset, name, anno_ext)
assert os.path.exists("{}/{}".format(data_dir, anno_file)), \
"{}/{} does not exist".format(data_dir, anno_file)
img_files.append(img_file)
anno_files.append(anno_file)
# Shuffle the images.
idx = [i for i in xrange(len(img_files))]
shuffle(idx)
with open(train_list_file, "w") as f:
for i in idx:
f.write("{} {}\n".format(img_files[i], anno_files[i]))

if redo or not os.path.exists(minival_list_file):
datasets = ["minival2014"]
subset = "val2014"
img_files = []
anno_files = []
for dataset in datasets:
imgset_file = "{}/{}/{}.txt".format(data_dir, imgset_dir, dataset)
with open(imgset_file, "r") as f:
for line in f.readlines():
name = line.strip("\n")
img_file = "{}/{}/{}.{}".format(img_dir, subset, name, img_ext)
assert os.path.exists("{}/{}".format(data_dir, img_file)), \
"{}/{} does not exist".format(data_dir, img_file)
anno_file = "{}/{}/{}.{}".format(anno_dir, subset, name, anno_ext)
assert os.path.exists("{}/{}".format(data_dir, anno_file)), \
"{}/{} does not exist".format(data_dir, anno_file)
img_files.append(img_file)
anno_files.append(anno_file)
with open(minival_list_file, "w") as f:
for i in xrange(len(img_files)):
f.write("{} {}\n".format(img_files[i], anno_files[i]))

if redo or not os.path.exists(testdev_list_file):
datasets = ["test-dev2015"]
subset = "test2015"
img_files = []
anno_files = []
for dataset in datasets:
imgset_file = "{}/{}/{}.txt".format(data_dir, imgset_dir, dataset)
with open(imgset_file, "r") as f:
for line in f.readlines():
name = line.strip("\n")
img_file = "{}/{}/{}.{}".format(img_dir, subset, name, img_ext)
assert os.path.exists("{}/{}".format(data_dir, img_file)), \
"{}/{} does not exist".format(data_dir, img_file)
anno_file = "{}/{}/{}.{}".format(anno_dir, subset, name, anno_ext)
assert os.path.exists("{}/{}".format(data_dir, anno_file)), \
"{}/{} does not exist".format(data_dir, anno_file)
img_files.append(img_file)
anno_files.append(anno_file)
with open(testdev_list_file, "w") as f:
for i in xrange(len(img_files)):
f.write("{} {}\n".format(img_files[i], anno_files[i]))

if redo or not os.path.exists(test_list_file):
datasets = ["test2015"]
subset = "test2015"
img_files = []
anno_files = []
for dataset in datasets:
imgset_file = "{}/{}/{}.txt".format(data_dir, imgset_dir, dataset)
with open(imgset_file, "r") as f:
for line in f.readlines():
name = line.strip("\n")
img_file = "{}/{}/{}.{}".format(img_dir, subset, name, img_ext)
assert os.path.exists("{}/{}".format(data_dir, img_file)), \
"{}/{} does not exist".format(data_dir, img_file)
anno_file = "{}/{}/{}.{}".format(anno_dir, subset, name, anno_ext)
assert os.path.exists("{}/{}".format(data_dir, anno_file)), \
"{}/{} does not exist".format(data_dir, anno_file)
img_files.append(img_file)
anno_files.append(anno_file)
with open(test_list_file, "w") as f:
for i in xrange(len(img_files)):
f.write("{} {}\n".format(img_files[i], anno_files[i]))

Loading

0 comments on commit 996f1bb

Please sign in to comment.