Skip to content

Commit

Permalink
increase coco split and yolo export speeds (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
fcakyon authored Apr 28, 2021
1 parent 1cfce05 commit 18083cf
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 65 deletions.
10 changes: 6 additions & 4 deletions docs/COCO.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,14 @@ coco_path = "coco.json"
# init Coco object
coco = Coco.from_coco_dict_or_path(coco_path)

# split and export COCO dataset into given target_dir with an 85% train/15% val split
coco.split_coco_as_train_val(
file_name="splitted_coco",
target_dir="output/folder/dir",
# split COCO dataset with an 85% train/15% val split
result = coco.split_coco_as_train_val(
train_split_rate=0.85
)

# export train val split files
save_json(result["train_coco"].json, "train_split.json")
save_json(result["val_coco"].json, "val_split.json")
```

## Combine COCO dataset files:
Expand Down
92 changes: 31 additions & 61 deletions sahi/utils/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,36 +890,26 @@ def json(self):
)

def split_coco_as_train_val(
self, file_name=None, target_dir=None, train_split_rate=0.9, numpy_seed=0
self, train_split_rate=0.9, numpy_seed=0
):
"""
Split images into train-val and save them as separate coco dataset files.
Split images into train-val and return them as sahi.utils.coco.Coco objects.
Args:
file_name: str
target_dir: str
train_split_rate: float
numpy_seed: int
To fix the numpy seed.
Returns:
result : dict
{
"train_dict": "",
"val_dict": "",
"train_path": "",
"val_path": "",
"train_coco": "",
"val_coco": "",
}
"""
# fix numpy seed
np.random.seed(numpy_seed)

# set output coco file name
if file_name:
None
elif target_dir:
raise ValueError("file_name should be specified.")

# divide images
num_images = len(self.images)
shuffled_images = copy.deepcopy(self.images)
Expand All @@ -928,36 +918,20 @@ def split_coco_as_train_val(
train_images = shuffled_images[:num_train]
val_images = shuffled_images[num_train:]

# form train val coco dicts
train_coco_dict = create_coco_dict(
images=train_images,
categories=self.json_categories,
ignore_negative_samples=False,
)
val_coco_dict = create_coco_dict(
images=val_images,
categories=self.json_categories,
ignore_negative_samples=False,
)
# form train val coco objects
train_coco = Coco(name=self.name if self.name else "split" + '_train')
train_coco.images = train_images
train_coco.categories = self.categories

val_coco = Coco(name=self.name if self.name else "split" + '_val')
val_coco.images = val_images
val_coco.categories = self.categories

# return result
if not target_dir:
return {
"train_dict": train_coco_dict,
"val_dict": val_coco_dict,
"train_path": "",
"val_path": "",
}
else:
train_coco_dict_path = os.path.join(target_dir, file_name + "_train.json")
save_json(train_coco_dict, train_coco_dict_path)
val_coco_dict_path = os.path.join(target_dir, file_name + "_val.json")
save_json(val_coco_dict, val_coco_dict_path)
return {
"train_dict": train_coco_dict,
"val_dict": val_coco_dict,
"train_path": train_coco_dict_path,
"val_path": val_coco_dict_path,
}
return {
"train_coco": train_coco,
"val_coco": val_coco,
}

def export_as_yolov5(self, image_dir, output_dir, train_split_rate=1, numpy_seed=0):
"""
Expand Down Expand Up @@ -991,19 +965,17 @@ def export_as_yolov5(self, image_dir, output_dir, train_split_rate=1, numpy_seed
# split dataset
if split_mode == "TRAINVAL":
result = self.split_coco_as_train_val(
file_name=None,
target_dir=None,
train_split_rate=train_split_rate,
numpy_seed=numpy_seed,
)
train_coco_dict = result["train_dict"]
val_coco_dict = result["val_dict"]
train_coco = result["train_coco"]
val_coco = result["val_coco"]
elif split_mode == "TRAIN":
train_coco_dict = self.json
val_coco_dict = None
train_coco = self
val_coco = None
elif split_mode == "VAL":
train_coco_dict = None
val_coco_dict = self.json
train_coco = None
val_coco = self

# create train val image dirs
train_dir = ""
Expand All @@ -1017,12 +989,12 @@ def export_as_yolov5(self, image_dir, output_dir, train_split_rate=1, numpy_seed

# create image symlinks and annotation txts
if split_mode in ["TRAINVAL", "TRAIN"]:
export_yolov5_images_and_txts_from_coco_dict(
image_dir, output_dir=train_dir, coco_dict_or_path=train_coco_dict
export_yolov5_images_and_txts_from_coco_object(
image_dir, output_dir=train_dir, coco=train_coco
)
if split_mode in ["TRAINVAL", "VAL"]:
export_yolov5_images_and_txts_from_coco_dict(
image_dir, output_dir=val_dir, coco_dict_or_path=val_coco_dict
export_yolov5_images_and_txts_from_coco_object(
image_dir, output_dir=val_dir, coco=val_coco
)

# create yolov5 data yaml
Expand Down Expand Up @@ -1054,8 +1026,8 @@ def get_subsampled_coco(self, subsample_ratio=10):
return subsampled_coco


def export_yolov5_images_and_txts_from_coco_dict(
image_dir, output_dir, coco_dict_or_path
def export_yolov5_images_and_txts_from_coco_object(
image_dir, output_dir, coco
):
"""
Creates image symlinks and annotation txts in yolo format from coco dataset.
Expand All @@ -1065,11 +1037,9 @@ def export_yolov5_images_and_txts_from_coco_dict(
Source image directory that contains coco images.
output_dir: str
Export directory.
coco_dict_or_path: str or dict
Path for the coco dataset file or coco dataset as python dictionary.
coco: sahi.utils.coco.Coco
Initialized Coco object that contains images and categories.
"""
# create coco instance from coco_dict_or_path
coco = Coco.from_coco_dict_or_path(coco_dict_or_path)

for image in tqdm(coco.images):
# Create a symbolic link pointing to src named dst
Expand Down
16 changes: 16 additions & 0 deletions tests/test_cocoutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,22 @@ def test_coco(self):
category_mapping,
)

def test_split_coco_as_train_val(self):
    """Split the combined fixture 50/50 with a fixed seed and check both halves."""
    from sahi.utils.coco import Coco

    # load the fixture and split it deterministically (numpy seed pinned to 0)
    fixture_path = "tests/data/coco_utils/combined_coco.json"
    source_coco = Coco.from_coco_dict_or_path(fixture_path)
    split = source_coco.split_coco_as_train_val(train_split_rate=0.5, numpy_seed=0)

    # train half: one image, five annotations
    train_coco = split["train_coco"]
    self.assertEqual(len(train_coco.json["images"]), 1)
    self.assertEqual(len(train_coco.json["annotations"]), 5)
    self.assertEqual(train_coco.json["images"][0]["height"], 682)

    # val half: one image, seven annotations
    val_coco = split["val_coco"]
    self.assertEqual(len(val_coco.json["images"]), 1)
    self.assertEqual(len(val_coco.json["annotations"]), 7)
    self.assertEqual(val_coco.json["images"][0]["height"], 1365)


def test_coco2yolo(self):
from sahi.utils.coco import Coco

Expand Down

0 comments on commit 18083cf

Please sign in to comment.